// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer-grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * gives the Linux kernel a flexible, low-latency interface to communicate
 * performance hints directly to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementation
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
 */
/*
 * AMD Energy Preference Performance (EPP)
 *
 * The EPP is used in the CCLK DPM controller to drive the frequency that a
 * core is going to operate at during short periods of activity. EPP values
 * are utilized for the different OS profiles (balanced, performance, power
 * savings).
 *
 * Display strings corresponding to each EPP index in energy_perf_strings[]:
 *
 *	index	string
 *	-----------------------------
 *	0	default
 *	1	performance
 *	2	balance_performance
 *	3	balance_power
 *	4	power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT		= 0,
	EPP_INDEX_PERFORMANCE		= 1,
	EPP_INDEX_BALANCE_PERFORMANCE	= 2,
	EPP_INDEX_BALANCE_POWERSAVE	= 3,
	EPP_INDEX_POWERSAVE		= 4,
};
staticint __init dmi_matched_7k62_bios_bug(conststruct dmi_system_id *dmi)
{ /** * match the broken bios for family 17h processor support CPPC V2 * broken BIOS lack of nominal_freq and lowest_freq capabilities * definition in ACPI tables
*/ if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
quirks = dmi->driver_data;
pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); return 1;
}
staticinlineint get_mode_idx_from_str(constchar *str, size_t size)
{ int i;
for (i=0; i < AMD_PSTATE_MAX; i++) { if (!strncmp(str, amd_pstate_mode_string[i], size)) return i;
} return -EINVAL;
}
/*
 * File-scope mutex for this driver.
 * NOTE(review): no lock/unlock site is visible next to this definition;
 * presumably it serializes driver mode/status changes - confirm against its users.
 */
static DEFINE_MUTEX(amd_pstate_driver_lock);
static u8 msr_get_epp(struct amd_cpudata *cpudata)
{
u64 value; int ret;
ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); if (ret < 0) {
pr_debug("Could not retrieve energy perf value (%d)\n", ret); return ret;
}
value = prev = READ_ONCE(cpudata->cppc_req_cached);
value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
if (trace_amd_pstate_epp_perf_enabled()) { union perf_cached perf = READ_ONCE(cpudata->perf);
trace_amd_pstate_epp_perf(cpudata->cpu,
perf.highest_perf,
epp,
min_perf,
max_perf,
policy->boost_enabled,
value != prev);
}
if (value == prev) return 0;
if (fast_switch) {
wrmsrq(MSR_AMD_CPPC_REQ, value); return 0;
} else { int ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
perf_ctrls.energy_perf = epp;
ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); if (ret) {
pr_debug("failed to set energy perf value (%d)\n", ret); return ret;
}
value = READ_ONCE(cpudata->cppc_req_cached);
value &= ~AMD_CPPC_EPP_PERF_MASK;
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
WRITE_ONCE(cpudata->cppc_req_cached, value);
/* * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an * indication that the min_perf value is the one specified through the BIOS option
*/
cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
if (cppc_state == AMD_PSTATE_ACTIVE) { int ret = shmem_set_epp(policy, epp);
if (ret) return ret;
}
value = prev = READ_ONCE(cpudata->cppc_req_cached);
value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
if (trace_amd_pstate_epp_perf_enabled()) { union perf_cached perf = READ_ONCE(cpudata->perf);
trace_amd_pstate_epp_perf(cpudata->cpu,
perf.highest_perf,
epp,
min_perf,
max_perf,
policy->boost_enabled,
value != prev);
}
/* limit the max perf when core performance boost feature is disabled */ if (!cpudata->boost_supported)
max_perf = min_t(u8, perf.nominal_perf, max_perf);
staticint amd_pstate_verify(struct cpufreq_policy_data *policy_data)
{ /* * Initialize lower frequency limit (i.e.policy->min) with * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS, * Override the initial value set by cpufreq core and amd-pstate qos_requests.
*/ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { struct cpufreq_policy *policy __free(put_cpufreq_policy) =
cpufreq_cpu_get(policy_data->cpu); struct amd_cpudata *cpudata; union perf_cached perf;
WARN_ON(fast_switch && !policy->fast_switch_enabled); /* * If fast_switch is desired, then there aren't any registered * transition notifiers. See comment for * cpufreq_enable_fast_switch().
*/ if (!fast_switch)
cpufreq_freq_transition_begin(policy, &freqs);
if (cppc_state == AMD_PSTATE_PASSIVE) {
ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq); if (ret < 0)
pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
}
return ret < 0 ? ret : 0;
}
/*
 * amd_pstate_set_boost - apply a boost on/off request to a policy
 * @policy: cpufreq policy whose driver_data is the per-CPU amd_cpudata
 * @state:  requested boost state, passed through to
 *          amd_pstate_cpu_boost_update()
 *
 * Returns -EOPNOTSUPP (after logging an error) when the cpudata does not have
 * boost_supported set; otherwise returns whatever
 * amd_pstate_cpu_boost_update() returned.
 */
static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EOPNOTSUPP;
	}

	ret = amd_pstate_cpu_boost_update(policy, state);
	/* The limits are refreshed regardless of the update's result. */
	refresh_frequency_limits(policy);

	return ret;
}
staticint amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
{
u64 boost_val; int ret = -1;
/* * If platform has no CPB support or disable it, initialize current driver * boost_enabled state to be false, it is not an error for cpufreq core to handle.
*/ if (!cpu_feature_enabled(X86_FEATURE_CPB)) {
pr_debug_once("Boost CPB capabilities not present in the processor\n");
ret = 0; goto exit_err;
}
ret = rdmsrq_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); if (ret) {
pr_err_once("failed to read initial CPU boost state!\n");
ret = -EIO; goto exit_err;
}
if (!(boost_val & MSR_K7_HWCR_CPB_DIS))
cpudata->boost_supported = true;
/*
 * amd_perf_ctl_reset - write 0 to MSR_AMD_PERF_CTL on @cpu
 * @cpu: CPU on which the MSR write is performed
 *
 * The CPU init paths in this file call this first, noting that resetting
 * PERF_CTL puts the CPU in P0 frequency. The return value of
 * wrmsrq_on_cpu() is not checked.
 */
static void amd_perf_ctl_reset(unsigned int cpu)
{
wrmsrq_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
/* Alias for U8_MAX; presumably the largest CPPC perf value - TODO confirm against its users. */
#define CPPC_MAX_PERF U8_MAX
/*
 * amd_pstate_init_prefcore - set up preferred-core state for one CPU
 * @cpudata: per-CPU driver data
 *
 * Does nothing when amd_pstate_prefcore is off. When the CPU has
 * X86_FEATURE_AMD_WORKLOAD_CLASS and CONFIG_AMD_HFI is enabled, preferred
 * core is switched off globally (amd-hfi should be used instead). Otherwise
 * the CPU is marked hw_prefcore and its ITMT priority is set from
 * prefcore_ranking.
 */
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
	int prio;

	/* user disabled or not detected */
	if (!amd_pstate_prefcore)
		return;

	/* should use amd-hfi instead */
	if (cpu_feature_enabled(X86_FEATURE_AMD_WORKLOAD_CLASS)) {
		if (IS_ENABLED(CONFIG_AMD_HFI)) {
			amd_pstate_prefcore = false;
			return;
		}
	}

	cpudata->hw_prefcore = true;

	/* Priorities must be initialized before ITMT support can be toggled on. */
	prio = (int)READ_ONCE(cpudata->prefcore_ranking);
	sched_set_itmt_core_prio(prio, cpudata->cpu);
}
/*
 * Report the P-state transition delay in microseconds, preferring the value
 * the firmware publishes in the ACPI tables over a hard-coded one.
 *
 * When cppc_get_transition_latency() reports CPUFREQ_ETERNAL, fall back to
 * AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY on CPUs with
 * X86_FEATURE_AMD_FAST_CPPC and to AMD_PSTATE_TRANSITION_DELAY otherwise.
 */
static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
{
	u32 latency_ns = cppc_get_transition_latency(cpu);

	/* Firmware supplied a value: convert nanoseconds to microseconds. */
	if (latency_ns != CPUFREQ_ETERNAL)
		return latency_ns / NSEC_PER_USEC;

	if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC))
		return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;

	return AMD_PSTATE_TRANSITION_DELAY;
}
/*
 * Report the P-state transition latency, preferring the value the firmware
 * publishes in the ACPI tables over a hard-coded one.
 *
 * AMD_PSTATE_TRANSITION_LATENCY is used only when
 * cppc_get_transition_latency() reports CPUFREQ_ETERNAL.
 */
static u32 amd_pstate_get_transition_latency(unsigned int cpu)
{
	u32 latency = cppc_get_transition_latency(cpu);

	if (latency != CPUFREQ_ETERNAL)
		return latency;

	return AMD_PSTATE_TRANSITION_LATENCY;
}
/*
 * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq
 * for the @cpudata object.
 *
 * Requires: all perf members of @cpudata to be initialized.
 *
 * Returns 0 on success, non-zero value on failure: the error from
 * cppc_get_perf_caps(), or -EINVAL when a frequency fails the range checks.
 *
 * NOTE(review): in the code below min_freq, max_freq, nominal_freq and
 * lowest_nonlinear_freq are never assigned before they are range-checked, and
 * cppc_perf / perf are read but not otherwise used. The statements that derive
 * the frequencies and store them into @cpudata appear to be missing here;
 * restore them before relying on this function.
 */
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
	u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq;
	struct cppc_perf_caps cppc_perf;
	union perf_cached perf;
	int ret;

	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	perf = READ_ONCE(cpudata->perf);

	/*
	 * Below values need to be initialized correctly, otherwise driver will fail to load
	 * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf
	 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq]
	 * Check _CPC in ACPI table objects if any values are incorrect
	 */
	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
		/* The frequencies are u32, so print them as unsigned. */
		pr_err("min_freq(%u) or max_freq(%u) or nominal_freq(%u) value is incorrect\n",
		       min_freq, max_freq, nominal_freq);
		return -EINVAL;
	}

	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
		pr_err("lowest_nonlinear_freq(%u) value is out of range [min_freq(%u), nominal_freq(%u)]\n",
		       lowest_nonlinear_freq, min_freq, nominal_freq);
		return -EINVAL;
	}

	return 0;
}
staticint amd_pstate_cpu_init(struct cpufreq_policy *policy)
{ struct amd_cpudata *cpudata; union perf_cached perf; struct device *dev; int ret;
/* * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, * which is ideal for initialization process.
*/
amd_perf_ctl_reset(policy->cpu);
dev = get_cpu_device(policy->cpu); if (!dev) return -ENODEV;
cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); if (!cpudata) return -ENOMEM;
cpudata->cpu = policy->cpu;
ret = amd_pstate_init_perf(cpudata); if (ret)
goto free_cpudata1;
amd_pstate_init_prefcore(cpudata);
ret = amd_pstate_init_freq(cpudata); if (ret)
goto free_cpudata1;
ret = amd_pstate_init_boost_support(cpudata); if (ret)
goto free_cpudata1;
/* * This frequency is to indicate the maximum hardware frequency. * If boost is not active but supported, the frequency will be larger than the * one in cpuinfo.
*/ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, char *buf)
{ struct amd_cpudata *cpudata; union perf_cached perf;
/* * In some of ASICs, the highest_perf is not the one in the _CPC table, so we * need to expose it to sysfs.
*/ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, char *buf)
{ struct amd_cpudata *cpudata;
/*
 * amd_pstate_acpi_pm_profile_server - is the FADT preferred PM profile a server one?
 *
 * Returns true when acpi_gbl_FADT.preferred_profile is PM_ENTERPRISE_SERVER,
 * PM_SOHO_SERVER or PM_PERFORMANCE_SERVER, false for every other value.
 */
static bool amd_pstate_acpi_pm_profile_server(void)
{
	switch (acpi_gbl_FADT.preferred_profile) {
	case PM_ENTERPRISE_SERVER:
	case PM_SOHO_SERVER:
	case PM_PERFORMANCE_SERVER:
		return true;
	}

	return false;
}
/*
 * amd_pstate_acpi_pm_profile_undefined - is the FADT preferred PM profile unusable?
 *
 * Returns true when acpi_gbl_FADT.preferred_profile is PM_UNSPECIFIED or is
 * not below NR_PM_PROFILES, false otherwise.
 */
static bool amd_pstate_acpi_pm_profile_undefined(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
		return true;

	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
		return true;

	return false;
}
/*
 * amd_pstate_epp_cpu_init - per-policy initialisation for the EPP path
 * @policy: cpufreq policy being initialised
 *
 * Resets PERF_CTL on policy->cpu, allocates the per-CPU amd_cpudata, runs the
 * perf, preferred-core, frequency and boost initialisation, then picks the
 * default policy and EPP from the ACPI preferred PM profile and programs that
 * EPP.
 *
 * Returns 0 on success. Returns -ENODEV when the CPU device cannot be found,
 * -ENOMEM when the allocation fails, or the error of the failing init step;
 * every failure after the allocation frees the amd_cpudata.
 *
 * NOTE(review): nothing below attaches cpudata to @policy, while other
 * callbacks in this file read it back from policy->driver_data. Confirm that
 * assignment exists; if it is made here, clear it again under free_cpudata1.
 */
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata;
	struct device *dev;
	int ret;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);

	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	/*
	 * Set the policy to provide a valid fallback value in case
	 * the default cpufreq governor is neither powersave nor performance.
	 */
	if (amd_pstate_acpi_pm_profile_server() ||
	    amd_pstate_acpi_pm_profile_undefined()) {
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
		cpudata->epp_default = amd_pstate_get_epp(cpudata);
	} else {
		policy->policy = CPUFREQ_POLICY_POWERSAVE;
		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
	}

	/* Take the common error path so cpudata is not leaked on failure. */
	ret = amd_pstate_set_epp(policy, cpudata->epp_default);
	if (ret)
		goto free_cpudata1;

	current_pstate_driver->adjust_perf = NULL;

	return 0;

free_cpudata1:
	pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
	kfree(cpudata);
	return ret;
}
ret = amd_pstate_epp_update_limit(policy, true); if (ret) return ret;
/* * policy->cur is never updated with the amd_pstate_epp driver, but it * is used as a stale frequency value. So, keep it within limits.
*/
policy->cur = policy->min;
/* * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the * limits, epp and desired perf will get reset to the cached values in cpudata struct
*/ return amd_pstate_update_perf(policy, perf.bios_min_perf,
FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), false);
}
/*
 * amd_pstate_suspend - cpufreq suspend handling for one policy
 * @policy: policy whose driver_data is the per-CPU amd_cpudata
 *
 * Calls amd_pstate_update_perf() with perf.bios_min_perf together with the
 * desired-perf, max-perf and EPP fields extracted from the cached CPPC
 * request, returns that call's error if it fails, and otherwise sets
 * cpudata->suspended.
 *
 * NOTE(review): the final statement uses `cur_perf`, which is not declared
 * anywhere in this function, and it re-programs the request right after the
 * suspended flag is set. It looks like the tail of a different (resume?)
 * function was merged in here - confirm and split before building.
 */
staticint amd_pstate_suspend(struct cpufreq_policy *policy)
{ struct amd_cpudata *cpudata = policy->driver_data; union perf_cached perf = READ_ONCE(cpudata->perf); int ret;
/*
 * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
 * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
 * the limits, epp and desired perf will get reset to the cached values in cpudata struct
 */
ret = amd_pstate_update_perf(policy, perf.bios_min_perf,
FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), false); if (ret) return ret;
/* set this flag to avoid setting core offline */
cpudata->suspended = true;
/* Set CPPC_REQ to last sane value until the governor updates it */ return amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf,
0U, false);
}
/* * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. * show the debug message that helps to check if the CPU has CPPC support for loading issue.
*/ staticbool amd_cppc_supported(void)
{ struct cpuinfo_x86 *c = &cpu_data(0); bool warn = false;
if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
pr_debug_once("CPPC feature is not supported by the processor\n"); returnfalse;
}
/* * If the CPPC feature is disabled in the BIOS for processors * that support MSR-based CPPC, the AMD Pstate driver may not * function correctly. * * For such processors, check the CPPC flag and display a * warning message if the platform supports CPPC. * * Note: The code check below will not abort the driver * registration process because of the code is added for * debugging purposes. Besides, it may still be possible for * the driver to work using the shared-memory mechanism.
*/ if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
switch (c->x86_model) {
case 0x60 ... 0x6F:
case 0x80 ... 0xAF:
warn = true;
break;
}
} elseif (cpu_feature_enabled(X86_FEATURE_ZEN3) ||
cpu_feature_enabled(X86_FEATURE_ZEN4)) {
switch (c->x86_model) {
case 0x10 ... 0x1F:
case 0x40 ... 0xAF:
warn = true;
break;
}
} elseif (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
warn = true;
}
}
if (warn)
pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" "Please enable it if your BIOS has the CPPC option.\n"); returntrue;
}
staticint __init amd_pstate_init(void)
{ struct device *dev_root; int ret;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV;
/* show debug message only if CPPC is not supported */ if (!amd_cppc_supported()) return -EOPNOTSUPP;
/* show warning message when BIOS broken or ACPI disabled */ if (!acpi_cpc_valid()) {
pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); return -ENODEV;
}
/* don't keep reloading if cpufreq_driver exists */ if (cpufreq_get_current_driver()) return -EEXIST;
quirks = NULL;
/* check if this machine need CPPC quirks */
dmi_check_system(amd_pstate_quirks_table);
/* * determine the driver mode from the command line or kernel config. * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. * command line options will override the kernel config settings.
*/
if (cppc_state == AMD_PSTATE_UNDEFINED) { /* Disable on the following configs by default: * 1. Undefined platforms * 2. Server platforms with CPUs older than Family 0x1A.
*/ if (amd_pstate_acpi_pm_profile_undefined() ||
(amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV;
} /* get driver mode from kernel config option [1:4] */
cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
}
if (cppc_state == AMD_PSTATE_DISABLE) {
pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV;
}
/* capability check */ if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
static_call_update(amd_pstate_init_perf, shmem_init_perf);
static_call_update(amd_pstate_update_perf, shmem_update_perf);
static_call_update(amd_pstate_get_epp, shmem_get_epp);
static_call_update(amd_pstate_set_epp, shmem_set_epp);
}
if (amd_pstate_prefcore) {
ret = amd_detect_prefcore(&amd_pstate_prefcore); if (ret) return ret;
}
ret = amd_pstate_register_driver(cppc_state); if (ret) {
pr_err("failed to register with return %d\n", ret); return ret;
}
dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) {
ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
put_device(dev_root); if (ret) {
pr_err("sysfs attribute export failed with error %d.\n", ret);
goto global_attr_free;
}
}
/* Module metadata; the author address matches the one in the file header. */
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
Messung V0.5
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
0.13
Bemerkung:
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.