cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors

Add EPP driver support for AMD SoCs which support a dedicated MSR for
CPPC.  EPP is used by the DPM controller to configure the frequency that
a core operates at during short periods of activity.

The SoC EPP targets are configured on a scale from 0 to 255 where 0
represents maximum performance and 255 represents maximum efficiency.

The amd-pstate driver exports profile string names to userspace that are
tied to specific EPP values.

The balance_performance string (0x80) provides the best balance for
efficiency versus power on most systems, but users can choose other
strings to meet their needs as well.

$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
default performance balance_performance balance_power power

$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
balance_performance

To enable the driver,it needs to add `amd_pstate=active` to kernel
command line and kernel will load the active mode epp driver

Acked-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Mario Limonciello <Mario.Limonciello@amd.com>
Reviewed-by: Wyes Karny <wyes.karny@amd.com>
Tested-by: Wyes Karny <wyes.karny@amd.com>
Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
Perry Yuan 2023-01-31 17:00:09 +08:00 коммит произвёл Rafael J. Wysocki
Родитель 36c5014e54
Коммит ffa5096a7c
2 изменённых файлов: 429 добавлений и 7 удалений

Просмотреть файл

@ -59,9 +59,52 @@
* we disable it by default to go acpi-cpufreq on these processors and add a * we disable it by default to go acpi-cpufreq on these processors and add a
* module parameter to be able to enable it manually for debugging. * module parameter to be able to enable it manually for debugging.
*/ */
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_DISABLE; static int cppc_state = AMD_PSTATE_DISABLE;
/*
* AMD Energy Preference Performance (EPP)
* The EPP is used in the CCLK DPM controller to drive
* the frequency that a core is going to operate during
* short periods of activity. EPP values will be utilized for
* different OS profiles (balanced, performance, power savings)
* display strings corresponding to EPP index in the
* energy_perf_strings[]
* index String
*-------------------------------------
* 0 default
* 1 performance
* 2 balance_performance
* 3 balance_power
* 4 power
*/
enum energy_perf_value_index {
EPP_INDEX_DEFAULT = 0,
EPP_INDEX_PERFORMANCE,
EPP_INDEX_BALANCE_PERFORMANCE,
EPP_INDEX_BALANCE_POWERSAVE,
EPP_INDEX_POWERSAVE,
};
static const char * const energy_perf_strings[] = {
[EPP_INDEX_DEFAULT] = "default",
[EPP_INDEX_PERFORMANCE] = "performance",
[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
[EPP_INDEX_POWERSAVE] = "power",
NULL
};
static unsigned int epp_values[] = {
[EPP_INDEX_DEFAULT] = 0,
[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};
static inline int get_mode_idx_from_str(const char *str, size_t size) static inline int get_mode_idx_from_str(const char *str, size_t size)
{ {
int i; int i;
@ -73,6 +116,114 @@ static inline int get_mode_idx_from_str(const char *str, size_t size)
return -EINVAL; return -EINVAL;
} }
static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
{
u64 epp;
int ret;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
if (!cppc_req_cached) {
epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
&cppc_req_cached);
if (epp)
return epp;
}
epp = (cppc_req_cached >> 24) & 0xFF;
} else {
ret = cppc_get_epp_perf(cpudata->cpu, &epp);
if (ret < 0) {
pr_debug("Could not retrieve energy perf value (%d)\n", ret);
return -EIO;
}
}
return (s16)(epp & 0xff);
}
static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
{
s16 epp;
int index = -EINVAL;
epp = amd_pstate_get_epp(cpudata, 0);
if (epp < 0)
return epp;
switch (epp) {
case AMD_CPPC_EPP_PERFORMANCE:
index = EPP_INDEX_PERFORMANCE;
break;
case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
index = EPP_INDEX_BALANCE_PERFORMANCE;
break;
case AMD_CPPC_EPP_BALANCE_POWERSAVE:
index = EPP_INDEX_BALANCE_POWERSAVE;
break;
case AMD_CPPC_EPP_POWERSAVE:
index = EPP_INDEX_POWERSAVE;
break;
default:
break;
}
return index;
}
static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
int ret;
struct cppc_perf_ctrls perf_ctrls;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
u64 value = READ_ONCE(cpudata->cppc_req_cached);
value &= ~GENMASK_ULL(31, 24);
value |= (u64)epp << 24;
WRITE_ONCE(cpudata->cppc_req_cached, value);
ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
if (!ret)
cpudata->epp_cached = epp;
} else {
perf_ctrls.energy_perf = epp;
ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
if (ret) {
pr_debug("failed to set energy perf value (%d)\n", ret);
return ret;
}
cpudata->epp_cached = epp;
}
return ret;
}
static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
int pref_index)
{
int epp = -EINVAL;
int ret;
if (!pref_index) {
pr_debug("EPP pref_index is invalid\n");
return -EINVAL;
}
if (epp == -EINVAL)
epp = epp_values[pref_index];
if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
pr_debug("EPP cannot be set under performance policy\n");
return -EBUSY;
}
ret = amd_pstate_set_epp(cpudata, epp);
return ret;
}
static inline int pstate_enable(bool enable) static inline int pstate_enable(bool enable)
{ {
return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
@ -81,11 +232,21 @@ static inline int pstate_enable(bool enable)
static int cppc_enable(bool enable) static int cppc_enable(bool enable)
{ {
int cpu, ret = 0; int cpu, ret = 0;
struct cppc_perf_ctrls perf_ctrls;
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable); ret = cppc_set_enable(cpu, enable);
if (ret) if (ret)
return ret; return ret;
/* Enable autonomous mode for EPP */
if (cppc_state == AMD_PSTATE_ACTIVE) {
/* Set desired perf as zero to allow EPP firmware control */
perf_ctrls.desired_perf = 0;
ret = cppc_set_perf(cpu, &perf_ctrls);
if (ret)
return ret;
}
} }
return ret; return ret;
@ -429,7 +590,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
return; return;
cpudata->boost_supported = true; cpudata->boost_supported = true;
amd_pstate_driver.boost_enabled = true; current_pstate_driver->boost_enabled = true;
} }
static void amd_perf_ctl_reset(unsigned int cpu) static void amd_perf_ctl_reset(unsigned int cpu)
@ -603,10 +764,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
return sprintf(&buf[0], "%u\n", perf); return sprintf(&buf[0], "%u\n", perf);
} }
static ssize_t show_energy_performance_available_preferences(
struct cpufreq_policy *policy, char *buf)
{
int i = 0;
int offset = 0;
while (energy_perf_strings[i] != NULL)
offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
sysfs_emit_at(buf, offset, "\n");
return offset;
}
static ssize_t store_energy_performance_preference(
struct cpufreq_policy *policy, const char *buf, size_t count)
{
struct amd_cpudata *cpudata = policy->driver_data;
char str_preference[21];
ssize_t ret;
ret = sscanf(buf, "%20s", str_preference);
if (ret != 1)
return -EINVAL;
ret = match_string(energy_perf_strings, -1, str_preference);
if (ret < 0)
return -EINVAL;
mutex_lock(&amd_pstate_limits_lock);
ret = amd_pstate_set_energy_pref_index(cpudata, ret);
mutex_unlock(&amd_pstate_limits_lock);
return ret ?: count;
}
static ssize_t show_energy_performance_preference(
struct cpufreq_policy *policy, char *buf)
{
struct amd_cpudata *cpudata = policy->driver_data;
int preference;
preference = amd_pstate_get_energy_pref_index(cpudata);
if (preference < 0)
return preference;
return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}
cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf); cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static struct freq_attr *amd_pstate_attr[] = { static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq, &amd_pstate_max_freq,
@ -615,6 +827,186 @@ static struct freq_attr *amd_pstate_attr[] = {
NULL, NULL,
}; };
static struct freq_attr *amd_pstate_epp_attr[] = {
&amd_pstate_max_freq,
&amd_pstate_lowest_nonlinear_freq,
&amd_pstate_highest_perf,
&energy_performance_preference,
&energy_performance_available_preferences,
NULL,
};
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
struct amd_cpudata *cpudata;
struct device *dev;
int rc;
u64 value;
/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
* which is ideal for initialization process.
*/
amd_perf_ctl_reset(policy->cpu);
dev = get_cpu_device(policy->cpu);
if (!dev)
goto free_cpudata1;
cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
if (!cpudata)
return -ENOMEM;
cpudata->cpu = policy->cpu;
cpudata->epp_policy = 0;
rc = amd_pstate_init_perf(cpudata);
if (rc)
goto free_cpudata1;
min_freq = amd_get_min_freq(cpudata);
max_freq = amd_get_max_freq(cpudata);
nominal_freq = amd_get_nominal_freq(cpudata);
lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
min_freq, max_freq);
ret = -EINVAL;
goto free_cpudata1;
}
policy->cpuinfo.min_freq = min_freq;
policy->cpuinfo.max_freq = max_freq;
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
/* Initial processor data capability frequencies */
cpudata->max_freq = max_freq;
cpudata->min_freq = min_freq;
cpudata->nominal_freq = nominal_freq;
cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
policy->driver_data = cpudata;
cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
policy->min = policy->cpuinfo.min_freq;
policy->max = policy->cpuinfo.max_freq;
/*
* Set the policy to powersave to provide a valid fallback value in case
* the default cpufreq governor is neither powersave nor performance.
*/
policy->policy = CPUFREQ_POLICY_POWERSAVE;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
policy->fast_switch_possible = true;
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
if (ret)
return ret;
WRITE_ONCE(cpudata->cppc_req_cached, value);
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
if (ret)
return ret;
WRITE_ONCE(cpudata->cppc_cap1_cached, value);
}
amd_pstate_boost_init(cpudata);
return 0;
free_cpudata1:
kfree(cpudata);
return ret;
}
static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
pr_debug("CPU %d exiting\n", policy->cpu);
policy->fast_switch_possible = false;
return 0;
}
static void amd_pstate_epp_init(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata = policy->driver_data;
u32 max_perf, min_perf;
u64 value;
s16 epp;
max_perf = READ_ONCE(cpudata->highest_perf);
min_perf = READ_ONCE(cpudata->lowest_perf);
value = READ_ONCE(cpudata->cppc_req_cached);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
min_perf = max_perf;
/* Initial min/max values for CPPC Performance Controls Register */
value &= ~AMD_CPPC_MIN_PERF(~0L);
value |= AMD_CPPC_MIN_PERF(min_perf);
value &= ~AMD_CPPC_MAX_PERF(~0L);
value |= AMD_CPPC_MAX_PERF(max_perf);
/* CPPC EPP feature require to set zero to the desire perf bit */
value &= ~AMD_CPPC_DES_PERF(~0L);
value |= AMD_CPPC_DES_PERF(0);
if (cpudata->epp_policy == cpudata->policy)
goto skip_epp;
cpudata->epp_policy = cpudata->policy;
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
epp = amd_pstate_get_epp(cpudata, value);
if (epp < 0)
goto skip_epp;
/* force the epp value to be zero for performance policy */
epp = 0;
} else {
/* Get BIOS pre-defined epp value */
epp = amd_pstate_get_epp(cpudata, value);
if (epp)
goto skip_epp;
}
/* Set initial EPP value */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
value &= ~GENMASK_ULL(31, 24);
value |= (u64)epp << 24;
}
skip_epp:
WRITE_ONCE(cpudata->cppc_req_cached, value);
amd_pstate_set_epp(cpudata, epp);
cpufreq_cpu_put(policy);
}
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
if (!policy->cpuinfo.max_freq)
return -ENODEV;
pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
policy->cpuinfo.max_freq, policy->max);
cpudata->policy = policy->policy;
amd_pstate_epp_init(policy->cpu);
return 0;
}
static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
{
cpufreq_verify_within_cpu_limits(policy);
pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
return 0;
}
static struct cpufreq_driver amd_pstate_driver = { static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify, .verify = amd_pstate_verify,
@ -628,6 +1020,16 @@ static struct cpufreq_driver amd_pstate_driver = {
.attr = amd_pstate_attr, .attr = amd_pstate_attr,
}; };
static struct cpufreq_driver amd_pstate_epp_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = amd_pstate_epp_verify_policy,
.setpolicy = amd_pstate_epp_set_policy,
.init = amd_pstate_epp_cpu_init,
.exit = amd_pstate_epp_cpu_exit,
.name = "amd_pstate_epp",
.attr = amd_pstate_epp_attr,
};
static int __init amd_pstate_init(void) static int __init amd_pstate_init(void)
{ {
int ret; int ret;
@ -656,7 +1058,8 @@ static int __init amd_pstate_init(void)
/* capability check */ /* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) { if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n"); pr_debug("AMD CPPC MSR based functionality is supported\n");
amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; if (cppc_state == AMD_PSTATE_PASSIVE)
current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else { } else {
pr_debug("AMD CPPC shared memory based functionality is supported\n"); pr_debug("AMD CPPC shared memory based functionality is supported\n");
static_call_update(amd_pstate_enable, cppc_enable); static_call_update(amd_pstate_enable, cppc_enable);
@ -667,14 +1070,13 @@ static int __init amd_pstate_init(void)
/* enable amd pstate feature */ /* enable amd pstate feature */
ret = amd_pstate_enable(true); ret = amd_pstate_enable(true);
if (ret) { if (ret) {
pr_err("failed to enable amd-pstate with return %d\n", ret); pr_err("failed to enable with return %d\n", ret);
return ret; return ret;
} }
ret = cpufreq_register_driver(&amd_pstate_driver); ret = cpufreq_register_driver(current_pstate_driver);
if (ret) if (ret)
pr_err("failed to register amd_pstate_driver with return %d\n", pr_err("failed to register with return %d\n", ret);
ret);
return ret; return ret;
} }
@ -696,6 +1098,12 @@ static int __init amd_pstate_param(char *str)
if (cppc_state == AMD_PSTATE_DISABLE) if (cppc_state == AMD_PSTATE_DISABLE)
pr_info("driver is explicitly disabled\n"); pr_info("driver is explicitly disabled\n");
if (cppc_state == AMD_PSTATE_ACTIVE)
current_pstate_driver = &amd_pstate_epp_driver;
if (cppc_state == AMD_PSTATE_PASSIVE)
current_pstate_driver = &amd_pstate_driver;
return 0; return 0;
} }

Просмотреть файл

@ -12,6 +12,11 @@
#include <linux/pm_qos.h> #include <linux/pm_qos.h>
#define AMD_CPPC_EPP_PERFORMANCE 0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
#define AMD_CPPC_EPP_POWERSAVE 0xFF
/********************************************************************* /*********************************************************************
* AMD P-state INTERFACE * * AMD P-state INTERFACE *
*********************************************************************/ *********************************************************************/
@ -47,6 +52,10 @@ struct amd_aperf_mperf {
* @prev: Last Aperf/Mperf/tsc count value read from register * @prev: Last Aperf/Mperf/tsc count value read from register
* @freq: current cpu frequency value * @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode * @boost_supported: check whether the Processor or SBIOS supports boost mode
* @epp_policy: Last saved policy used to set energy-performance preference
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
* @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
* *
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime. * represents all the attributes and goals that AMD P-State requests at runtime.
@ -72,6 +81,12 @@ struct amd_cpudata {
u64 freq; u64 freq;
bool boost_supported; bool boost_supported;
/* EPP feature related attributes*/
s16 epp_policy;
s16 epp_cached;
u32 policy;
u64 cppc_cap1_cached;
}; };
/* /*
@ -90,5 +105,4 @@ static const char * const amd_pstate_mode_string[] = {
[AMD_PSTATE_ACTIVE] = "active", [AMD_PSTATE_ACTIVE] = "active",
NULL, NULL,
}; };
#endif /* _LINUX_AMD_PSTATE_H */ #endif /* _LINUX_AMD_PSTATE_H */