From b00345d1994d588fa2687e1238fcd542f0320cba Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 14 Jun 2016 23:12:59 -0700 Subject: [PATCH 1/2] cpufreq: intel_pstate: Adjust _PSS[0] freqeuency if needed The maximum turbo P-State used by the intel_pstate driver may be limited by ACPI _PSS table entry 0. After commit 9522a2ff9cde (cpufreq: intel_pstate: Enforce _PPC limits), the maximum performance on servers will be capped by the _PSS table entry 0 by default. Even though that is formally correct, it may lead to preformance regressions in some cases. Namely, if the _PSS table entry 0 is not the maximum turbo P-State, performance measured after commit 9522a2ff9cde will not match the performance measured before that commit on the same system. For this reason, modify the code to always use the maximum turbo frequency as the one that corresponds to _PSS table entry 0 if turbo is enabled in the BIOS. This way, the performance levels from before commit 9522a2ff9cde will be restored on the affected systems. Fixes: 9522a2ff9cde (cpufreq: intel_pstate: Enforce _PPC limits) Suggested-by: Rafael J. Wysocki Signed-off-by: Srinivas Pandruvada [ rjw : Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ee367e9b7d2e..fe9dc17ea873 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -372,26 +372,9 @@ static bool intel_pstate_get_ppc_enable_status(void) return acpi_ppc; } -/* - * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and - * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and - * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state - * ratio, out of it only high 8 bits are used. For example 0x1700 is setting - * target ratio 0x17. The _PSS control value stores in a format which can be - * directly written to PERF_CTL MSR. But in intel_pstate driver this shift - * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). - * This function converts the _PSS control value to intel pstate driver format - * for comparison and assignment. - */ -static int convert_to_native_pstate_format(struct cpudata *cpu, int index) -{ - return cpu->acpi_perf_data.states[index].control >> 8; -} - static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) { struct cpudata *cpu; - int turbo_pss_ctl; int ret; int i; @@ -441,11 +424,10 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) * max frequency, which will cause a reduced performance as * this driver uses real max turbo frequency as the max * frequency. So correct this frequency in _PSS table to - * correct max turbo frequency based on the turbo ratio. + * correct max turbo frequency based on the turbo state. * Also need to convert to MHz as _PSS freq is in MHz. */ - turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); - if (turbo_pss_ctl > cpu->pstate.max_pstate) + if (!limits->turbo_disabled) cpu->acpi_perf_data.states[0].core_frequency = policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; From 79ee2e8f730411a30b271d5f9cdeae189fa66174 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 16 Jun 2016 19:03:11 +0530 Subject: [PATCH 2/2] PM / OPP: Add 'UNKNOWN' status for shared_opp in struct opp_table dev_pm_opp_get_sharing_cpus() returns 0 even in the case when the OPP core doesn't know whether or not the table is shared. It works on the majority of platforms, where the OPP table is never created before invoking the function and then -ENODEV is returned by it. But in the case of one platform (Jetson TK1) at least, the situation is a bit different. The OPP table has been created (somehow) before dev_pm_opp_get_sharing_cpus() is called and it returns 0. Its caller treats that as 'the CPUs don't share OPPs' and that leads to degraded performance. Fix this by converting 'shared_opp' in struct opp_table to an enum and making dev_pm_opp_get_sharing_cpus() return -EINVAL in case when the value of that field is "access unknown", so that the caller can handle it accordingly (cpufreq-dt considers that as 'all CPUs share the table', for example). Fixes: 6f707daa3833 "PM / OPP: Add dev_pm_opp_get_sharing_cpus()" Reported-and-tested-by: Alexandre Courbot Signed-off-by: Viresh Kumar [ rjw : Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/cpu.c | 12 +++++++++--- drivers/base/power/opp/of.c | 10 ++++++++-- drivers/base/power/opp/opp.h | 8 +++++++- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 83d6e7ba1a34..8c3434bdb26d 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -211,7 +211,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, } /* Mark opp-table as multiple CPUs are sharing it now */ - opp_table->shared_opp = true; + opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED; } unlock: mutex_unlock(&opp_table_lock); @@ -227,7 +227,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus); * * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev. * - * Returns -ENODEV if OPP table isn't already present. + * Returns -ENODEV if OPP table isn't already present and -EINVAL if the OPP + * table's status is access-unknown. * * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks @@ -249,9 +250,14 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) goto unlock; } + if (opp_table->shared_opp == OPP_TABLE_ACCESS_UNKNOWN) { + ret = -EINVAL; + goto unlock; + } + cpumask_clear(cpumask); - if (opp_table->shared_opp) { + if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) { list_for_each_entry(opp_dev, &opp_table->dev_list, node) cpumask_set_cpu(opp_dev->dev->id, cpumask); } else { diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c index 94d2010558e3..1dfd3dd92624 100644 --- a/drivers/base/power/opp/of.c +++ b/drivers/base/power/opp/of.c @@ -34,7 +34,10 @@ static struct opp_table *_managed_opp(const struct device_node *np) * But the OPPs will be considered as shared only if the * OPP table contains a "opp-shared" property. */ - return opp_table->shared_opp ? opp_table : NULL; + if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) + return opp_table; + + return NULL; } } @@ -353,7 +356,10 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) } opp_table->np = opp_np; - opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared"); + if (of_property_read_bool(opp_np, "opp-shared")) + opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED; + else + opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE; mutex_unlock(&opp_table_lock); diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 20f3be22e060..fabd5ca1a083 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -119,6 +119,12 @@ struct opp_device { #endif }; +enum opp_table_access { + OPP_TABLE_ACCESS_UNKNOWN = 0, + OPP_TABLE_ACCESS_EXCLUSIVE = 1, + OPP_TABLE_ACCESS_SHARED = 2, +}; + /** * struct opp_table - Device opp structure * @node: table node - contains the devices with OPPs that @@ -166,7 +172,7 @@ struct opp_table { /* For backward compatibility with v1 bindings */ unsigned int voltage_tolerance_v1; - bool shared_opp; + enum opp_table_access shared_opp; struct dev_pm_opp *suspend_opp; unsigned int *supported_hw;