From 7b0bf99b9ee497cc0f079472566aff716d033d43 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 28 Apr 2020 17:43:15 +0800 Subject: [PATCH 01/11] cpupower: Remove unneeded semicolon Fixes coccicheck warnings: tools/power/cpupower/utils/cpupower-info.c:65:2-3: Unneeded semicolon tools/power/cpupower/utils/cpupower-set.c:75:2-3: Unneeded semicolon tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c:120:2-3: Unneeded semicolon tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c:175:2-3: Unneeded semicolon tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c:56:2-3: Unneeded semicolon tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c:75:2-3: Unneeded semicolon tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c:82:2-3: Unneeded semicolon tools/power/cpupower/utils/idle_monitor/nhm_idle.c:94:2-3: Unneeded semicolon tools/power/cpupower/utils/idle_monitor/snb_idle.c:80:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpupower-info.c | 2 +- tools/power/cpupower/utils/cpupower-set.c | 2 +- tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 6 +++--- tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/nhm_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/snb_idle.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index d3755ea70d4d..0ba61a2c4d81 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -62,7 +62,7 @@ int cmd_info(int argc, char **argv) default: print_wrong_arg_exit(); } - }; + } if (!params.params) params.params = 0x7; diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 3cca6f715dd9..052044d7e012 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -72,7 +72,7 @@ int cmd_set(int argc, char **argv) default: print_wrong_arg_exit(); } - }; + } if (!params.params) print_wrong_arg_exit(); diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 20f46348271b..5edd35bd9ee9 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -117,7 +117,7 @@ static int amd_fam14h_get_pci_info(struct cstate *state, break; default: return -1; - }; + } return 0; } diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index a65f7d011513..8b42c2f0a5b0 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -53,7 +53,7 @@ static int cpuidle_start(void) dprint("CPU %d - State: %d - Val: %llu\n", cpu, state, previous_count[cpu][state]); } - }; + } return 0; } @@ -72,7 +72,7 @@ static int cpuidle_stop(void) dprint("CPU %d - State: %d - Val: %llu\n", cpu, state, previous_count[cpu][state]); } - }; + } return 0; } @@ -172,7 +172,7 @@ static struct cpuidle_monitor *cpuidle_register(void) cpuidle_cstates[num].id = num; cpuidle_cstates[num].get_count_percent = cpuidle_get_count_percent; - }; + } /* Free this at program termination */ previous_count = malloc(sizeof(long long *) * cpu_count); diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index 97ad3233a521..55e55b6b42f9 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -79,7 +79,7 @@ static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val, break; default: return -1; - }; + } if (read_msr(cpu, msr, val)) return -1; return 0; diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c index 114271165182..16eaf006f61f 100644 --- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -91,7 +91,7 @@ static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val, break; default: return -1; - }; + } if (read_msr(cpu, msr, val)) return -1; diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index df8b223cc096..811d63ab17a7 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -77,7 +77,7 @@ static int snb_get_count(enum intel_snb_id id, unsigned long long *val, break; default: return -1; - }; + } if (read_msr(cpu, msr, val)) return -1; return 0; From 33c980036deb5ee9961db82401c0fbfb96f126b3 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 15 May 2020 15:30:41 +0800 Subject: [PATCH 02/11] powercap/intel_rapl: add support for ElkhartLake Add intel_rapl support for ElkhartLake platform. Signed-off-by: Jacob Pan Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index eb328655bc01..c3e335e37c7d 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -989,6 +989,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core), From 213081dadd308d5d41a5110c2dc87fd8ba42de5e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 11 May 2020 16:57:31 -0700 Subject: [PATCH 03/11] Documentation: admin-guide: pm: Document intel-speed-select Added documentation to configure servers to use Intel(R) Speed Select Technology using intel-speed-select tool. Signed-off-by: Srinivas Pandruvada Acked-by: Andriy Shevchenko Signed-off-by: Rafael J. Wysocki --- .../admin-guide/pm/intel-speed-select.rst | 917 ++++++++++++++++++ .../admin-guide/pm/working-state.rst | 1 + 2 files changed, 918 insertions(+) create mode 100644 Documentation/admin-guide/pm/intel-speed-select.rst diff --git a/Documentation/admin-guide/pm/intel-speed-select.rst b/Documentation/admin-guide/pm/intel-speed-select.rst new file mode 100644 index 000000000000..b2ca601c21c6 --- /dev/null +++ b/Documentation/admin-guide/pm/intel-speed-select.rst @@ -0,0 +1,917 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================ +Intel(R) Speed Select Technology User Guide +============================================================ + +The Intel(R) Speed Select Technology (Intel(R) SST) provides a powerful new +collection of features that give more granular control over CPU performance. +With Intel(R) SST, one server can be configured for power and performance for a +variety of diverse workload requirements. + +Refer to the links below for an overview of the technology: + +- https://www.intel.com/content/www/us/en/architecture-and-technology/speed-select-technology-article.html +- https://builders.intel.com/docs/networkbuilders/intel-speed-select-technology-base-frequency-enhancing-performance.pdf + +These capabilities are further enhanced in some of the newer generations of +server platforms where these features can be enumerated and controlled +dynamically without pre-configuring via BIOS setup options. This dynamic +configuration is done via mailbox commands to the hardware. One way to enumerate +and configure these features is by using the Intel Speed Select utility. + +This document explains how to use the Intel Speed Select tool to enumerate and +control Intel(R) SST features. This document gives example commands and explains +how these commands change the power and performance profile of the system under +test. Using this tool as an example, customers can replicate the messaging +implemented in the tool in their production software. + +intel-speed-select configuration tool +====================================== + +Most Linux distribution packages may include the "intel-speed-select" tool. If not, +it can be built by downloading the Linux kernel tree from kernel.org. Once +downloaded, the tool can be built without building the full kernel. + +From the kernel tree, run the following commands:: + +# cd tools/power/x86/intel-speed-select/ +# make +# make install + +Getting Help +------------ + +To get help with the tool, execute the command below:: + +# intel-speed-select --help + +The top-level help describes arguments and features. Notice that there is a +multi-level help structure in the tool. For example, to get help for the feature "perf-profile":: + +# intel-speed-select perf-profile --help + +To get help on a command, another level of help is provided. For example for the command info "info":: + +# intel-speed-select perf-profile info --help + +Summary of platform capability +------------------------------ +To check the current platform and driver capaibilities, execute:: + +#intel-speed-select --info + +For example on a test system:: + + # intel-speed-select --info + Intel(R) Speed Select Technology + Executing on CPU model: X + Platform: API version : 1 + Platform: Driver version : 1 + Platform: mbox supported : 1 + Platform: mmio supported : 1 + Intel(R) SST-PP (feature perf-profile) is supported + TDP level change control is unlocked, max level: 4 + Intel(R) SST-TF (feature turbo-freq) is supported + Intel(R) SST-BF (feature base-freq) is not supported + Intel(R) SST-CP (feature core-power) is supported + +Intel(R) Speed Select Technology - Performance Profile (Intel(R) SST-PP) +------------------------------------------------------------------------ + +This feature allows configuration of a server dynamically based on workload +performance requirements. This helps users during deployment as they do not have +to choose a specific server configuration statically. This Intel(R) Speed Select +Technology - Performance Profile (Intel(R) SST-PP) feature introduces a mechanism +that allows multiple optimized performance profiles per system. Each profile +defines a set of CPUs that need to be online and rest offline to sustain a +guaranteed base frequency. Once the user issues a command to use a specific +performance profile and meet CPU online/offline requirement, the user can expect +a change in the base frequency dynamically. This feature is called +"perf-profile" when using the Intel Speed Select tool. + +Number or performance levels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There can be multiple performance profiles on a system. To get the number of +profiles, execute the command below:: + + # intel-speed-select perf-profile get-config-levels + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + get-config-levels:4 + package-1 + die-0 + cpu-14 + get-config-levels:4 + +On this system under test, there are 4 performance profiles in addition to the +base performance profile (which is performance level 0). + +Lock/Unlock status +~~~~~~~~~~~~~~~~~~ + +Even if there are multiple performance profiles, it is possible that that they +are locked. If they are locked, users cannot issue a command to change the +performance state. It is possible that there is a BIOS setup to unlock or check +with your system vendor. + +To check if the system is locked, execute the following command:: + + # intel-speed-select perf-profile get-lock-status + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + get-lock-status:0 + package-1 + die-0 + cpu-14 + get-lock-status:0 + +In this case, lock status is 0, which means that the system is unlocked. + +Properties of a performance level +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To get properties of a specific performance level (For example for the level 0, below), execute the command below:: + + # intel-speed-select perf-profile info -l 0 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + perf-profile-level-0 + cpu-count:28 + enable-cpu-mask:000003ff,f0003fff + enable-cpu-list:0,1,2,3,4,5,6,7,8,9,10,11,12,13,28,29,30,31,32,33,34,35,36,37,38,39,40,41 + thermal-design-power-ratio:26 + base-frequency(MHz):2600 + speed-select-turbo-freq:disabled + speed-select-base-freq:disabled + ... + ... + +Here -l option is used to specify a performance level. + +If the option -l is omitted, then this command will print information about all +the performance levels. The above command is printing properties of the +performance level 0. + +For this performance profile, the list of CPUs displayed by the +"enable-cpu-mask/enable-cpu-list" at the max can be "online." When that +condition is met, then base frequency of 2600 MHz can be maintained. To +understand more, execute "intel-speed-select perf-profile info" for performance +level 4:: + + # intel-speed-select perf-profile info -l 4 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + perf-profile-level-4 + cpu-count:28 + enable-cpu-mask:000000fa,f0000faf + enable-cpu-list:0,1,2,3,5,7,8,9,10,11,28,29,30,31,33,35,36,37,38,39 + thermal-design-power-ratio:28 + base-frequency(MHz):2800 + speed-select-turbo-freq:disabled + speed-select-base-freq:unsupported + ... + ... + +There are fewer CPUs in the "enable-cpu-mask/enable-cpu-list". Consequently, if +the user only keeps these CPUs online and the rest "offline," then the base +frequency is increased to 2.8 GHz compared to 2.6 GHz at performance level 0. + +Get current performance level +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To get the current performance level, execute:: + + # intel-speed-select perf-profile get-config-current-level + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + get-config-current_level:0 + +First verify that the base_frequency displayed by the cpufreq sysfs is correct:: + + # cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency + 2600000 + +This matches the base-frequency (MHz) field value displayed from the +"perf-profile info" command for performance level 0(cpufreq frequency is in +KHz). + +To check if the average frequency is equal to the base frequency for a 100% busy +workload, disable turbo:: + +# echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo + +Then runs a busy workload on all CPUs, for example:: + +#stress -c 64 + +To verify the base frequency, run turbostat:: + + #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1 + + Package Core CPU Bzy_MHz + - - 2600 + 0 0 0 2600 + 0 1 1 2600 + 0 2 2 2600 + 0 3 3 2600 + 0 4 4 2600 + . . . . + + +Changing performance level +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To the change the performance level to 4, execute:: + + # intel-speed-select -d perf-profile set-config-level -l 4 -o + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + perf-profile + set_tdp_level:success + +In the command above, "-o" is optional. If it is specified, then it will also +offline CPUs which are not present in the enable_cpu_mask for this performance +level. + +Now if the base_frequency is checked:: + + #cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency + 2800000 + +Which shows that the base frequency now increased from 2600 MHz at performance +level 0 to 2800 MHz at performance level 4. As a result, any workload, which can +use fewer CPUs, can see a boost of 200 MHz compared to performance level 0. + +Check presence of other Intel(R) SST features +--------------------------------------------- + +Each of the performance profiles also specifies weather there is support of +other two Intel(R) SST features (Intel(R) Speed Select Technology - Base Frequency +(Intel(R) SST-BF) and Intel(R) Speed Select Technology - Turbo Frequency (Intel +SST-TF)). + +For example, from the output of "perf-profile info" above, for level 0 and level +4: + +For level 0:: + speed-select-turbo-freq:disabled + speed-select-base-freq:disabled + +For level 4:: + speed-select-turbo-freq:disabled + speed-select-base-freq:unsupported + +Given these results, the "speed-select-base-freq" (Intel(R) SST-BF) in level 4 +changed from "disabled" to "unsupported" compared to performance level 0. + +This means that at performance level 4, the "speed-select-base-freq" feature is +not supported. However, at performance level 0, this feature is "supported", but +currently "disabled", meaning the user has not activated this feature. Whereas +"speed-select-turbo-freq" (Intel(R) SST-TF) is supported at both performance +levels, but currently not activated by the user. + +The Intel(R) SST-BF and the Intel(R) SST-TF features are built on a foundation +technology called Intel(R) Speed Select Technology - Core Power (Intel(R) SST-CP). +The platform firmware enables this feature when Intel(R) SST-BF or Intel(R) SST-TF +is supported on a platform. + +Intel(R) Speed Select Technology Core Power (Intel(R) SST-CP) +--------------------------------------------------------------- + +Intel(R) Speed Select Technology Core Power (Intel(R) SST-CP) is an interface that +allows users to define per core priority. This defines a mechanism to distribute +power among cores when there is a power constrained scenario. This defines a +class of service (CLOS) configuration. + +The user can configure up to 4 class of service configurations. Each CLOS group +configuration allows definitions of parameters, which affects how the frequency +can be limited and power is distributed. Each CPU core can be tied to a class of +service and hence an associated priority. The granularity is at core level not +at per CPU level. + +Enable CLOS based prioritization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To use CLOS based prioritization feature, firmware must be informed to enable +and use a priority type. There is a default per platform priority type, which +can be changed with optional command line parameter. + +To enable and check the options, execute:: + + # intel-speed-select core-power enable --help + Intel(R) Speed Select Technology + Executing on CPU model: X + Enable core-power for a package/die + Clos Enable: Specify priority type with [--priority|-p] + 0: Proportional, 1: Ordered + +There are two types of priority types: + +- Ordered + +Priority for ordered throttling is defined based on the index of the assigned +CLOS group. Where CLOS0 gets highest priority (throttled last). + +Priority order is: +CLOS0 > CLOS1 > CLOS2 > CLOS3. + +- Proportional + +When proportional priority is used, there is an additional parameter called +frequency_weight, which can be specified per CLOS group. The goal of +proportional priority is to provide each core with the requested min., then +distribute all remaining (excess/deficit) budgets in proportion to a defined +weight. This proportional priority can be configured using "core-power config" +command. + +To enable with the platform default priority type, execute:: + + # intel-speed-select core-power enable + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + core-power + enable:success + package-1 + die-0 + cpu-6 + core-power + enable:success + +The scope of this enable is per package or die scoped when a package contains +multiple dies. To check if CLOS is enabled and get priority type, "core-power +info" command can be used. For example to check the status of core-power feature +on CPU 0, execute:: + + # intel-speed-select -c 0 core-power info + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + core-power + support-status:supported + enable-status:enabled + clos-enable-status:enabled + priority-type:proportional + package-1 + die-0 + cpu-24 + core-power + support-status:supported + enable-status:enabled + clos-enable-status:enabled + priority-type:proportional + +Configuring CLOS groups +~~~~~~~~~~~~~~~~~~~~~~~ + +Each CLOS group has its own attributes including min, max, freq_weight and +desired. These parameters can be configured with "core-power config" command. +Defaults will be used if user skips setting a parameter except clos id, which is +mandatory. To check core-power config options, execute:: + + # intel-speed-select core-power config --help + Intel(R) Speed Select Technology + Executing on CPU model: X + Set core-power configuration for one of the four clos ids + Specify targeted clos id with [--clos|-c] + Specify clos Proportional Priority [--weight|-w] + Specify clos min in MHz with [--min|-n] + Specify clos max in MHz with [--max|-m] + +For example:: + + # intel-speed-select core-power config -c 0 + Intel(R) Speed Select Technology + Executing on CPU model: X + clos epp is not specified, default: 0 + clos frequency weight is not specified, default: 0 + clos min is not specified, default: 0 MHz + clos max is not specified, default: 25500 MHz + clos desired is not specified, default: 0 + package-0 + die-0 + cpu-0 + core-power + config:success + package-1 + die-0 + cpu-6 + core-power + config:success + +The user has the option to change defaults. For example, the user can change the +"min" and set the base frequency to always get guaranteed base frequency. + +Get the current CLOS configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To check the current configuration, "core-power get-config" can be used. For +example, to get the configuration of CLOS 0:: + + # intel-speed-select core-power get-config -c 0 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + core-power + clos:0 + epp:0 + clos-proportional-priority:0 + clos-min:0 MHz + clos-max:Max Turbo frequency + clos-desired:0 MHz + package-1 + die-0 + cpu-24 + core-power + clos:0 + epp:0 + clos-proportional-priority:0 + clos-min:0 MHz + clos-max:Max Turbo frequency + clos-desired:0 MHz + +Associating a CPU with a CLOS group +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To associate a CPU to a CLOS group "core-power assoc" command can be used:: + + # intel-speed-select core-power assoc --help + Intel(R) Speed Select Technology + Executing on CPU model: X + Associate a clos id to a CPU + Specify targeted clos id with [--clos|-c] + + +For example to associate CPU 10 to CLOS group 3, execute:: + + # intel-speed-select -c 10 core-power assoc -c 3 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-10 + core-power + assoc:success + +Once a CPU is associated, its sibling CPUs are also associated to a CLOS group. +Once associated, avoid changing Linux "cpufreq" subsystem scaling frequency +limits. + +To check the existing association for a CPU, "core-power get-assoc" command can +be used. For example, to get association of CPU 10, execute:: + + # intel-speed-select -c 10 core-power get-assoc + Intel(R) Speed Select Technology + Executing on CPU model: X + package-1 + die-0 + cpu-10 + get-assoc + clos:3 + +This shows that CPU 10 is part of a CLOS group 3. + + +Disable CLOS based prioritization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To disable, execute:: + +# intel-speed-select core-power disable + +Some features like Intel(R) SST-TF can only be enabled when CLOS based prioritization +is enabled. For this reason, disabling while Intel(R) SST-TF is enabled can cause +Intel(R) SST-TF to fail. This will cause the "disable" command to display an error +if Intel(R) SST-TF is already enabled. In turn, to disable, the Intel(R) SST-TF +feature must be disabled first. + +Intel(R) Speed Select Technology - Base Frequency (Intel(R) SST-BF) +------------------------------------------------------------------- + +The Intel(R) Speed Select Technology - Base Frequency (Intel(R) SST-BF) feature lets +the user control base frequency. If some critical workload threads demand +constant high guaranteed performance, then this feature can be used to execute +the thread at higher base frequency on specific sets of CPUs (high priority +CPUs) at the cost of lower base frequency (low priority CPUs) on other CPUs. +This feature does not require offline of the low priority CPUs. + +The support of Intel(R) SST-BF depends on the Intel(R) Speed Select Technology - +Performance Profile (Intel(R) SST-PP) performance level configuration. It is +possible that only certain performance levels support Intel(R) SST-BF. It is also +possible that only base performance level (level = 0) has support of Intel +SST-BF. Consequently, first select the desired performance level to enable this +feature. + +In the system under test here, Intel(R) SST-BF is supported at the base +performance level 0, but currently disabled. For example for the level 0:: + + # intel-speed-select -c 0 perf-profile info -l 0 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + perf-profile-level-0 + ... + + speed-select-base-freq:disabled + ... + +Before enabling Intel(R) SST-BF and measuring its impact on a workload +performance, execute some workload and measure performance and get a baseline +performance to compare against. + +Here the user wants more guaranteed performance. For this reason, it is likely +that turbo is disabled. To disable turbo, execute:: + +#echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo + +Based on the output of the "intel-speed-select perf-profile info -l 0" base +frequency of guaranteed frequency 2600 MHz. + + +Measure baseline performance for comparison +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To compare, pick a multi-threaded workload where each thread can be scheduled on +separate CPUs. "Hackbench pipe" test is a good example on how to improve +performance using Intel(R) SST-BF. + +Below, the workload is measuring average scheduler wakeup latency, so a lower +number means better performance:: + + # taskset -c 3,4 perf bench -r 100 sched pipe + # Running 'sched/pipe' benchmark: + # Executed 1000000 pipe operations between two processes + Total time: 6.102 [sec] + 6.102445 usecs/op + 163868 ops/sec + +While running the above test, if we take turbostat output, it will show us that +2 of the CPUs are busy and reaching max. frequency (which would be the base +frequency as the turbo is disabled). The turbostat output:: + + #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1 + Package Core CPU Bzy_MHz + 0 0 0 1000 + 0 1 1 1005 + 0 2 2 1000 + 0 3 3 2600 + 0 4 4 2600 + 0 5 5 1000 + 0 6 6 1000 + 0 7 7 1005 + 0 8 8 1005 + 0 9 9 1000 + 0 10 10 1000 + 0 11 11 995 + 0 12 12 1000 + 0 13 13 1000 + +From the above turbostat output, both CPU 3 and 4 are very busy and reaching +full guaranteed frequency of 2600 MHz. + +Intel(R) SST-BF Capabilities +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To get capabilities of Intel(R) SST-BF for the current performance level 0, +execute:: + + # intel-speed-select base-freq info -l 0 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + speed-select-base-freq + high-priority-base-frequency(MHz):3000 + high-priority-cpu-mask:00000216,00002160 + high-priority-cpu-list:5,6,8,13,33,34,36,41 + low-priority-base-frequency(MHz):2400 + tjunction-temperature(C):125 + thermal-design-power(W):205 + +The above capabilities show that there are some CPUs on this system that can +offer base frequency of 3000 MHz compared to the standard base frequency at this +performance levels. Nevertheless, these CPUs are fixed, and they are presented +via high-priority-cpu-list/high-priority-cpu-mask. But if this Intel(R) SST-BF +feature is selected, the low priorities CPUs (which are not in +high-priority-cpu-list) can only offer up to 2400 MHz. As a result, if this +clipping of low priority CPUs is acceptable, then the user can enable Intel +SST-BF feature particularly for the above "sched pipe" workload since only two +CPUs are used, they can be scheduled on high priority CPUs and can get boost of +400 MHz. + +Enable Intel(R) SST-BF +~~~~~~~~~~~~~~~~~~~~~~ + +To enable Intel(R) SST-BF feature, execute:: + + # intel-speed-select base-freq enable -a + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + base-freq + enable:success + package-1 + die-0 + cpu-14 + base-freq + enable:success + +In this case, -a option is optional. This not only enables Intel(R) SST-BF, but it +also adjusts the priority of cores using Intel(R) Speed Select Technology Core +Power (Intel(R) SST-CP) features. This option sets the minimum performance of each +Intel(R) Speed Select Technology - Performance Profile (Intel(R) SST-PP) class to +maximum performance so that the hardware will give maximum performance possible +for each CPU. + +If -a option is not used, then the following steps are required before enabling +Intel(R) SST-BF: + +- Discover Intel(R) SST-BF and note low and high priority base frequency +- Note the high prioity CPU list +- Enable CLOS using core-power feature set +- Configure CLOS parameters. Use CLOS.min to set to minimum performance +- Subscribe desired CPUs to CLOS groups + +With this configuration, if the same workload is executed by pinning the +workload to high priority CPUs (CPU 5 and 6 in this case):: + + #taskset -c 5,6 perf bench -r 100 sched pipe + # Running 'sched/pipe' benchmark: + # Executed 1000000 pipe operations between two processes + Total time: 5.627 [sec] + 5.627922 usecs/op + 177685 ops/sec + +This way, by enabling Intel(R) SST-BF, the performance of this benchmark is +improved (latency reduced) by 7.79%. From the turbostat output, it can be +observed that the high priority CPUs reached 3000 MHz compared to 2600 MHz. +The turbostat output:: + + #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1 + Package Core CPU Bzy_MHz + 0 0 0 2151 + 0 1 1 2166 + 0 2 2 2175 + 0 3 3 2175 + 0 4 4 2175 + 0 5 5 3000 + 0 6 6 3000 + 0 7 7 2180 + 0 8 8 2662 + 0 9 9 2176 + 0 10 10 2175 + 0 11 11 2176 + 0 12 12 2176 + 0 13 13 2661 + +Disable Intel(R) SST-BF +~~~~~~~~~~~~~~~~~~~~~~~ + +To disable the Intel(R) SST-BF feature, execute:: + +# intel-speed-select base-freq disable -a + + +Intel(R) Speed Select Technology - Turbo Frequency (Intel(R) SST-TF) +-------------------------------------------------------------------- + +This feature enables the ability to set different "All core turbo ratio limits" +to cores based on the priority. By using this feature, some cores can be +configured to get higher turbo frequency by designating them as high priority at +the cost of lower or no turbo frequency on the low priority cores. + +For this reason, this feature is only useful when system is busy utilizing all +CPUs, but the user wants some configurable option to get high performance on +some CPUs. + +The support of Intel(R) Speed Select Technology - Turbo Frequency (Intel(R) SST-TF) +depends on the Intel(R) Speed Select Technology - Performance Profile (Intel +SST-PP) performance level configuration. It is possible that only a certain +performance level supports Intel(R) SST-TF. It is also possible that only the base +performance level (level = 0) has the support of Intel(R) SST-TF. Hence, first +select the desired performance level to enable this feature. + +In the system under test here, Intel(R) SST-TF is supported at the base +performance level 0, but currently disabled:: + + # intel-speed-select -c 0 perf-profile info -l 0 + Intel(R) Speed Select Technology + package-0 + die-0 + cpu-0 + perf-profile-level-0 + ... + ... + speed-select-turbo-freq:disabled + ... + ... + + +To check if performance can be improved using Intel(R) SST-TF feature, get the turbo +frequency properties with Intel(R) SST-TF enabled and compare to the base turbo +capability of this system. + +Get Base turbo capability +~~~~~~~~~~~~~~~~~~~~~~~~~ + +To get the base turbo capability of performance level 0, execute:: + + # intel-speed-select perf-profile info -l 0 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + perf-profile-level-0 + ... + ... + turbo-ratio-limits-sse + bucket-0 + core-count:2 + max-turbo-frequency(MHz):3200 + bucket-1 + core-count:4 + max-turbo-frequency(MHz):3100 + bucket-2 + core-count:6 + max-turbo-frequency(MHz):3100 + bucket-3 + core-count:8 + max-turbo-frequency(MHz):3100 + bucket-4 + core-count:10 + max-turbo-frequency(MHz):3100 + bucket-5 + core-count:12 + max-turbo-frequency(MHz):3100 + bucket-6 + core-count:14 + max-turbo-frequency(MHz):3100 + bucket-7 + core-count:16 + max-turbo-frequency(MHz):3100 + +Based on the data above, when all the CPUS are busy, the max. frequency of 3100 +MHz can be achieved. If there is some busy workload on cpu 0 - 11 (e.g. stress) +and on CPU 12 and 13, execute "hackbench pipe" workload:: + + # taskset -c 12,13 perf bench -r 100 sched pipe + # Running 'sched/pipe' benchmark: + # Executed 1000000 pipe operations between two processes + Total time: 5.705 [sec] + 5.705488 usecs/op + 175269 ops/sec + +The turbostat output:: + + #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1 + Package Core CPU Bzy_MHz + 0 0 0 3000 + 0 1 1 3000 + 0 2 2 3000 + 0 3 3 3000 + 0 4 4 3000 + 0 5 5 3100 + 0 6 6 3100 + 0 7 7 3000 + 0 8 8 3100 + 0 9 9 3000 + 0 10 10 3000 + 0 11 11 3000 + 0 12 12 3100 + 0 13 13 3100 + +Based on turbostat output, the performance is limited by frequency cap of 3100 +MHz. To check if the hackbench performance can be improved for CPU 12 and CPU +13, first check the capability of the Intel(R) SST-TF feature for this performance +level. + +Get Intel(R) SST-TF Capability +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To get the capability, the "turbo-freq info" command can be used:: + + # intel-speed-select turbo-freq info -l 0 + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-0 + speed-select-turbo-freq + bucket-0 + high-priority-cores-count:2 + high-priority-max-frequency(MHz):3200 + high-priority-max-avx2-frequency(MHz):3200 + high-priority-max-avx512-frequency(MHz):3100 + bucket-1 + high-priority-cores-count:4 + high-priority-max-frequency(MHz):3100 + high-priority-max-avx2-frequency(MHz):3000 + high-priority-max-avx512-frequency(MHz):2900 + bucket-2 + high-priority-cores-count:6 + high-priority-max-frequency(MHz):3100 + high-priority-max-avx2-frequency(MHz):3000 + high-priority-max-avx512-frequency(MHz):2900 + speed-select-turbo-freq-clip-frequencies + low-priority-max-frequency(MHz):2600 + low-priority-max-avx2-frequency(MHz):2400 + low-priority-max-avx512-frequency(MHz):2100 + +Based on the output above, there is an Intel(R) SST-TF bucket for which there are +two high priority cores. If only two high priority cores are set, then max. +turbo frequency on those cores can be increased to 3200 MHz. This is 100 MHz +more than the base turbo capability for all cores. + +In turn, for the hackbench workload, two CPUs can be set as high priority and +rest as low priority. One side effect is that once enabled, the low priority +cores will be clipped to a lower frequency of 2600 MHz. + +Enable Intel(R) SST-TF +~~~~~~~~~~~~~~~~~~~~~~ + +To enable Intel(R) SST-TF, execute:: + + # intel-speed-select -c 12,13 turbo-freq enable -a + Intel(R) Speed Select Technology + Executing on CPU model: X + package-0 + die-0 + cpu-12 + turbo-freq + enable:success + package-0 + die-0 + cpu-13 + turbo-freq + enable:success + package--1 + die-0 + cpu-63 + turbo-freq --auto + enable:success + +In this case, the option "-a" is optional. If set, it enables Intel(R) SST-TF +feature and also sets the CPUs to high and and low priority using Intel Speed +Select Technology Core Power (Intel(R) SST-CP) features. The CPU numbers passed +with "-c" arguments are marked as high priority, including its siblings. + +If -a option is not used, then the following steps are required before enabling +Intel(R) SST-TF: + +- Discover Intel(R) SST-TF and note buckets of high priority cores and maximum frequency + +- Enable CLOS using core-power feature set - Configure CLOS parameters + +- Subscribe desired CPUs to CLOS groups making sure that high priority cores are set to the maximum frequency + +If the same hackbench workload is executed, schedule hackbench threads on high +priority CPUs:: + + #taskset -c 12,13 perf bench -r 100 sched pipe + # Running 'sched/pipe' benchmark: + # Executed 1000000 pipe operations between two processes + Total time: 5.510 [sec] + 5.510165 usecs/op + 180826 ops/sec + +This improved performance by around 3.3% improvement on a busy system. Here the +turbostat output will show that the CPU 12 and CPU 13 are getting 100 MHz boost. +The turbostat output:: + + #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1 + Package Core CPU Bzy_MHz + ... + 0 12 12 3200 + 0 13 13 3200 diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index 0a38cdf39df1..f40994c422dc 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -13,3 +13,4 @@ Working-State Power Management intel_pstate cpufreq_drivers intel_epb + intel-speed-select From 03c3b413a14d8bbf0afc8e069cf2a65df580c30c Mon Sep 17 00:00:00 2001 From: Sumeet Pawnikar Date: Thu, 21 May 2020 12:14:26 +0530 Subject: [PATCH 04/11] powercap: RAPL: remove unused local MSR define Remove unused PLATFORM_POWER_LIMIT MSR local definition from file intel_rapl_common.c. This was missed while splitting old RAPL code intel_rapl.c file into two new files intel_rapl_msr.c and intel_rapl_common.c as per the commit 3382388d7148 ("intel_rapl: abstract RAPL common code"). Currently, this #define entry is being used only in intel_rapl_msr.c file and local definition present in this file. Signed-off-by: Sumeet Pawnikar Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index c3e335e37c7d..61a63a16b5e7 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -26,9 +26,6 @@ #include #include -/* Local defines */ -#define MSR_PLATFORM_POWER_LIMIT 0x0000065C - /* bitmasks for RAPL MSRs, used by primitive access functions */ #define ENERGY_STATUS_MASK 0xffffffff From d2216ba3ebea8d8864c5094526b8f9302c01021c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 3 Apr 2020 01:24:48 +0300 Subject: [PATCH 05/11] PM / devfreq: tegra30: Make CPUFreq notifier to take into account boosting We're taking into account both HW memory-accesses + CPU activity based on current CPU's frequency. For memory-accesses there is a kind of hysteresis in a form of "boosting" which is managed by the tegra30-devfreq driver. If current HW memory activity is higher than activity judged based of the CPU's frequency, then there is no need to schedule cpufreq_update_work because the result of the work will be a NO-OP. And thus, tegra_actmon_cpufreq_contribution() should return 0, meaning that at the moment CPU frequency doesn't contribute anything to the final decision about required memory clock rate. Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra30-devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 28b2c7ca416e..dfc3ac93c584 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -420,7 +420,7 @@ tegra_actmon_cpufreq_contribution(struct tegra_devfreq *tegra, static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq); - if (dev_freq >= static_cpu_emc_freq) + if (dev_freq + actmon_dev->boost_freq >= static_cpu_emc_freq) return 0; return static_cpu_emc_freq; From 0716f9fdb3b65b396531e490a42a4f58a69e1c8f Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 4 Apr 2020 20:34:02 +0200 Subject: [PATCH 06/11] PM / devfreq: tegra30: Delete an error message in tegra_devfreq_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function “platform_get_irq” can log an error already. Thus omit a redundant message for the exception handling in the calling function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Reviewed-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra30-devfreq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index dfc3ac93c584..e94a27804c20 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -807,10 +807,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) } err = platform_get_irq(pdev, 0); - if (err < 0) { - dev_err(&pdev->dev, "Failed to get IRQ: %d\n", err); + if (err < 0) return err; - } + tegra->irq = err; irq_set_status_flags(tegra->irq, IRQ_NOAUTOEN); From 5173a9756c8df9c387e04e49da0c4061951bbfec Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Mon, 6 Apr 2020 15:03:07 +0300 Subject: [PATCH 07/11] PM / devfreq: Add generic imx bus scaling driver Add initial support for dynamic frequency switching on pieces of the imx interconnect fabric. All this driver does is set a clk rate based on an opp table, it does not map register areas. Signed-off-by: Leonard Crestez Tested-by: Martin Kepplinger Acked-by: Chanwoo Choi Signed-off-by: Chanwoo Choi --- drivers/devfreq/Kconfig | 8 +++ drivers/devfreq/Makefile | 1 + drivers/devfreq/imx-bus.c | 138 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 drivers/devfreq/imx-bus.c diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 0b1df12e0f21..37dc40d1fcfb 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -91,6 +91,14 @@ config ARM_EXYNOS_BUS_DEVFREQ and adjusts the operating frequencies and voltages with OPP support. This does not yet operate with optimal voltages. +config ARM_IMX_BUS_DEVFREQ + tristate "i.MX Generic Bus DEVFREQ Driver" + depends on ARCH_MXC || COMPILE_TEST + select DEVFREQ_GOV_USERSPACE + help + This adds the generic DEVFREQ driver for i.MX interconnects. It + allows adjusting NIC/NOC frequency. + config ARM_IMX8M_DDRC_DEVFREQ tristate "i.MX8M DDRC DEVFREQ Driver" depends on (ARCH_MXC && HAVE_ARM_SMCCC) || \ diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 3eb4d5e6635c..3ca1ad0ecb97 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o # DEVFREQ Drivers obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o +obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c new file mode 100644 index 000000000000..428f7980a2f2 --- /dev/null +++ b/drivers/devfreq/imx-bus.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 NXP + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct imx_bus { + struct devfreq_dev_profile profile; + struct devfreq *devfreq; + struct clk *clk; +}; + +static int imx_bus_target(struct device *dev, + unsigned long *freq, u32 flags) +{ + struct dev_pm_opp *new_opp; + int ret; + + new_opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(new_opp)) { + ret = PTR_ERR(new_opp); + dev_err(dev, "failed to get recommended opp: %d\n", ret); + return ret; + } + dev_pm_opp_put(new_opp); + + return dev_pm_opp_set_rate(dev, *freq); +} + +static int imx_bus_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct imx_bus *priv = dev_get_drvdata(dev); + + *freq = clk_get_rate(priv->clk); + + return 0; +} + +static int imx_bus_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct imx_bus *priv = dev_get_drvdata(dev); + + stat->busy_time = 0; + stat->total_time = 0; + stat->current_frequency = clk_get_rate(priv->clk); + + return 0; +} + +static void imx_bus_exit(struct device *dev) +{ + dev_pm_opp_of_remove_table(dev); +} + +static int imx_bus_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct imx_bus *priv; + const char *gov = DEVFREQ_GOV_USERSPACE; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + /* + * Fetch the clock to adjust but don't explicitly enable. + * + * For imx bus clock clk_set_rate is safe no matter if the clock is on + * or off and some peripheral side-buses might be off unless enabled by + * drivers for devices on those specific buses. + * + * Rate adjustment on a disabled bus clock just takes effect later. + */ + priv->clk = devm_clk_get(dev, NULL); + if (IS_ERR(priv->clk)) { + ret = PTR_ERR(priv->clk); + dev_err(dev, "failed to fetch clk: %d\n", ret); + return ret; + } + platform_set_drvdata(pdev, priv); + + ret = dev_pm_opp_of_add_table(dev); + if (ret < 0) { + dev_err(dev, "failed to get OPP table\n"); + return ret; + } + + priv->profile.polling_ms = 1000; + priv->profile.target = imx_bus_target; + priv->profile.get_dev_status = imx_bus_get_dev_status; + priv->profile.exit = imx_bus_exit; + priv->profile.get_cur_freq = imx_bus_get_cur_freq; + priv->profile.initial_freq = clk_get_rate(priv->clk); + + priv->devfreq = devm_devfreq_add_device(dev, &priv->profile, + gov, NULL); + if (IS_ERR(priv->devfreq)) { + ret = PTR_ERR(priv->devfreq); + dev_err(dev, "failed to add devfreq device: %d\n", ret); + goto err; + } + + return 0; + +err: + dev_pm_opp_of_remove_table(dev); + return ret; +} + +static const struct of_device_id imx_bus_of_match[] = { + { .compatible = "fsl,imx8m-noc", }, + { .compatible = "fsl,imx8m-nic", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, imx_bus_of_match); + +static struct platform_driver imx_bus_platdrv = { + .probe = imx_bus_probe, + .driver = { + .name = "imx-bus-devfreq", + .of_match_table = of_match_ptr(imx_bus_of_match), + }, +}; +module_platform_driver(imx_bus_platdrv); + +MODULE_DESCRIPTION("Generic i.MX bus frequency scaling driver"); +MODULE_AUTHOR("Leonard Crestez "); +MODULE_LICENSE("GPL v2"); From 02355216b4c00b1fe3f1e969aa15eca99240f8f3 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Mon, 6 Apr 2020 15:03:08 +0300 Subject: [PATCH 08/11] PM / devfreq: imx: Register interconnect device There is no single device which can represent the imx interconnect. Instead of adding a virtual one just make the main &noc act as the global interconnect provider. The imx interconnect provider driver will scale the NOC and DDRC based on bandwidth request. More scalable nodes can be added in the future, for example for audio/display/vpu/gpu NICs. Signed-off-by: Leonard Crestez Tested-by: Martin Kepplinger Acked-by: Chanwoo Choi Signed-off-by: Chanwoo Choi --- drivers/devfreq/imx-bus.c | 41 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c index 428f7980a2f2..532e7954032f 100644 --- a/drivers/devfreq/imx-bus.c +++ b/drivers/devfreq/imx-bus.c @@ -16,6 +16,7 @@ struct imx_bus { struct devfreq_dev_profile profile; struct devfreq *devfreq; struct clk *clk; + struct platform_device *icc_pdev; }; static int imx_bus_target(struct device *dev, @@ -58,7 +59,40 @@ static int imx_bus_get_dev_status(struct device *dev, static void imx_bus_exit(struct device *dev) { + struct imx_bus *priv = dev_get_drvdata(dev); + dev_pm_opp_of_remove_table(dev); + platform_device_unregister(priv->icc_pdev); +} + +/* imx_bus_init_icc() - register matching icc provider if required */ +static int imx_bus_init_icc(struct device *dev) +{ + struct imx_bus *priv = dev_get_drvdata(dev); + const char *icc_driver_name; + + if (!of_get_property(dev->of_node, "#interconnect-cells", 0)) + return 0; + if (!IS_ENABLED(CONFIG_INTERCONNECT_IMX)) { + dev_warn(dev, "imx interconnect drivers disabled\n"); + return 0; + } + + icc_driver_name = of_device_get_match_data(dev); + if (!icc_driver_name) { + dev_err(dev, "unknown interconnect driver\n"); + return 0; + } + + priv->icc_pdev = platform_device_register_data( + dev, icc_driver_name, -1, NULL, 0); + if (IS_ERR(priv->icc_pdev)) { + dev_err(dev, "failed to register icc provider %s: %ld\n", + icc_driver_name, PTR_ERR(priv->devfreq)); + return PTR_ERR(priv->devfreq); + } + + return 0; } static int imx_bus_probe(struct platform_device *pdev) @@ -110,6 +144,10 @@ static int imx_bus_probe(struct platform_device *pdev) goto err; } + ret = imx_bus_init_icc(dev); + if (ret) + goto err; + return 0; err: @@ -118,6 +156,9 @@ err: } static const struct of_device_id imx_bus_of_match[] = { + { .compatible = "fsl,imx8mq-noc", .data = "imx8mq-interconnect", }, + { .compatible = "fsl,imx8mm-noc", .data = "imx8mm-interconnect", }, + { .compatible = "fsl,imx8mn-noc", .data = "imx8mn-interconnect", }, { .compatible = "fsl,imx8m-noc", }, { .compatible = "fsl,imx8m-nic", }, { /* sentinel */ }, From a316b5ca9ead8065ac873e655c313248ecd732bc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 27 Feb 2020 20:08:54 +0300 Subject: [PATCH 09/11] PM / devfreq: Replace strncpy with strscpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC produces this warning when kernel compiled using `make W=1`: warning: ‘strncpy’ specified bound 16 equals destination size [-Wstringop-truncation] 772 | strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); The strncpy doesn't take care of NULL-termination of the destination buffer, while the strscpy does. Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 6fecd11dafdd..ef3d2bc3d1ac 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -768,7 +768,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->dev.release = devfreq_dev_release; INIT_LIST_HEAD(&devfreq->node); devfreq->profile = profile; - strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); + strscpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; devfreq->last_status.current_frequency = profile->initial_freq; devfreq->data = data; From 48bbf6375131155d329eb8e06ae962e27cbba032 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 08:12:45 -0500 Subject: [PATCH 10/11] PM / devfreq: imx-bus: Fix inconsistent IS_ERR and PTR_ERR Fix inconsistent IS_ERR and PTR_ERR in imx_bus_init_icc(). The proper pointer to be passed as argument to PTR_ERR() is priv->icc_pdev. This bug was detected with the help of Coccinelle. Fixes: 16c1d2f1b0bd ("PM / devfreq: imx: Register interconnect device") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Dong Aisheng [cw00.choi: Edit the patch title from 'imx' to 'imx-bus'] Signed-off-by: Chanwoo Choi --- drivers/devfreq/imx-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c index 532e7954032f..4f38455ad742 100644 --- a/drivers/devfreq/imx-bus.c +++ b/drivers/devfreq/imx-bus.c @@ -88,8 +88,8 @@ static int imx_bus_init_icc(struct device *dev) dev, icc_driver_name, -1, NULL, 0); if (IS_ERR(priv->icc_pdev)) { dev_err(dev, "failed to register icc provider %s: %ld\n", - icc_driver_name, PTR_ERR(priv->devfreq)); - return PTR_ERR(priv->devfreq); + icc_driver_name, PTR_ERR(priv->icc_pdev)); + return PTR_ERR(priv->icc_pdev); } return 0; From 8fc0e48e0faefef5064f3cb803d3d12314e16ec4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 12 May 2020 08:41:58 +0200 Subject: [PATCH 11/11] PM / devfreq: Use lockdep asserts instead of manual checks for locked mutex Instead of warning when mutex_is_locked(), just use the lockdep framework. The code is smaller and checks could be disabled for production environments (it is useful only during development). Put asserts at beginning of function, even before validating arguments. The behavior of update_devfreq() is now changed because lockdep assert will only print a warning, not return with EINVAL. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index ef3d2bc3d1ac..52b9c3e141f3 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -60,12 +60,12 @@ static struct devfreq *find_device_devfreq(struct device *dev) { struct devfreq *tmp_devfreq; + lockdep_assert_held(&devfreq_list_lock); + if (IS_ERR_OR_NULL(dev)) { pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); } - WARN(!mutex_is_locked(&devfreq_list_lock), - "devfreq_list_lock must be locked."); list_for_each_entry(tmp_devfreq, &devfreq_list, node) { if (tmp_devfreq->dev.parent == dev) @@ -258,12 +258,12 @@ static struct devfreq_governor *find_devfreq_governor(const char *name) { struct devfreq_governor *tmp_governor; + lockdep_assert_held(&devfreq_list_lock); + if (IS_ERR_OR_NULL(name)) { pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); } - WARN(!mutex_is_locked(&devfreq_list_lock), - "devfreq_list_lock must be locked."); list_for_each_entry(tmp_governor, &devfreq_governor_list, node) { if (!strncmp(tmp_governor->name, name, DEVFREQ_NAME_LEN)) @@ -289,12 +289,12 @@ static struct devfreq_governor *try_then_request_governor(const char *name) struct devfreq_governor *governor; int err = 0; + lockdep_assert_held(&devfreq_list_lock); + if (IS_ERR_OR_NULL(name)) { pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); } - WARN(!mutex_is_locked(&devfreq_list_lock), - "devfreq_list_lock must be locked."); governor = find_devfreq_governor(name); if (IS_ERR(governor)) { @@ -392,10 +392,7 @@ int update_devfreq(struct devfreq *devfreq) int err = 0; u32 flags = 0; - if (!mutex_is_locked(&devfreq->lock)) { - WARN(true, "devfreq->lock must be locked by the caller.\n"); - return -EINVAL; - } + lockdep_assert_held(&devfreq->lock); if (!devfreq->governor) return -EINVAL;