tools: create power/x86/x86_energy_perf_policy
MSR_IA32_ENERGY_PERF_BIAS first became available on Westmere Xeon. It is implemented in all Sandy Bridge processors -- mobile, desktop and server. It is expected to become increasingly important in subsequent generations. x86_energy_perf_policy is a user-space utility to set the hardware energy vs performance policy hint in the processor. Most systems would benefit from "x86_energy_perf_policy normal" at system startup, as the hardware default is maximum performance at the expense of energy efficiency. See x86_energy_perf_policy.8 man page for more information. Background: Linux-2.6.36 added "epb" to /proc/cpuinfo to indicate if an x86 processor supports MSR_IA32_ENERGY_PERF_BIAS, without actually modifying the MSR. In March, 2010, Venkatesh Pallipadi proposed a small driver that programmed MSR_IA32_ENERGY_PERF_BIAS, based on the cpufreq governor in use. It also offered a boot-time cmdline option to override. http://lkml.org/lkml/2010/3/4/457 But hiding the hardware policy behind the governor choice was deemed "kinda icky". In June, 2010, I proposed a generic user/kernel API to generalize the power/performance policy trade-off. "RFC: /sys/power/policy_preference" http://lkml.org/lkml/2010/6/16/399 That is my preference for implementing this capability, but I received no support on the list. So in September, 2010, I sent x86_energy_perf_policy.c to LKML, a user-space utility that scribbles directly to the MSR. http://lkml.org/lkml/2010/9/28/246 Here is that same utility, after responding to some review feedback, to live in tools/power/, where it is easily found. Signed-off-by: Len Brown <len.brown@intel.com>
This commit is contained in:
Родитель
f6f94e2ab1
Коммит
d5532ee7b4
|
@ -0,0 +1,8 @@
|
||||||
|
# Build, clean up and install the x86_energy_perf_policy utility.

# clean and install name actions, not files.
.PHONY : clean install

# Built by make's implicit rule for turning a .c file into an executable.
x86_energy_perf_policy : x86_energy_perf_policy.c

clean :
	rm -f x86_energy_perf_policy

# Depend on the binary so that "make install" builds it first if needed.
install : x86_energy_perf_policy
	install x86_energy_perf_policy /usr/bin/
	install x86_energy_perf_policy.8 /usr/share/man/man8/
|
|
@ -0,0 +1,104 @@
|
||||||
|
.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
|
||||||
|
.\" Distributed under the GPL, Copyleft 1994.
|
||||||
|
.TH X86_ENERGY_PERF_POLICY 8
|
||||||
|
.SH NAME
|
||||||
|
x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.ft B
|
||||||
|
.B x86_energy_perf_policy
|
||||||
|
.RB [ "\-c cpu" ]
|
||||||
|
.RB [ "\-v" ]
|
||||||
|
.RB "\-r"
|
||||||
|
.br
|
||||||
|
.B x86_energy_perf_policy
|
||||||
|
.RB [ "\-c cpu" ]
|
||||||
|
.RB [ "\-v" ]
|
||||||
|
.RB 'performance'
|
||||||
|
.br
|
||||||
|
.B x86_energy_perf_policy
|
||||||
|
.RB [ "\-c cpu" ]
|
||||||
|
.RB [ "\-v" ]
|
||||||
|
.RB 'normal'
|
||||||
|
.br
|
||||||
|
.B x86_energy_perf_policy
|
||||||
|
.RB [ "\-c cpu" ]
|
||||||
|
.RB [ "\-v" ]
|
||||||
|
.RB 'powersave'
|
||||||
|
.br
|
||||||
|
.B x86_energy_perf_policy
|
||||||
|
.RB [ "\-c cpu" ]
|
||||||
|
.RB [ "\-v" ]
|
||||||
|
.RB n
|
||||||
|
.br
|
||||||
|
.SH DESCRIPTION
|
||||||
|
\fBx86_energy_perf_policy\fP
|
||||||
|
allows software to convey
|
||||||
|
its policy for the relative importance of performance
|
||||||
|
versus energy savings to the processor.
|
||||||
|
|
||||||
|
The processor uses this information in model-specific ways
|
||||||
|
when it must select trade-offs between performance and
|
||||||
|
energy efficiency.
|
||||||
|
|
||||||
|
This policy hint does not supersede Processor Performance states
|
||||||
|
(P-states) or CPU Idle power states (C-states), but allows
|
||||||
|
software to have influence where it would otherwise be unable
|
||||||
|
to express a preference.
|
||||||
|
|
||||||
|
For example, this setting may tell the hardware how
|
||||||
|
aggressively or conservatively to control frequency
|
||||||
|
in the "turbo range" above the explicitly OS-controlled
|
||||||
|
P-state frequency range. It may also tell the hardware
|
||||||
|
how aggressively it should enter the OS requested C-states.
|
||||||
|
|
||||||
|
Support for this feature is indicated by CPUID.06H.ECX.bit3
|
||||||
|
per the Intel Architectures Software Developer's Manual.
|
||||||
|
|
||||||
|
.SS Options
|
||||||
|
\fB-c\fP limits operation to a single CPU.
|
||||||
|
The default is to operate on all CPUs.
|
||||||
|
Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
|
||||||
|
logical processor, but that the initial implementations
|
||||||
|
of the MSR were shared among all processors in each package.
|
||||||
|
.PP
|
||||||
|
\fB-v\fP increases verbosity. By default
|
||||||
|
x86_energy_perf_policy is silent.
|
||||||
|
.PP
|
||||||
|
\fB-r\fP is for "read-only" mode - the unchanged state
|
||||||
|
is read and displayed.
|
||||||
|
.PP
|
||||||
|
.I performance
|
||||||
|
Set a policy where performance is paramount.
|
||||||
|
The processor will be unwilling to sacrifice any performance
|
||||||
|
for the sake of energy saving. This is the hardware default.
|
||||||
|
.PP
|
||||||
|
.I normal
|
||||||
|
Set a policy with a normal balance between performance and energy efficiency.
|
||||||
|
The processor will tolerate minor performance compromise
|
||||||
|
for potentially significant energy savings.
|
||||||
|
This is a reasonable default for most desktops and servers.
|
||||||
|
.PP
|
||||||
|
.I powersave
|
||||||
|
Set a policy where the processor can accept
|
||||||
|
a measurable performance hit to maximize energy efficiency.
|
||||||
|
.PP
|
||||||
|
.I n
|
||||||
|
Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
|
||||||
|
The range of valid numbers is 0-15, where 0 is maximum
|
||||||
|
performance and 15 is maximum energy efficiency.
|
||||||
|
|
||||||
|
.SH NOTES
|
||||||
|
.B "x86_energy_perf_policy "
|
||||||
|
runs only as root.
|
||||||
|
.SH FILES
|
||||||
|
.ta
|
||||||
|
.nf
|
||||||
|
/dev/cpu/*/msr
|
||||||
|
.fi
|
||||||
|
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
msr(4)
|
||||||
|
.PP
|
||||||
|
.SH AUTHORS
|
||||||
|
.nf
|
||||||
|
Written by Len Brown <len.brown@intel.com>
|
|
@ -0,0 +1,325 @@
|
||||||
|
/*
|
||||||
|
* x86_energy_perf_policy -- set the energy versus performance
|
||||||
|
* policy preference bias on recent X86 processors.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, Intel Corporation.
|
||||||
|
* Len Brown <len.brown@intel.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms and conditions of the GNU General Public License,
|
||||||
|
* version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along with
|
||||||
|
* this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
unsigned int verbose; /* verbosity level: incremented once for each -v */
|
||||||
|
unsigned int read_only; /* set to 1 by -r: report the MSR instead of writing it */
|
||||||
|
char *progname; /* argv[0], printed by usage() */
|
||||||
|
unsigned long long new_bias; /* value to write to the MSR, chosen by the policy argument */
|
||||||
|
int cpu = -1; /* CPU selected with -c; -1 means operate on every CPU */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Usage:
|
||||||
|
*
|
||||||
|
* -c cpu: limit action to a single CPU (default is all CPUs)
|
||||||
|
* -v: verbose output (can invoke more than once)
|
||||||
|
* -r: read-only, don't change any settings
|
||||||
|
*
|
||||||
|
* performance
|
||||||
|
* Performance is paramount.
|
||||||
|
* Unwilling to sacrafice any performance
|
||||||
|
* for the sake of energy saving. (hardware default)
|
||||||
|
*
|
||||||
|
* normal
|
||||||
|
* Can tolerate minor performance compromise
|
||||||
|
* for potentially significant energy savings.
|
||||||
|
* (reasonable default for most desktops and servers)
|
||||||
|
*
|
||||||
|
* powersave
|
||||||
|
* Can tolerate significant performance hit
|
||||||
|
* to maximize energy savings.
|
||||||
|
*
|
||||||
|
* n
|
||||||
|
* a numerical value to write to the underlying MSR.
|
||||||
|
*/
|
||||||
|
void usage(void)
|
||||||
|
{
|
||||||
|
printf("%s: [-c cpu] [-v] "
|
||||||
|
"(-r | 'performance' | 'normal' | 'powersave' | n)\n",
|
||||||
|
progname);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* MSR number; used as the file offset when reading/writing /dev/cpu/N/msr */
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
|
||||||
|
|
||||||
|
/* bias written for the 'performance' policy */
#define BIAS_PERFORMANCE 0
|
||||||
|
/* bias written for the 'normal' policy */
#define BIAS_BALANCE 6
|
||||||
|
/* bias written for the 'powersave' policy; also the largest value cmdline() accepts */
#define BIAS_POWERSAVE 15
|
||||||
|
|
||||||
|
void cmdline(int argc, char **argv)
|
||||||
|
{
|
||||||
|
int opt;
|
||||||
|
|
||||||
|
progname = argv[0];
|
||||||
|
|
||||||
|
while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
|
||||||
|
switch (opt) {
|
||||||
|
case 'c':
|
||||||
|
cpu = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
read_only = 1;
|
||||||
|
break;
|
||||||
|
case 'v':
|
||||||
|
verbose++;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* if -r, then should be no additional optind */
|
||||||
|
if (read_only && (argc > optind))
|
||||||
|
usage();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if no -r , then must be one additional optind
|
||||||
|
*/
|
||||||
|
if (!read_only) {
|
||||||
|
|
||||||
|
if (argc != optind + 1) {
|
||||||
|
printf("must supply -r or policy param\n");
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strcmp("performance", argv[optind])) {
|
||||||
|
new_bias = BIAS_PERFORMANCE;
|
||||||
|
} else if (!strcmp("normal", argv[optind])) {
|
||||||
|
new_bias = BIAS_BALANCE;
|
||||||
|
} else if (!strcmp("powersave", argv[optind])) {
|
||||||
|
new_bias = BIAS_POWERSAVE;
|
||||||
|
} else {
|
||||||
|
char *endptr;
|
||||||
|
|
||||||
|
new_bias = strtoull(argv[optind], &endptr, 0);
|
||||||
|
if (endptr == argv[optind] ||
|
||||||
|
new_bias > BIAS_POWERSAVE) {
|
||||||
|
fprintf(stderr, "invalid value: %s\n",
|
||||||
|
argv[optind]);
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* validate_cpuid()
|
||||||
|
* returns on success, quietly exits on failure (make verbose with -v)
|
||||||
|
*/
|
||||||
|
void validate_cpuid(void)
|
||||||
|
{
|
||||||
|
unsigned int eax, ebx, ecx, edx, max_level;
|
||||||
|
char brand[16];
|
||||||
|
unsigned int fms, family, model, stepping;
|
||||||
|
|
||||||
|
eax = ebx = ecx = edx = 0;
|
||||||
|
|
||||||
|
asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
|
||||||
|
"=d" (edx) : "a" (0));
|
||||||
|
|
||||||
|
if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
|
||||||
|
if (verbose)
|
||||||
|
fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
|
||||||
|
(char *)&ebx, (char *)&edx, (char *)&ecx);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
|
||||||
|
family = (fms >> 8) & 0xf;
|
||||||
|
model = (fms >> 4) & 0xf;
|
||||||
|
stepping = fms & 0xf;
|
||||||
|
if (family == 6 || family == 0xf)
|
||||||
|
model += ((fms >> 16) & 0xf) << 4;
|
||||||
|
|
||||||
|
if (verbose > 1)
|
||||||
|
printf("CPUID %s %d levels family:model:stepping "
|
||||||
|
"0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
|
||||||
|
family, model, stepping, family, model, stepping);
|
||||||
|
|
||||||
|
if (!(edx & (1 << 5))) {
|
||||||
|
if (verbose)
|
||||||
|
printf("CPUID: no MSR\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Support for MSR_IA32_ENERGY_PERF_BIAS
|
||||||
|
* is indicated by CPUID.06H.ECX.bit3
|
||||||
|
*/
|
||||||
|
asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
|
||||||
|
if (verbose)
|
||||||
|
printf("CPUID.06H.ECX: 0x%x\n", ecx);
|
||||||
|
if (!(ecx & (1 << 3))) {
|
||||||
|
if (verbose)
|
||||||
|
printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
return; /* success */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * get_msr()
 * Read the MSR at 'offset' on 'cpu' through /dev/cpu/<cpu>/msr.
 *
 * Returns the value read.  Does not return on failure: exits with
 * status 1 if the device cannot be opened, or -2 if the read fails
 * or is short.
 */
unsigned long long get_msr(int cpu, int offset)
{
	unsigned long long msr;
	char msr_path[32];
	ssize_t retval;
	int fd;

	snprintf(msr_path, sizeof msr_path, "/dev/cpu/%d/msr", cpu);
	fd = open(msr_path, O_RDONLY);
	if (fd < 0) {
		/* report errno first: the printf() below may overwrite it */
		perror(msr_path);
		printf("Try \"# modprobe msr\"\n");
		exit(1);
	}

	retval = pread(fd, &msr, sizeof msr, offset);
	if (retval != (ssize_t)sizeof msr) {
		/* errno is only meaningful when the call itself failed */
		if (retval < 0)
			perror("pread");
		printf("pread cpu%d 0x%x = %zd\n", cpu, offset, retval);
		exit(-2);
	}

	close(fd);
	return msr;
}
|
||||||
|
|
||||||
|
/*
 * put_msr()
 * Write 'new_msr' to the MSR at 'offset' on 'cpu' through
 * /dev/cpu/<cpu>/msr, after first reading the current contents.
 *
 * Returns the value the MSR held before the write.  Does not return on
 * failure: exits with status 1 if the device cannot be opened, or -2 if
 * the read or the write fails or is short.
 */
unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset)
{
	unsigned long long old_msr;
	char msr_path[32];
	ssize_t retval;
	int fd;

	snprintf(msr_path, sizeof msr_path, "/dev/cpu/%d/msr", cpu);
	fd = open(msr_path, O_RDWR);
	if (fd < 0) {
		perror(msr_path);
		exit(1);
	}

	/* fetch the old value so the caller can report the transition */
	retval = pread(fd, &old_msr, sizeof old_msr, offset);
	if (retval != (ssize_t)sizeof old_msr) {
		/* errno is only meaningful when the call itself failed */
		if (retval < 0)
			perror("pread");
		printf("pread cpu%d 0x%x = %zd\n", cpu, offset, retval);
		exit(-2);
	}

	retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
	if (retval != (ssize_t)sizeof new_msr) {
		if (retval < 0)
			perror("pwrite");
		printf("pwrite cpu%d 0x%x = %zd\n", cpu, offset, retval);
		exit(-2);
	}

	close(fd);

	return old_msr;
}
|
||||||
|
|
||||||
|
void print_msr(int cpu)
|
||||||
|
{
|
||||||
|
printf("cpu%d: 0x%016llx\n",
|
||||||
|
cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
|
||||||
|
}
|
||||||
|
|
||||||
|
void update_msr(int cpu)
|
||||||
|
{
|
||||||
|
unsigned long long previous_msr;
|
||||||
|
|
||||||
|
previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
|
||||||
|
|
||||||
|
if (verbose)
|
||||||
|
printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n",
|
||||||
|
cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *proc_stat = "/proc/stat";

/*
 * for_every_cpu()
 * Call func(N) once for each "cpuN" line read from /proc/stat.
 *
 * The first line of the file (the aggregate "cpu" line) is consumed
 * before the walk starts.  The walk stops at the first line that does
 * not parse as a per-cpu line, and the file is then closed.
 *
 * Exits with status 1 if the file cannot be opened or if the first
 * fscanf() reports an error.
 */
void for_every_cpu(void (func)(int))
{
	FILE *fp;
	int retval;

	fp = fopen(proc_stat, "r");
	if (fp == NULL) {
		perror(proc_stat);
		exit(1);
	}

	/* every conversion is suppressed, so a successful scan returns 0 */
	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
	if (retval != 0) {
		perror("/proc/stat format");
		exit(1);
	}

	while (1) {
		unsigned int cpu_num;	/* matches %u; avoids shadowing the global cpu */

		retval = fscanf(fp,
			"cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
			&cpu_num);
		if (retval != 1)
			break;	/* leave the loop so the stream gets closed */

		func((int)cpu_num);
	}
	fclose(fp);
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
cmdline(argc, argv);
|
||||||
|
|
||||||
|
if (verbose > 1)
|
||||||
|
printf("x86_energy_perf_policy Nov 24, 2010"
|
||||||
|
" - Len Brown <lenb@kernel.org>\n");
|
||||||
|
if (verbose > 1 && !read_only)
|
||||||
|
printf("new_bias %lld\n", new_bias);
|
||||||
|
|
||||||
|
validate_cpuid();
|
||||||
|
|
||||||
|
if (cpu != -1) {
|
||||||
|
if (read_only)
|
||||||
|
print_msr(cpu);
|
||||||
|
else
|
||||||
|
update_msr(cpu);
|
||||||
|
} else {
|
||||||
|
if (read_only)
|
||||||
|
for_every_cpu(print_msr);
|
||||||
|
else
|
||||||
|
for_every_cpu(update_msr);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Загрузка…
Ссылка в новой задаче