tools: create power/x86/x86_energy_perf_policy
MSR_IA32_ENERGY_PERF_BIAS first became available on Westmere Xeon. It is implemented in all Sandy Bridge processors -- mobile, desktop and server. It is expected to become increasingly important in subsequent generations. x86_energy_perf_policy is a user-space utility to set the hardware energy vs performance policy hint in the processor. Most systems would benefit from "x86_energy_perf_policy normal" at system startup, as the hardware default is maximum performance at the expense of energy efficiency. See x86_energy_perf_policy.8 man page for more information. Background: Linux-2.6.36 added "epb" to /proc/cpuinfo to indicate if an x86 processor supports MSR_IA32_ENERGY_PERF_BIAS, without actually modifying the MSR. In March, 2010, Venkatesh Pallipadi proposed a small driver that programmed MSR_IA32_ENERGY_PERF_BIAS, based on the cpufreq governor in use. It also offered a boot-time cmdline option to override. http://lkml.org/lkml/2010/3/4/457 But hiding the hardware policy behind the governor choice was deemed "kinda icky". In June, 2010, I proposed a generic user/kernel API to generalize the power/performance policy trade-off. "RFC: /sys/power/policy_preference" http://lkml.org/lkml/2010/6/16/399 That is my preference for implementing this capability, but I received no support on the list. So in September, 2010, I sent x86_energy_perf_policy.c to LKML, a user-space utility that scribbles directly to the MSR. http://lkml.org/lkml/2010/9/28/246 Here is that same utility, after responding to some review feedback, to live in tools/power/, where it is easily found. Signed-off-by: Len Brown <len.brown@intel.com>
This commit is contained in:
Родитель
f6f94e2ab1
Коммит
d5532ee7b4
|
@ -0,0 +1,8 @@
|
|||
x86_energy_perf_policy : x86_energy_perf_policy.c
|
||||
|
||||
clean :
|
||||
rm -f x86_energy_perf_policy
|
||||
|
||||
install :
|
||||
install x86_energy_perf_policy /usr/bin/
|
||||
install x86_energy_perf_policy.8 /usr/share/man/man8/
|
|
@ -0,0 +1,104 @@
|
|||
.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
|
||||
.\" Distributed under the GPL, Copyleft 1994.
|
||||
.TH X86_ENERGY_PERF_POLICY 8
|
||||
.SH NAME
|
||||
x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
|
||||
.SH SYNOPSIS
|
||||
.ft B
|
||||
.B x86_energy_perf_policy
|
||||
.RB [ "\-c cpu" ]
|
||||
.RB [ "\-v" ]
|
||||
.RB "\-r"
|
||||
.br
|
||||
.B x86_energy_perf_policy
|
||||
.RB [ "\-c cpu" ]
|
||||
.RB [ "\-v" ]
|
||||
.RB 'performance'
|
||||
.br
|
||||
.B x86_energy_perf_policy
|
||||
.RB [ "\-c cpu" ]
|
||||
.RB [ "\-v" ]
|
||||
.RB 'normal'
|
||||
.br
|
||||
.B x86_energy_perf_policy
|
||||
.RB [ "\-c cpu" ]
|
||||
.RB [ "\-v" ]
|
||||
.RB 'powersave'
|
||||
.br
|
||||
.B x86_energy_perf_policy
|
||||
.RB [ "\-c cpu" ]
|
||||
.RB [ "\-v" ]
|
||||
.RB n
|
||||
.br
|
||||
.SH DESCRIPTION
|
||||
\fBx86_energy_perf_policy\fP
|
||||
allows software to convey
|
||||
its policy for the relative importance of performance
|
||||
versus energy savings to the processor.
|
||||
|
||||
The processor uses this information in model-specific ways
|
||||
when it must select trade-offs between performance and
|
||||
energy efficiency.
|
||||
|
||||
This policy hint does not supersede Processor Performance states
|
||||
(P-states) or CPU Idle power states (C-states), but allows
|
||||
software to have influence where it would otherwise be unable
|
||||
to express a preference.
|
||||
|
||||
For example, this setting may tell the hardware how
|
||||
aggressively or conservatively to control frequency
|
||||
in the "turbo range" above the explicitly OS-controlled
|
||||
P-state frequency range. It may also tell the hardware
|
||||
how aggressively it should enter the OS requested C-states.
|
||||
|
||||
Support for this feature is indicated by CPUID.06H.ECX.bit3
|
||||
per the Intel Architectures Software Developer's Manual.
|
||||
|
||||
.SS Options
|
||||
\fB-c\fP limits operation to a single CPU.
|
||||
The default is to operate on all CPUs.
|
||||
Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
|
||||
logical processor, but that the initial implementations
|
||||
of the MSR were shared among all processors in each package.
|
||||
.PP
|
||||
\fB-v\fP increases verbosity. By default
|
||||
x86_energy_perf_policy is silent.
|
||||
.PP
|
||||
\fB-r\fP is for "read-only" mode - the unchanged state
|
||||
is read and displayed.
|
||||
.PP
|
||||
.I performance
|
||||
Set a policy where performance is paramount.
|
||||
The processor will be unwilling to sacrifice any performance
|
||||
for the sake of energy saving. This is the hardware default.
|
||||
.PP
|
||||
.I normal
|
||||
Set a policy with a normal balance between performance and energy efficiency.
|
||||
The processor will tolerate minor performance compromise
|
||||
for potentially significant energy savings.
|
||||
This is a reasonable default for most desktops and servers.
|
||||
.PP
|
||||
.I powersave
|
||||
Set a policy where the processor can accept
|
||||
a measurable performance hit to maximize energy efficiency.
|
||||
.PP
|
||||
.I n
|
||||
Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
|
||||
The range of valid numbers is 0-15, where 0 is maximum
|
||||
performance and 15 is maximum energy efficiency.
|
||||
|
||||
.SH NOTES
|
||||
.B "x86_energy_perf_policy "
|
||||
runs only as root.
|
||||
.SH FILES
|
||||
.ta
|
||||
.nf
|
||||
/dev/cpu/*/msr
|
||||
.fi
|
||||
|
||||
.SH "SEE ALSO"
|
||||
msr(4)
|
||||
.PP
|
||||
.SH AUTHORS
|
||||
.nf
|
||||
Written by Len Brown <len.brown@intel.com>
|
|
@ -0,0 +1,325 @@
|
|||
/*
|
||||
* x86_energy_perf_policy -- set the energy versus performance
|
||||
* policy preference bias on recent X86 processors.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Intel Corporation.
|
||||
* Len Brown <len.brown@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/resource.h>
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <sys/time.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
unsigned int verbose; /* set with -v */
|
||||
unsigned int read_only; /* set with -r */
|
||||
/* argv[0], recorded by cmdline() and printed by usage() */
char *progname;
|
||||
/* value to write to MSR_IA32_ENERGY_PERF_BIAS, chosen by cmdline() */
unsigned long long new_bias;
|
||||
/* CPU selected with -c; -1 means operate on every CPU */
int cpu = -1;
|
||||
|
||||
/*
|
||||
* Usage:
|
||||
*
|
||||
* -c cpu: limit action to a single CPU (default is all CPUs)
|
||||
* -v: verbose output (can invoke more than once)
|
||||
* -r: read-only, don't change any settings
|
||||
*
|
||||
* performance
|
||||
* Performance is paramount.
|
||||
* Unwilling to sacrifice any performance
|
||||
* for the sake of energy saving. (hardware default)
|
||||
*
|
||||
* normal
|
||||
* Can tolerate minor performance compromise
|
||||
* for potentially significant energy savings.
|
||||
* (reasonable default for most desktops and servers)
|
||||
*
|
||||
* powersave
|
||||
* Can tolerate significant performance hit
|
||||
* to maximize energy savings.
|
||||
*
|
||||
* n
|
||||
* a numerical value to write to the underlying MSR.
|
||||
*/
|
||||
void usage(void)
|
||||
{
|
||||
printf("%s: [-c cpu] [-v] "
|
||||
"(-r | 'performance' | 'normal' | 'powersave' | n)\n",
|
||||
progname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* MSR address passed as the pread()/pwrite() offset on /dev/cpu/N/msr */
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
|
||||
|
||||
/* bias values written for the "performance", "normal" and "powersave" keywords */
#define BIAS_PERFORMANCE 0
|
||||
#define BIAS_BALANCE 6
|
||||
/* also the upper bound accepted for a numeric policy argument */
#define BIAS_POWERSAVE 15
|
||||
|
||||
void cmdline(int argc, char **argv)
|
||||
{
|
||||
int opt;
|
||||
|
||||
progname = argv[0];
|
||||
|
||||
while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'c':
|
||||
cpu = atoi(optarg);
|
||||
break;
|
||||
case 'r':
|
||||
read_only = 1;
|
||||
break;
|
||||
case 'v':
|
||||
verbose++;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
}
|
||||
}
|
||||
/* if -r, then should be no additional optind */
|
||||
if (read_only && (argc > optind))
|
||||
usage();
|
||||
|
||||
/*
|
||||
* if no -r , then must be one additional optind
|
||||
*/
|
||||
if (!read_only) {
|
||||
|
||||
if (argc != optind + 1) {
|
||||
printf("must supply -r or policy param\n");
|
||||
usage();
|
||||
}
|
||||
|
||||
if (!strcmp("performance", argv[optind])) {
|
||||
new_bias = BIAS_PERFORMANCE;
|
||||
} else if (!strcmp("normal", argv[optind])) {
|
||||
new_bias = BIAS_BALANCE;
|
||||
} else if (!strcmp("powersave", argv[optind])) {
|
||||
new_bias = BIAS_POWERSAVE;
|
||||
} else {
|
||||
char *endptr;
|
||||
|
||||
new_bias = strtoull(argv[optind], &endptr, 0);
|
||||
if (endptr == argv[optind] ||
|
||||
new_bias > BIAS_POWERSAVE) {
|
||||
fprintf(stderr, "invalid value: %s\n",
|
||||
argv[optind]);
|
||||
usage();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* validate_cpuid()
|
||||
* returns on success, quietly exits on failure (make verbose with -v)
|
||||
*/
|
||||
void validate_cpuid(void)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx, max_level;
|
||||
char brand[16];
|
||||
unsigned int fms, family, model, stepping;
|
||||
|
||||
eax = ebx = ecx = edx = 0;
|
||||
|
||||
asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
|
||||
"=d" (edx) : "a" (0));
|
||||
|
||||
if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
|
||||
if (verbose)
|
||||
fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
|
||||
(char *)&ebx, (char *)&edx, (char *)&ecx);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
|
||||
family = (fms >> 8) & 0xf;
|
||||
model = (fms >> 4) & 0xf;
|
||||
stepping = fms & 0xf;
|
||||
if (family == 6 || family == 0xf)
|
||||
model += ((fms >> 16) & 0xf) << 4;
|
||||
|
||||
if (verbose > 1)
|
||||
printf("CPUID %s %d levels family:model:stepping "
|
||||
"0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
|
||||
family, model, stepping, family, model, stepping);
|
||||
|
||||
if (!(edx & (1 << 5))) {
|
||||
if (verbose)
|
||||
printf("CPUID: no MSR\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Support for MSR_IA32_ENERGY_PERF_BIAS
|
||||
* is indicated by CPUID.06H.ECX.bit3
|
||||
*/
|
||||
asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
|
||||
if (verbose)
|
||||
printf("CPUID.06H.ECX: 0x%x\n", ecx);
|
||||
if (!(ecx & (1 << 3))) {
|
||||
if (verbose)
|
||||
printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
|
||||
exit(1);
|
||||
}
|
||||
return; /* success */
|
||||
}
|
||||
|
||||
/*
 * get_msr() - read one 64-bit MSR through /dev/cpu/<cpu>/msr.
 *
 * cpu:    CPU number used to build the device path
 * offset: MSR address, used as the pread() file offset
 *
 * Returns the value read.  Exits with status 1 if the device cannot be
 * opened and with status -2 if the read is short or fails.
 */
unsigned long long get_msr(int cpu, int offset)
{
	unsigned long long msr;
	char msr_path[32];	/* "/dev/cpu/" + any int + "/msr" fits in 25 bytes */
	ssize_t retval;
	int fd;

	sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
	fd = open(msr_path, O_RDONLY);
	if (fd < 0) {
		/* report errno first: printf() is allowed to overwrite it */
		perror(msr_path);
		printf("Try \"# modprobe msr\"\n");
		exit(1);
	}

	retval = pread(fd, &msr, sizeof msr, offset);
	if (retval != sizeof msr) {
		if (retval < 0)
			perror("pread");
		printf("pread cpu%d 0x%x = %zd\n", cpu, offset, retval);
		exit(-2);
	}

	close(fd);
	return msr;
}
|
||||
|
||||
/*
 * put_msr() - write one 64-bit MSR through /dev/cpu/<cpu>/msr.
 *
 * cpu:     CPU number used to build the device path
 * new_msr: value to write
 * offset:  MSR address, used as the pread()/pwrite() file offset
 *
 * The current value is read before the write so it can be handed back.
 * Returns that previous value.  Exits with status 1 if the device cannot
 * be opened and with status -2 if the read or the write is short or fails.
 */
unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset)
{
	unsigned long long old_msr;
	char msr_path[32];	/* "/dev/cpu/" + any int + "/msr" fits in 25 bytes */
	ssize_t retval;
	int fd;

	sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
	fd = open(msr_path, O_RDWR);
	if (fd < 0) {
		perror(msr_path);
		exit(1);
	}

	retval = pread(fd, &old_msr, sizeof old_msr, offset);
	if (retval != sizeof old_msr) {
		/* this is the read that failed, so label it as such */
		perror("pread");
		printf("pread cpu%d 0x%x = %zd\n", cpu, offset, retval);
		exit(-2);
	}

	retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
	if (retval != sizeof new_msr) {
		perror("pwrite");
		printf("pwrite cpu%d 0x%x = %zd\n", cpu, offset, retval);
		exit(-2);
	}

	close(fd);

	return old_msr;
}
|
||||
|
||||
void print_msr(int cpu)
|
||||
{
|
||||
printf("cpu%d: 0x%016llx\n",
|
||||
cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
|
||||
}
|
||||
|
||||
void update_msr(int cpu)
|
||||
{
|
||||
unsigned long long previous_msr;
|
||||
|
||||
previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
|
||||
|
||||
if (verbose)
|
||||
printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n",
|
||||
cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
char *proc_stat = "/proc/stat";
|
||||
/*
|
||||
* run func() on every cpu in /dev/cpu
|
||||
*/
|
||||
void for_every_cpu(void (func)(int))
|
||||
{
|
||||
FILE *fp;
|
||||
int retval;
|
||||
|
||||
fp = fopen(proc_stat, "r");
|
||||
if (fp == NULL) {
|
||||
perror(proc_stat);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
|
||||
if (retval != 0) {
|
||||
perror("/proc/stat format");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
int cpu;
|
||||
|
||||
retval = fscanf(fp,
|
||||
"cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
|
||||
&cpu);
|
||||
if (retval != 1)
|
||||
return;
|
||||
|
||||
func(cpu);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
cmdline(argc, argv);
|
||||
|
||||
if (verbose > 1)
|
||||
printf("x86_energy_perf_policy Nov 24, 2010"
|
||||
" - Len Brown <lenb@kernel.org>\n");
|
||||
if (verbose > 1 && !read_only)
|
||||
printf("new_bias %lld\n", new_bias);
|
||||
|
||||
validate_cpuid();
|
||||
|
||||
if (cpu != -1) {
|
||||
if (read_only)
|
||||
print_msr(cpu);
|
||||
else
|
||||
update_msr(cpu);
|
||||
} else {
|
||||
if (read_only)
|
||||
for_every_cpu(print_msr);
|
||||
else
|
||||
for_every_cpu(update_msr);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Загрузка…
Ссылка в новой задаче