diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 540a466e50f5..5cdd8d100ec9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -102,15 +102,39 @@ struct mce_log { #ifdef __KERNEL__ -extern int mce_disabled; - -#include #include +#include +#include + +extern int mce_disabled; +extern int mce_p5_enabled; + +#ifdef CONFIG_X86_MCE +void mcheck_init(struct cpuinfo_x86 *c); +#else +static inline void mcheck_init(struct cpuinfo_x86 *c) {} +#endif + +#ifdef CONFIG_X86_OLD_MCE +extern int nr_mce_banks; +void amd_mcheck_init(struct cpuinfo_x86 *c); +void intel_p4_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +#endif + +#ifdef CONFIG_X86_ANCIENT_MCE +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); +static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } +#else +static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static inline void enable_p5_mce(void) {} +#endif void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); -extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* * To support more than 128 would need to escape the predefined @@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); -void mce_log_therm_throt_event(__u64 status); - extern atomic_t mce_entry; -void do_machine_check(struct pt_regs *, long); - typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); @@ -167,13 +187,32 @@ void mce_notify_process(void); DECLARE_PER_CPU(struct mce, injectm); extern struct file_operations mce_chrdev_ops; -#ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); -#else -#define mcheck_init(c) do { } while (0) -#endif +/* + * Exception handler + */ + +/* Call the installed machine check handler for this CPU setup. */ +extern void (*machine_check_vector)(struct pt_regs *, long error_code); +void do_machine_check(struct pt_regs *, long); + +/* + * Threshold handler + */ extern void (*mce_threshold_vector)(void); +extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); + +/* + * Thermal handler + */ + +void intel_init_thermal(struct cpuinfo_x86 *c); + +#ifdef CONFIG_X86_NEW_MCE +void mce_log_therm_throt_event(__u64 status); +#else +static inline void mce_log_therm_throt_event(__u64 status) {} +#endif #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h deleted file mode 100644 index c62349ee7860..000000000000 --- a/arch/x86/include/asm/therm_throt.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _ASM_X86_THERM_THROT_H -#define _ASM_X86_THERM_THROT_H - -#include - -extern atomic_t therm_throt_en; -int therm_throt_process(int curr); - -#endif /* _ASM_X86_THERM_THROT_H */ diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 45004faf67ea..188a1ca5ad2b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,11 +1,12 @@ -obj-y = mce.o therm_throt.o +obj-y = mce.o obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o -obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o -obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o +obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o + +obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index 89e510424152..b945d5dbc609 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine Check Handler For AMD Athlon/Duron: */ static void k7_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d9d77cfd8cce..284d1de968bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -44,7 +44,6 @@ #include #include "mce-internal.h" -#include "mce.h" /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) @@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; -int mce_disabled; +int mce_disabled __read_mostly; #ifdef CONFIG_X86_NEW_MCE @@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors * 3: never panic or SIGBUS, log all errors (for testing only) */ -static int tolerant = 1; -static int banks; -static u64 *bank; -static unsigned long notify_user; -static int rip_msr; -static int mce_bootlog = -1; -static int monarch_timeout = -1; -static int mce_panic_timeout; -static int mce_dont_log_ce; -int mce_cmci_disabled; -int mce_ignore_ce; -int mce_ser; +static int tolerant __read_mostly = 1; +static int banks __read_mostly; +static u64 *bank __read_mostly; +static int rip_msr __read_mostly; +static int mce_bootlog __read_mostly = -1; +static int monarch_timeout __read_mostly = -1; +static int mce_panic_timeout __read_mostly; +static int mce_dont_log_ce __read_mostly; +int mce_cmci_disabled __read_mostly; +int mce_ignore_ce __read_mostly; +int mce_ser __read_mostly; -static char trigger[128]; -static char *trigger_argv[2] = { trigger, NULL }; +/* User mode helper program triggered by machine check event */ +static unsigned long mce_need_notify; +static char mce_helper[128]; +static char *mce_helper_argv[2] = { mce_helper, NULL }; static unsigned long dont_init_banks; @@ -180,7 +180,7 @@ void mce_log(struct mce *mce) wmb(); mce->finished = 1; - set_bit(0, ¬ify_user); + set_bit(0, &mce_need_notify); } static void print_mce(struct mce *m) @@ -691,18 +691,21 @@ static atomic_t global_nwo; * in the entry order. * TBD double check parallel CPU hotunplug */ -static int mce_start(int no_way_out, int *order) +static int mce_start(int *no_way_out) { - int nwo; + int order; int cpus = num_online_cpus(); u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - if (!timeout) { - *order = -1; - return no_way_out; - } + if (!timeout) + return -1; - atomic_add(no_way_out, &global_nwo); + atomic_add(*no_way_out, &global_nwo); + /* + * global_nwo should be updated before mce_callin + */ + smp_wmb(); + order = atomic_add_return(1, &mce_callin); /* * Wait for everyone. @@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order) while (atomic_read(&mce_callin) != cpus) { if (mce_timed_out(&timeout)) { atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; + return -1; } ndelay(SPINUNIT); } + /* + * mce_callin should be read before global_nwo + */ + smp_rmb(); + + if (order == 1) { + /* + * Monarch: Starts executing now, the others wait. + */ + atomic_set(&mce_executing, 1); + } else { + /* + * Subject: Now start the scanning loop one by one in + * the original callin order. + * This way when there are any shared banks it will be + * only seen by one CPU before cleared, avoiding duplicates. + */ + while (atomic_read(&mce_executing) < order) { + if (mce_timed_out(&timeout)) { + atomic_set(&global_nwo, 0); + return -1; + } + ndelay(SPINUNIT); + } + } + /* * Cache the global no_way_out state. */ - nwo = atomic_read(&global_nwo); + *no_way_out = atomic_read(&global_nwo); - /* - * Monarch starts executing now, the others wait. - */ - if (*order == 1) { - atomic_set(&mce_executing, 1); - return nwo; - } - - /* - * Now start the scanning loop one by one - * in the original callin order. - * This way when there are any shared banks it will - * be only seen by one CPU before cleared, avoiding duplicates. - */ - while (atomic_read(&mce_executing) < *order) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; - } - ndelay(SPINUNIT); - } - return nwo; + return order; } /* @@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) * check handler. */ int order; - /* * If no_way_out gets set, there is no safe way to recover from this * MCE. If tolerant is cranked up, we'll try anyway. @@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (!banks) goto out; - order = atomic_add_return(1, &mce_callin); mce_setup(&m); m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); @@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * This way we don't report duplicated events on shared banks * because the first one to see it will clear it. */ - no_way_out = mce_start(no_way_out, &order); + order = mce_start(&no_way_out); for (i = 0; i < banks; i++) { __clear_bit(i, toclear); if (!bank[i]) @@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data) static void mce_do_trigger(struct work_struct *work) { - call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); + call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); } static DECLARE_WORK(mce_trigger_work, mce_do_trigger); @@ -1135,7 +1139,7 @@ int mce_notify_irq(void) clear_thread_flag(TIF_MCE_NOTIFY); - if (test_and_clear_bit(0, ¬ify_user)) { + if (test_and_clear_bit(0, &mce_need_notify)) { wake_up_interruptible(&mce_wait); /* @@ -1143,7 +1147,7 @@ int mce_notify_irq(void) * work_pending is always cleared before the function is * executed. */ - if (trigger[0] && !work_pending(&mce_trigger_work)) + if (mce_helper[0] && !work_pending(&mce_trigger_work)) schedule_work(&mce_trigger_work); if (__ratelimit(&ratelimit)) @@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) return; switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (mce_p5_enabled()) - intel_p5_mcheck_init(c); + intel_p5_mcheck_init(c); break; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); @@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev) static void mce_cpu_restart(void *data) { del_timer_sync(&__get_cpu_var(mce_timer)); - if (mce_available(¤t_cpu_data)) - mce_init(); + if (!mce_available(¤t_cpu_data)) + return; + mce_init(); mce_init_timer(); } @@ -1620,6 +1624,26 @@ static void mce_restart(void) on_each_cpu(mce_cpu_restart, NULL, 1); } +/* Toggle features for corrected errors */ +static void mce_disable_ce(void *all) +{ + if (!mce_available(¤t_cpu_data)) + return; + if (all) + del_timer_sync(&__get_cpu_var(mce_timer)); + cmci_clear(); +} + +static void mce_enable_ce(void *all) +{ + if (!mce_available(¤t_cpu_data)) + return; + cmci_reenable(); + cmci_recheck(); + if (all) + mce_init_timer(); +} + static struct sysdev_class mce_sysclass = { .suspend = mce_suspend, .shutdown = mce_shutdown, @@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) { - strcpy(buf, trigger); + strcpy(buf, mce_helper); strcat(buf, "\n"); - return strlen(trigger) + 1; + return strlen(mce_helper) + 1; } static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, @@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *p; int len; - strncpy(trigger, buf, sizeof(trigger)); - trigger[sizeof(trigger)-1] = 0; - len = strlen(trigger); - p = strchr(trigger, '\n'); + strncpy(mce_helper, buf, sizeof(mce_helper)); + mce_helper[sizeof(mce_helper)-1] = 0; + len = strlen(mce_helper); + p = strchr(mce_helper, '\n'); if (*p) *p = 0; @@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, return len; } +static ssize_t set_ignore_ce(struct sys_device *s, + struct sysdev_attribute *attr, + const char *buf, size_t size) +{ + u64 new; + + if (strict_strtoull(buf, 0, &new) < 0) + return -EINVAL; + + if (mce_ignore_ce ^ !!new) { + if (new) { + /* disable ce features */ + on_each_cpu(mce_disable_ce, (void *)1, 1); + mce_ignore_ce = 1; + } else { + /* enable ce features */ + mce_ignore_ce = 0; + on_each_cpu(mce_enable_ce, (void *)1, 1); + } + } + return size; +} + +static ssize_t set_cmci_disabled(struct sys_device *s, + struct sysdev_attribute *attr, + const char *buf, size_t size) +{ + u64 new; + + if (strict_strtoull(buf, 0, &new) < 0) + return -EINVAL; + + if (mce_cmci_disabled ^ !!new) { + if (new) { + /* disable cmci */ + on_each_cpu(mce_disable_ce, NULL, 1); + mce_cmci_disabled = 1; + } else { + /* enable cmci */ + mce_cmci_disabled = 0; + on_each_cpu(mce_enable_ce, NULL, 1); + } + } + return size; +} + static ssize_t store_int_with_restart(struct sys_device *s, struct sysdev_attribute *attr, const char *buf, size_t size) @@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s, static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); +static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); static struct sysdev_ext_attribute attr_check_interval = { _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, @@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = { &check_interval }; +static struct sysdev_ext_attribute attr_ignore_ce = { + _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), + &mce_ignore_ce +}; + +static struct sysdev_ext_attribute attr_cmci_disabled = { + _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), + &mce_cmci_disabled +}; + static struct sysdev_attribute *mce_attrs[] = { - &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, + &attr_tolerant.attr, + &attr_check_interval.attr, + &attr_trigger, &attr_monarch_timeout.attr, + &attr_dont_log_ce.attr, + &attr_ignore_ce.attr, + &attr_cmci_disabled.attr, NULL }; @@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized; static __cpuinit int mce_create_device(unsigned int cpu) { int err; - int i; + int i, j; if (!mce_available(&boot_cpu_data)) return -EIO; @@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (err) goto error; } - for (i = 0; i < banks; i++) { + for (j = 0; j < banks; j++) { err = sysdev_create_file(&per_cpu(mce_dev, cpu), - &bank_attrs[i]); + &bank_attrs[j]); if (err) goto error2; } @@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) return 0; error2: - while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); + while (--j >= 0) + sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); error: while (--i >= 0) sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); @@ -1883,7 +1969,7 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); + zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); err = mce_init_banks(); if (err) @@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) { - if (mce_disabled == 1) + if (mce_disabled) return; switch (c->x86_vendor) { @@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c) static int __init mcheck_enable(char *str) { - mce_disabled = -1; + mce_p5_enabled = 1; return 1; } - __setup("mce", mcheck_enable); #endif /* CONFIG_X86_OLD_MCE */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 84a552b458c8..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include - -#ifdef CONFIG_X86_OLD_MCE -void amd_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -#endif - -#ifdef CONFIG_X86_ANCIENT_MCE -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); -extern int mce_p5_enable; -static inline int mce_p5_enabled(void) { return mce_p5_enable; } -static inline void enable_p5_mce(void) { mce_p5_enable = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline int mce_p5_enabled(void) { return 0; } -static inline void enable_p5_mce(void) { } -#endif - -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); - -#ifdef CONFIG_X86_OLD_MCE - -extern int nr_mce_banks; - -void intel_set_thermal_handler(void); - -#else - -static inline void intel_set_thermal_handler(void) { } - -#endif - -void intel_init_thermal(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c similarity index 100% rename from arch/x86/kernel/cpu/mcheck/mce_amd_64.c rename to arch/x86/kernel/cpu/mcheck/mce_amd.c diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 61e32881f41b..e1acec0f7a32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -1,80 +1,226 @@ /* - * Common code for Intel machine checks + * Intel specific MCE features. + * Copyright 2004 Zwane Mwaikambo + * Copyright (C) 2008, 2009 Intel Corporation + * Author: Andi Kleen */ -#include -#include -#include + #include -#include - -#include -#include -#include +#include +#include #include +#include #include +#include -#include "mce.h" +/* + * Support for Intel Correct Machine Check Interrupts. This allows + * the CPU to raise an interrupt when a corrected machine check happened. + * Normally we pick those up using a regular polling timer. + * Also supports reliable discovery of shared banks. + */ -void intel_init_thermal(struct cpuinfo_x86 *c) +static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); + +/* + * cmci_discover_lock protects against parallel discovery attempts + * which could race against each other. + */ +static DEFINE_SPINLOCK(cmci_discover_lock); + +#define CMCI_THRESHOLD 1 + +static int cmci_supported(int *banks) { - unsigned int cpu = smp_processor_id(); - int tm2 = 0; - u32 l, h; + u64 cap; + + if (mce_cmci_disabled || mce_ignore_ce) + return 0; /* - * Thermal monitoring depends on ACPI, clock modulation - * and APIC as well + * Vendor check is not strictly needed, but the initial + * initialization is vendor keyed and this + * makes sure none of the backdoors are entered otherwise. */ - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC) || - !cpu_has(c, X86_FEATURE_APIC)) { - pr_debug("Thermal monitoring disabled\n"); - return; - } - - /* - * First check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already: - */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); - return; - } - - if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) - tm2 = 1; - - /* Check whether a vector already exists */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG - "CPU%d: Thermal LVT vector (%#x) already installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; - } - - /* We'll mask the thermal vector in the lapic till we're ready: */ - h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; - apic_write(APIC_LVTTHMR, h); - - rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); - - intel_set_thermal_handler(); - - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - - /* Unmask the thermal vector: */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - - printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", - cpu, tm2 ? "TM2" : "TM1"); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + if (!cpu_has_apic || lapic_get_maxlvt() < 6) + return 0; + rdmsrl(MSR_IA32_MCG_CAP, cap); + *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); + return !!(cap & MCG_CMCI_P); +} + +/* + * The interrupt handler. This is called on every event. + * Just call the poller directly to log any events. + * This could in theory increase the threshold under high load, + * but doesn't for now. + */ +static void intel_threshold_interrupt(void) +{ + machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + mce_notify_irq(); +} + +static void print_update(char *type, int *hdr, int num) +{ + if (*hdr == 0) + printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); + *hdr = 1; + printk(KERN_CONT " %s:%d", type, num); +} + +/* + * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks + * on this CPU. Use the algorithm recommended in the SDM to discover shared + * banks. + */ +static void cmci_discover(int banks, int boot) +{ + unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); + unsigned long flags; + int hdr = 0; + int i; + + spin_lock_irqsave(&cmci_discover_lock, flags); + for (i = 0; i < banks; i++) { + u64 val; + + if (test_bit(i, owned)) + continue; + + rdmsrl(MSR_IA32_MC0_CTL2 + i, val); + + /* Already owned by someone else? */ + if (val & CMCI_EN) { + if (test_and_clear_bit(i, owned) || boot) + print_update("SHD", &hdr, i); + __clear_bit(i, __get_cpu_var(mce_poll_banks)); + continue; + } + + val |= CMCI_EN | CMCI_THRESHOLD; + wrmsrl(MSR_IA32_MC0_CTL2 + i, val); + rdmsrl(MSR_IA32_MC0_CTL2 + i, val); + + /* Did the enable bit stick? -- the bank supports CMCI */ + if (val & CMCI_EN) { + if (!test_and_set_bit(i, owned) || boot) + print_update("CMCI", &hdr, i); + __clear_bit(i, __get_cpu_var(mce_poll_banks)); + } else { + WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); + } + } + spin_unlock_irqrestore(&cmci_discover_lock, flags); + if (hdr) + printk(KERN_CONT "\n"); +} + +/* + * Just in case we missed an event during initialization check + * all the CMCI owned banks. + */ +void cmci_recheck(void) +{ + unsigned long flags; + int banks; + + if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) + return; + local_irq_save(flags); + machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + local_irq_restore(flags); +} + +/* + * Disable CMCI on this CPU for all banks it owns when it goes down. + * This allows other CPUs to claim the banks on rediscovery. + */ +void cmci_clear(void) +{ + unsigned long flags; + int i; + int banks; + u64 val; + + if (!cmci_supported(&banks)) + return; + spin_lock_irqsave(&cmci_discover_lock, flags); + for (i = 0; i < banks; i++) { + if (!test_bit(i, __get_cpu_var(mce_banks_owned))) + continue; + /* Disable CMCI */ + rdmsrl(MSR_IA32_MC0_CTL2 + i, val); + val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); + wrmsrl(MSR_IA32_MC0_CTL2 + i, val); + __clear_bit(i, __get_cpu_var(mce_banks_owned)); + } + spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + +/* + * After a CPU went down cycle through all the others and rediscover + * Must run in process context. + */ +void cmci_rediscover(int dying) +{ + int banks; + int cpu; + cpumask_var_t old; + + if (!cmci_supported(&banks)) + return; + if (!alloc_cpumask_var(&old, GFP_KERNEL)) + return; + cpumask_copy(old, ¤t->cpus_allowed); + + for_each_online_cpu(cpu) { + if (cpu == dying) + continue; + if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) + continue; + /* Recheck banks in case CPUs don't all have the same */ + if (cmci_supported(&banks)) + cmci_discover(banks, 0); + } + + set_cpus_allowed_ptr(current, old); + free_cpumask_var(old); +} + +/* + * Reenable CMCI on this CPU in case a CPU down failed. + */ +void cmci_reenable(void) +{ + int banks; + if (cmci_supported(&banks)) + cmci_discover(banks, 0); +} + +static void intel_init_cmci(void) +{ + int banks; + + if (!cmci_supported(&banks)) + return; + + mce_threshold_vector = intel_threshold_interrupt; + cmci_discover(banks, 1); + /* + * For CPU #0 this runs with still disabled APIC, but that's + * ok because only the vector is set up. We still do another + * check for the banks later for CPU #0 just to make sure + * to not miss any events. + */ + apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); + cmci_recheck(); +} + +void mce_intel_feature_init(struct cpuinfo_x86 *c) +{ + intel_init_thermal(c); + intel_init_cmci(); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c deleted file mode 100644 index f2ef6952c400..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Intel specific MCE features. - * Copyright 2004 Zwane Mwaikambo - * Copyright (C) 2008, 2009 Intel Corporation - * Author: Andi Kleen - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mce.h" - -asmlinkage void smp_thermal_interrupt(void) -{ - __u64 msr_val; - - ack_APIC_irq(); - - exit_idle(); - irq_enter(); - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) - mce_log_therm_throt_event(msr_val); - - inc_irq_stat(irq_thermal_count); - irq_exit(); -} - -/* - * Support for Intel Correct Machine Check Interrupts. This allows - * the CPU to raise an interrupt when a corrected machine check happened. - * Normally we pick those up using a regular polling timer. - * Also supports reliable discovery of shared banks. - */ - -static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); - -/* - * cmci_discover_lock protects against parallel discovery attempts - * which could race against each other. - */ -static DEFINE_SPINLOCK(cmci_discover_lock); - -#define CMCI_THRESHOLD 1 - -static int cmci_supported(int *banks) -{ - u64 cap; - - if (mce_cmci_disabled || mce_ignore_ce) - return 0; - - /* - * Vendor check is not strictly needed, but the initial - * initialization is vendor keyed and this - * makes sure none of the backdoors are entered otherwise. - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return 0; - if (!cpu_has_apic || lapic_get_maxlvt() < 6) - return 0; - rdmsrl(MSR_IA32_MCG_CAP, cap); - *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); - return !!(cap & MCG_CMCI_P); -} - -/* - * The interrupt handler. This is called on every event. - * Just call the poller directly to log any events. - * This could in theory increase the threshold under high load, - * but doesn't for now. - */ -static void intel_threshold_interrupt(void) -{ - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - mce_notify_irq(); -} - -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - -/* - * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks - * on this CPU. Use the algorithm recommended in the SDM to discover shared - * banks. - */ -static void cmci_discover(int banks, int boot) -{ - unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); - unsigned long flags; - int hdr = 0; - int i; - - spin_lock_irqsave(&cmci_discover_lock, flags); - for (i = 0; i < banks; i++) { - u64 val; - - if (test_bit(i, owned)) - continue; - - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - - /* Already owned by someone else? */ - if (val & CMCI_EN) { - if (test_and_clear_bit(i, owned) || boot) - print_update("SHD", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - continue; - } - - val |= CMCI_EN | CMCI_THRESHOLD; - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - - /* Did the enable bit stick? -- the bank supports CMCI */ - if (val & CMCI_EN) { - if (!test_and_set_bit(i, owned) || boot) - print_update("CMCI", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - } else { - WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); - } - } - spin_unlock_irqrestore(&cmci_discover_lock, flags); - if (hdr) - printk(KERN_CONT "\n"); -} - -/* - * Just in case we missed an event during initialization check - * all the CMCI owned banks. - */ -void cmci_recheck(void) -{ - unsigned long flags; - int banks; - - if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) - return; - local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - local_irq_restore(flags); -} - -/* - * Disable CMCI on this CPU for all banks it owns when it goes down. - * This allows other CPUs to claim the banks on rediscovery. - */ -void cmci_clear(void) -{ - unsigned long flags; - int i; - int banks; - u64 val; - - if (!cmci_supported(&banks)) - return; - spin_lock_irqsave(&cmci_discover_lock, flags); - for (i = 0; i < banks; i++) { - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) - continue; - /* Disable CMCI */ - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); - __clear_bit(i, __get_cpu_var(mce_banks_owned)); - } - spin_unlock_irqrestore(&cmci_discover_lock, flags); -} - -/* - * After a CPU went down cycle through all the others and rediscover - * Must run in process context. - */ -void cmci_rediscover(int dying) -{ - int banks; - int cpu; - cpumask_var_t old; - - if (!cmci_supported(&banks)) - return; - if (!alloc_cpumask_var(&old, GFP_KERNEL)) - return; - cpumask_copy(old, ¤t->cpus_allowed); - - for_each_online_cpu(cpu) { - if (cpu == dying) - continue; - if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) - continue; - /* Recheck banks in case CPUs don't all have the same */ - if (cmci_supported(&banks)) - cmci_discover(banks, 0); - } - - set_cpus_allowed_ptr(current, old); - free_cpumask_var(old); -} - -/* - * Reenable CMCI on this CPU in case a CPU down failed. - */ -void cmci_reenable(void) -{ - int banks; - if (cmci_supported(&banks)) - cmci_discover(banks, 0); -} - -static void intel_init_cmci(void) -{ - int banks; - - if (!cmci_supported(&banks)) - return; - - mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); - /* - * For CPU #0 this runs with still disabled APIC, but that's - * ok because only the vector is set up. We still do another - * check for the banks later for CPU #0 just to make sure - * to not miss any events. - */ - apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); - cmci_recheck(); -} - -void mce_intel_feature_init(struct cpuinfo_x86 *c) -{ - intel_init_thermal(c); - intel_init_cmci(); -} diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 70b710420f74..f5f2d6f71fb6 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -17,10 +17,9 @@ #include #include +#include #include -#include "mce.h" - static int firstbank; #define MCE_RATE (15*HZ) /* timer rate is 15s */ diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 82cee108a2d3..4482aea9aa2e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -1,21 +1,15 @@ /* * P4 specific Machine Check Exception Reporting */ - -#include #include #include #include #include -#include #include -#include -#include +#include #include -#include "mce.h" - /* as supported by the P4/Xeon family */ struct intel_mce_extended_msrs { u32 eax; @@ -33,46 +27,6 @@ struct intel_mce_extended_msrs { static int mce_num_extended_msrs; - -#ifdef CONFIG_X86_MCE_P4THERMAL - -static void unexpected_thermal_interrupt(struct pt_regs *regs) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", - smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* P4/Xeon Thermal transition interrupt handler: */ -static void intel_thermal_interrupt(struct pt_regs *regs) -{ - __u64 msr_val; - - ack_APIC_irq(); - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - therm_throt_process(msr_val & THERM_STATUS_PROCHOT); -} - -/* Thermal interrupt handler for this CPU setup: */ -static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = - unexpected_thermal_interrupt; - -void smp_thermal_interrupt(struct pt_regs *regs) -{ - irq_enter(); - vendor_thermal_interrupt(regs); - __get_cpu_var(irq_stat).irq_thermal_count++; - irq_exit(); -} - -void intel_set_thermal_handler(void) -{ - vendor_thermal_interrupt = intel_thermal_interrupt; -} - -#endif /* CONFIG_X86_MCE_P4THERMAL */ - /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) { diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 015f481ab1b0..5c0e6533d9bc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -10,12 +10,11 @@ #include #include +#include #include -#include "mce.h" - /* By default disabled */ -int mce_p5_enable; +int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) @@ -43,16 +42,14 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; + /* Default P5 to off as its often misconnected: */ + if (!mce_p5_enabled) + return; + /* Check for MCE support: */ if (!cpu_has(c, X86_FEATURE_MCE)) return; -#ifdef CONFIG_X86_OLD_MCE - /* Default P5 to off as its often misconnected: */ - if (mce_disabled != -1) - return; -#endif - machine_check_vector = pentium_machine_check; /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 43c24e667457..01e4f8178183 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine Check Handler For PII/PIII */ static void intel_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7b1ae2e20ba5..bff8dd191dd5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -13,13 +13,23 @@ * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. * Inspired by Ross Biro's and Al Borchers' counter code. */ +#include #include #include +#include #include #include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) @@ -27,7 +37,7 @@ static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -atomic_t therm_throt_en = ATOMIC_INIT(0); +static atomic_t therm_throt_en = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ @@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { * 1 : Event should be logged further, and a message has been * printed to the syslog. */ -int therm_throt_process(int curr) +static int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); @@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void) return 0; } - device_initcall(thermal_throttle_init_device); + #endif /* CONFIG_SYSFS */ + +/* Thermal transition interrupt handler */ +static void intel_thermal_interrupt(void) +{ + __u64 msr_val; + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) + mce_log_therm_throt_event(msr_val); +} + +static void unexpected_thermal_interrupt(void) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; + +asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) +{ + exit_idle(); + irq_enter(); + inc_irq_stat(irq_thermal_count); + smp_thermal_vector(); + irq_exit(); + /* Ack only at the end to avoid potential reentry */ + ack_APIC_irq(); +} + +void intel_init_thermal(struct cpuinfo_x86 *c) +{ + unsigned int cpu = smp_processor_id(); + int tm2 = 0; + u32 l, h; + + /* Thermal monitoring depends on ACPI and clock modulation*/ + if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + return; + + /* + * First check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already: + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG + "CPU%d: Thermal monitoring handled by SMI\n", cpu); + return; + } + + if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) + tm2 = 1; + + /* Check whether a vector already exists */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG + "CPU%d: Thermal LVT vector (%#x) already installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; + } + + /* We'll mask the thermal vector in the lapic till we're ready: */ + h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; + apic_write(APIC_LVTTHMR, h); + + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, + l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); + + smp_thermal_vector = intel_thermal_interrupt; + + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); + + /* Unmask the thermal vector: */ + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + + printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? "TM2" : "TM1"); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); +} diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090b..54060f565974 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -9,10 +9,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1e1e27b7d438..71f9c74814d8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -64,8 +65,6 @@ #include #include -#include "cpu/mcheck/mce.h" - asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */