From 4c7f8900f1d8a0e464e7092f132a7e93f7c20f2f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Feb 2008 07:06:55 +0100 Subject: [PATCH 001/566] x86: stackprotector & PARAVIRT fix on paravirt enabled 64-bit kernels the paravirt ops do function calls themselves - which is bad with the stackprotector - for example pda_init() loads 0 into %gs and then does MSR_GS_BASE write (which modifies gs.base) - but that MSR write is a function call on paravirt, which with stackprotector tries to read the stack canary from the PDA ... crashing the bootup. the solution was suggested by Arjan van de Ven: to exclude paravirt.c from stackprotector, too many lowlevel functionality is in it. It's not like we'll have paravirt functions with character arrays on their stack anyway... Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b4720..8161e5a91ca9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,6 +14,7 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc_64.o := $(nostackp) +CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 08:48:23 +0100 Subject: [PATCH 002/566] x86: fix stackprotector canary updates during context switches fix a bug noticed and fixed by pageexec@freemail.hu. if built with -fstack-protector-all then we'll have canary checks built into the __switch_to() function. That does not work well with the canary-switching code there: while we already use the %rsp of the new task, we still call __switch_to() whith the previous task's canary value in the PDA, hence the __switch_to() ssp prologue instructions will store the previous canary. Then we update the PDA and upon return from __switch_to() the canary check triggers and we panic. so update the canary after we have called __switch_to(), where we are at the same stackframe level as the last stackframe of the next (and now freshly current) task. Note: this means that we call __switch_to() [and its sub-functions] still with the old canary, but that is not a problem, both the previous and the next task has a high-quality canary. The only (mostly academic) disadvantage is that the canary of one task may leak onto the stack of another task, increasing the risk of information leaks, were an attacker able to read the stack of specific tasks (but not that of others). To solve this we'll have to reorganize the way we switch tasks, and move the PDA setting into the switch_to() assembly code. That will happen in another patch. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 - include/asm-x86/pda.h | 2 -- include/asm-x86/system.h | 6 +++++- include/linux/sched.h | 3 +-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988b..d8640388039e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -640,7 +640,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); #ifdef CONFIG_CC_STACKPROTECTOR - write_pda(stack_canary, next_p->stack_canary); /* * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 101fb9e11954..62b734986a44 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -16,11 +16,9 @@ struct x8664_pda { unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int cpunumber; /* 36 Logical CPU number */ -#ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at offset 40!!! */ -#endif char *irqstackptr; unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd79b29..172f54185093 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -92,6 +92,8 @@ do { \ ".globl thread_return\n" \ "thread_return:\n\t" \ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -103,7 +105,9 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \ + [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..d6a515158783 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1096,10 +1096,9 @@ struct task_struct { pid_t pid; pid_t tgid; -#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; -#endif + /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with From ce22bd92cba0958e052cb1ce0f89f1d3a02b60a7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 12 May 2008 15:44:31 +0200 Subject: [PATCH 003/566] x86: setup stack canary for the idle threads The idle threads for non-boot CPUs are a bit special in how they are created; the result is that these don't have the stack canary set up properly in their PDA. Easiest fix is to just always set the PDA up correctly when entering the idle thread; this is a NOP for the boot cpu. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d8640388039e..9e69e223023e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -146,6 +146,15 @@ static inline void play_dead(void) void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; + +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us. This is as good a place as any for + * doing that. + */ + write_pda(stack_canary, current->stack_canary); +#endif /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); From 7e09b2a02dae4616a5a26000169964b32f86cd35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:22:34 +0100 Subject: [PATCH 004/566] x86: fix canary of the boot CPU's idle task the boot CPU's idle task has a zero stackprotector canary value. this is a special task that is never forked, so the fork code does not randomize its canary. Do it when we hit cpu_idle(). Academic sidenote: this means that the early init code runs with a zero canary and hence the canary becomes predictable for this short, boot-only amount of time. Although attack vectors against early init code are very rare, it might make sense to move this initialization to an earlier point. (to one of the early init functions that never return - such as start_kernel()) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9e69e223023e..d4c7ac7aa430 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -150,9 +150,13 @@ void cpu_idle(void) #ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us. This is as good a place as any for - * doing that. + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): */ + current->stack_canary = get_random_int(); write_pda(stack_canary, current->stack_canary); #endif /* endless idle loop with no priority at all */ From 517a92c4e19fcea815332d3155e9fb7723251274 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:02:13 +0100 Subject: [PATCH 005/566] panic: print more informative messages on stackprotect failure pointed out by pageexec@freemail.hu: we just simply panic() when there's a stackprotector attack - giving the attacked person no information about what kernel code the attack went against. print out the attacked function. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 425567f45b9f..f236001cc4db 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -327,7 +327,8 @@ EXPORT_SYMBOL(warn_on_slowpath); */ void __stack_chk_fail(void) { - panic("stack-protector: Kernel stack is corrupted"); + panic("stack-protector: Kernel stack is corrupted in: %p\n", + __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); #endif From 5cb273013e182a35e7db614d3e20a144cba71e53 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:07:01 +0100 Subject: [PATCH 006/566] panic: print out stacktrace if DEBUG_BUGVERBOSE if CONFIG_DEBUG_BUGVERBOSE is set then the user most definitely wanted to see as much information about kernel crashes as possible - so give them at least a stack dump. this is particularly useful for stackprotector related panics, where the stacktrace can give us the exact location of the (attempted) attack. Pointed out by pageexec@freemail.hu in the stackprotector breakage threads. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index f236001cc4db..17aad578a2f2 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -80,6 +80,9 @@ NORET_TYPE void panic(const char * fmt, ...) vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); +#ifdef CONFIG_DEBUG_BUGVERBOSE + dump_stack(); +#endif bust_spinlocks(0); /* From 72370f2a5b227bd3817593a6b15ea3f53f51dfcb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2008 16:15:34 +0100 Subject: [PATCH 007/566] x86: if stackprotector is enabled, thn use stack-protector-all by default also enable the rodata and nx tests. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 3 ++- arch/x86/Kconfig.debug | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dcbec34154cf..83d8392c1334 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1142,7 +1142,7 @@ config SECCOMP config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 && EXPERIMENTAL && BROKEN + depends on X86_64 help This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of critical functions, a canary @@ -1159,6 +1159,7 @@ config CC_STACKPROTECTOR config CC_STACKPROTECTOR_ALL bool "Use stack-protector for all functions" depends on CC_STACKPROTECTOR + default y help Normally, GCC only inserts the canary value protection for functions that use large-ish on-stack buffers. By enabling diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ac1e31ba4795..b5c5b55d0437 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -102,6 +102,7 @@ config DIRECT_GBPAGES config DEBUG_RODATA_TEST bool "Testcase for the DEBUG_RODATA feature" depends on DEBUG_RODATA + default y help This option enables a testcase for the DEBUG_RODATA feature as well as for the change_page_attr() infrastructure. From 9b5609fd773e6ac0b1d6d6e1bf68f32cca64e06b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:41:09 +0100 Subject: [PATCH 008/566] stackprotector: include files create for core kernel files to include. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 4 ++++ include/linux/stackprotector.h | 8 ++++++++ init/main.c | 1 + 3 files changed, 13 insertions(+) create mode 100644 include/asm-x86/stackprotector.h create mode 100644 include/linux/stackprotector.h diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h new file mode 100644 index 000000000000..dcac7a6bdba2 --- /dev/null +++ b/include/asm-x86/stackprotector.h @@ -0,0 +1,4 @@ +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h new file mode 100644 index 000000000000..d3e8bbe602f8 --- /dev/null +++ b/include/linux/stackprotector.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_STACKPROTECTOR_H +#define _LINUX_STACKPROTECTOR_H 1 + +#ifdef CONFIG_CC_STACKPROTECTOR +# include +#endif + +#endif diff --git a/init/main.c b/init/main.c index f7fb20021d48..a84322ca64a2 100644 --- a/init/main.c +++ b/init/main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include From 18aa8bb12dcb10adc3d7c9d69714d53667c0ab7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:42:02 +0100 Subject: [PATCH 009/566] stackprotector: add boot_init_stack_canary() add the boot_init_stack_canary() and make the secondary idle threads use it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 6 ++---- include/asm-x86/stackprotector.h | 20 ++++++++++++++++++++ include/linux/stackprotector.h | 4 ++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d4c7ac7aa430..5107cb214c7b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -147,7 +147,6 @@ void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; -#ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have @@ -156,9 +155,8 @@ void cpu_idle(void) * invalid canaries already on the stack wont ever * trigger): */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); -#endif + boot_init_stack_canary(); + /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index dcac7a6bdba2..0f91f7a2688c 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,4 +1,24 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): + */ + current->stack_canary = get_random_int(); + write_pda(stack_canary, current->stack_canary); +} + #endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index d3e8bbe602f8..422e71aafd0b 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -3,6 +3,10 @@ #ifdef CONFIG_CC_STACKPROTECTOR # include +#else +static inline void boot_init_stack_canary(void) +{ +} #endif #endif From 420594296838fdc9a674470d710cda7d1487f9f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:44:08 +0100 Subject: [PATCH 010/566] x86: fix the stackprotector canary of the boot CPU Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 + include/linux/stackprotector.h | 4 ++++ init/main.c | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5107cb214c7b..cce47f7fbf22 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -16,6 +16,7 @@ #include +#include #include #include #include diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 422e71aafd0b..6f3e54c704c0 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -1,6 +1,10 @@ #ifndef _LINUX_STACKPROTECTOR_H #define _LINUX_STACKPROTECTOR_H 1 +#include +#include +#include + #ifdef CONFIG_CC_STACKPROTECTOR # include #else diff --git a/init/main.c b/init/main.c index a84322ca64a2..b44e4eb0f5e3 100644 --- a/init/main.c +++ b/init/main.c @@ -546,6 +546,12 @@ asmlinkage void __init start_kernel(void) unwind_init(); lockdep_init(); debug_objects_early_init(); + + /* + * Set up the the initial canary ASAP: + */ + boot_init_stack_canary(); + cgroup_init_early(); local_irq_disable(); From 960a672bd9f1ec06e8f197cf81a50fd07ea02e7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:56:04 +0100 Subject: [PATCH 011/566] x86: stackprotector: mix TSC to the boot canary mix the TSC to the boot canary. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index 0f91f7a2688c..3baf7ad89be1 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +#include + /* * Initialize the stackprotector canary value. * @@ -9,16 +11,28 @@ */ static __always_inline void boot_init_stack_canary(void) { + u64 canary; + u64 tsc; + /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have * a 0 stack canary. This is a good place for updating * it, as we wont ever return from this function (so the * invalid canaries already on the stack wont ever - * trigger): + * trigger). + * + * We both use the random pool and the current TSC as a source + * of randomness. The TSC only matters for very early init, + * there it already has some randomness on most systems. Later + * on during the bootup the random pool has true entropy too. */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); + get_random_bytes(&canary, sizeof(canary)); + tsc = __native_read_tsc(); + canary += tsc + (tsc << 32UL); + + current->stack_canary = canary; + write_pda(stack_canary, canary); } #endif From 113c5413cf9051cc50b88befdc42e3402bb92115 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 10:36:03 +0100 Subject: [PATCH 012/566] x86: unify stackprotector features streamline the stackprotector features under a single option and make the stronger feature the one accessible. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 83d8392c1334..0cd1695c24fb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1140,13 +1140,17 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config CC_STACKPROTECTOR_ALL + bool + config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" depends on X86_64 + select CC_STACKPROTECTOR_ALL help - This option turns on the -fstack-protector GCC feature. This - feature puts, at the beginning of critical functions, a canary - value on the stack just before the return address, and validates + This option turns on the -fstack-protector GCC feature. This + feature puts, at the beginning of functions, a canary value on + the stack just before the return address, and validates the value just before actually returning. Stack based buffer overflows (that need to overwrite this return address) now also overwrite the canary, which gets detected and the attack is then @@ -1154,16 +1158,8 @@ config CC_STACKPROTECTOR This feature requires gcc version 4.2 or above, or a distribution gcc with the feature backported. Older versions are automatically - detected and for those versions, this configuration option is ignored. - -config CC_STACKPROTECTOR_ALL - bool "Use stack-protector for all functions" - depends on CC_STACKPROTECTOR - default y - help - Normally, GCC only inserts the canary value protection for - functions that use large-ish on-stack buffers. By enabling - this option, GCC will be asked to do this for ALL functions. + detected and for those versions, this configuration option is + ignored. (and a warning is printed during bootup) source kernel/Kconfig.hz From 54371a43a66f4477889769b4fa00df936855dc8f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 15 Feb 2008 15:33:12 -0800 Subject: [PATCH 013/566] x86: add CONFIG_CC_STACKPROTECTOR self-test This patch adds a simple self-test capability to the stackprotector feature. The test deliberately overflows a stack buffer and then checks if the canary trap function gets called. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 17aad578a2f2..50cf9257b234 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -324,14 +324,82 @@ EXPORT_SYMBOL(warn_on_slowpath); #endif #ifdef CONFIG_CC_STACKPROTECTOR + +static unsigned long __stack_check_testing; +/* + * Self test function for the stack-protector feature. + * This test requires that the local variable absolutely has + * a stack slot, hence the barrier()s. + */ +static noinline void __stack_chk_test_func(void) +{ + unsigned long foo; + barrier(); + /* + * we need to make sure we're not about to clobber the return address, + * while real exploits do this, it's unhealthy on a running system. + * Besides, if we would, the test is already failed anyway so + * time to pull the emergency brake on it. + */ + if ((unsigned long)__builtin_return_address(0) == + *(((unsigned long *)&foo)+1)) { + printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); + return; + } +#ifdef CONFIG_FRAME_POINTER + /* We also don't want to clobber the frame pointer */ + if ((unsigned long)__builtin_return_address(0) == + *(((unsigned long *)&foo)+2)) { + printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); + return; + } +#endif + barrier(); + if (current->stack_canary == *(((unsigned long *)&foo)+1)) + *(((unsigned long *)&foo)+1) = 0; + else + printk(KERN_ERR "No -fstack-protector canary found\n"); + barrier(); +} + +static int __stack_chk_test(void) +{ + printk(KERN_INFO "Testing -fstack-protector-all feature\n"); + __stack_check_testing = (unsigned long)&__stack_chk_test_func; + __stack_chk_test_func(); + if (__stack_check_testing) { + printk(KERN_ERR "-fstack-protector-all test failed\n"); + WARN_ON(1); + } + return 0; +} /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ void __stack_chk_fail(void) { + if (__stack_check_testing == (unsigned long)&__stack_chk_test_func) { + long delta; + + delta = (unsigned long)__builtin_return_address(0) - + __stack_check_testing; + /* + * The test needs to happen inside the test function, so + * check if the return address is close to that function. + * The function is only 2 dozen bytes long, but keep a wide + * safety margin to avoid panic()s for normal users regardless + * of the quality of the compiler. + */ + if (delta >= 0 && delta <= 400) { + __stack_check_testing = 0; + return; + } + } panic("stack-protector: Kernel stack is corrupted in: %p\n", __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); + +late_initcall(__stack_chk_test); #endif From b719ac56c0032bc1602914c6ea70b0f1581b08c7 Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Mon, 14 Apr 2008 10:03:50 -0700 Subject: [PATCH 014/566] panic.c: fix whitespace additions trivial: remove white space addition in stack protector Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 50cf9257b234..866be9b72e4f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -341,14 +341,14 @@ static noinline void __stack_chk_test_func(void) * Besides, if we would, the test is already failed anyway so * time to pull the emergency brake on it. */ - if ((unsigned long)__builtin_return_address(0) == + if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+1)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); return; } #ifdef CONFIG_FRAME_POINTER /* We also don't want to clobber the frame pointer */ - if ((unsigned long)__builtin_return_address(0) == + if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+2)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); return; From b40a4392a3c262e0d1b5379b4e142a8eefa63439 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 18 Apr 2008 06:16:45 -0700 Subject: [PATCH 015/566] stackprotector: turn not having the right gcc into a #warning If the user selects the stack-protector config option, but does not have a gcc that has the right bits enabled (for example because it isn't build with a glibc that supports TLS, as is common for cross-compilers, but also because it may be too old), then the runtime test fails right now. This patch adds a warning message for this scenario. This warning accomplishes two goals 1) the user is informed that the security option he selective isn't available 2) the user is suggested to turn of the CONFIG option that won't work for him, and would make the runtime test fail anyway. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Makefile | 2 +- kernel/panic.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3cff3c894cf3..c3e0eeeb1dd2 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -73,7 +73,7 @@ else stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ - "$(CC)" -fstack-protector ) + "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ "$(CC)" -fstack-protector-all ) diff --git a/kernel/panic.c b/kernel/panic.c index 866be9b72e4f..6729e3f4ebcb 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -325,6 +325,9 @@ EXPORT_SYMBOL(warn_on_slowpath); #ifdef CONFIG_CC_STACKPROTECTOR +#ifndef GCC_HAS_SP +#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. +#endif static unsigned long __stack_check_testing; /* * Self test function for the stack-protector feature. From 7c9f8861e6c9c839f913e49b98c3854daca18f27 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 22 Apr 2008 16:38:23 -0500 Subject: [PATCH 016/566] stackprotector: use canary at end of stack to indicate overruns at oops time (Updated with a common max-stack-used checker that knows about the canary, as suggested by Joe Perches) Use a canary at the end of the stack to clearly indicate at oops time whether the stack has ever overflowed. This is a very simple implementation with a couple of drawbacks: 1) a thread may legitimately use exactly up to the last word on the stack -- but the chances of doing this and then oopsing later seem slim 2) it's possible that the stack usage isn't dense enough that the canary location could get skipped over -- but the worst that happens is that we don't flag the overrun -- though this happens fairly often in my testing :( With the code in place, an intentionally-bloated stack oops might do: BUG: unable to handle kernel paging request at ffff8103f84cc680 IP: [] update_curr+0x9a/0xa8 PGD 8063 PUD 0 Thread overran stack or stack corrupted Oops: 0000 [1] SMP CPU 0 ... ... unless the stack overrun is so bad that it corrupts some other thread. Signed-off-by: Eric Sandeen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault.c | 7 +++++++ include/linux/magic.h | 1 + include/linux/sched.h | 13 +++++++++++++ kernel/exit.c | 5 +---- kernel/fork.c | 5 +++++ kernel/sched.c | 7 +------ 6 files changed, 28 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e1798c75a..1f524df68b96 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -581,6 +582,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long address; int write, si_code; int fault; + unsigned long *stackend; + #ifdef CONFIG_X86_64 unsigned long flags; #endif @@ -850,6 +853,10 @@ no_context: show_fault_oops(regs, error_code, address); + stackend = end_of_stack(tsk); + if (*stackend != STACK_END_MAGIC) + printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; diff --git a/include/linux/magic.h b/include/linux/magic.h index 1fa0c2ce4dec..74e68e201166 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -42,4 +42,5 @@ #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA +#define STACK_END_MAGIC 0x57AC6E9D #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d6a515158783..c5181e77f305 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1969,6 +1969,19 @@ static inline unsigned long *end_of_stack(struct task_struct *p) extern void thread_info_cache_init(void); +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ + n++; + } while (!*n); + + return (unsigned long)n - (unsigned long)end_of_stack(p); +} +#endif + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/kernel/exit.c b/kernel/exit.c index 8f6185e69b69..fb8de6cbf2c7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -899,12 +899,9 @@ static void check_stack_usage(void) { static DEFINE_SPINLOCK(low_water_lock); static int lowest_to_date = THREAD_SIZE; - unsigned long *n = end_of_stack(current); unsigned long free; - while (*n == 0) - n++; - free = (unsigned long)n - (unsigned long)end_of_stack(current); + free = stack_not_used(current); if (free >= lowest_to_date) return; diff --git a/kernel/fork.c b/kernel/fork.c index 19908b26cf80..d428336e7aa1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; + unsigned long *stackend; + int err; prepare_to_copy(orig); @@ -211,6 +214,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto out; setup_thread_stack(tsk, orig); + stackend = end_of_stack(tsk); + *stackend = STACK_END_MAGIC; /* for overflow detection */ #ifdef CONFIG_CC_STACKPROTECTOR tsk->stack_canary = get_random_int(); diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a91539..a964ed945094 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5748,12 +5748,7 @@ void sched_show_task(struct task_struct *p) printk(KERN_CONT " %016lx ", thread_saved_pc(p)); #endif #ifdef CONFIG_DEBUG_STACK_USAGE - { - unsigned long *n = end_of_stack(p); - while (!*n) - n++; - free = (unsigned long)n - (unsigned long)end_of_stack(p); - } + free = stack_not_used(p); #endif printk(KERN_CONT "%5lu %5d %6d\n", free, task_pid_nr(p), task_pid_nr(p->real_parent)); From aa92db14270b79f0f91a9060b547a46f9e2639da Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 11 Jul 2008 05:09:55 -0700 Subject: [PATCH 017/566] stackprotector: better self-test check stackprotector functionality by manipulating the canary briefly during bootup. far more robust than trying to overflow the stack. (which is architecture dependent, etc.) Signed-off-by: Ingo Molnar --- kernel/panic.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 6729e3f4ebcb..28153aec7100 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -347,22 +347,18 @@ static noinline void __stack_chk_test_func(void) if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+1)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - return; } #ifdef CONFIG_FRAME_POINTER /* We also don't want to clobber the frame pointer */ if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+2)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - return; } #endif - barrier(); - if (current->stack_canary == *(((unsigned long *)&foo)+1)) - *(((unsigned long *)&foo)+1) = 0; - else + if (current->stack_canary != *(((unsigned long *)&foo)+1)) printk(KERN_ERR "No -fstack-protector canary found\n"); - barrier(); + + current->stack_canary = ~current->stack_canary; } static int __stack_chk_test(void) @@ -373,7 +369,8 @@ static int __stack_chk_test(void) if (__stack_check_testing) { printk(KERN_ERR "-fstack-protector-all test failed\n"); WARN_ON(1); - } + }; + current->stack_canary = ~current->stack_canary; return 0; } /* From af9ff7868f0f76d3364351b1641b9dfa99588e77 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 12 Jul 2008 09:36:38 -0700 Subject: [PATCH 018/566] x86: simplify stackprotector self-check Clean up the code by removing no longer needed code; make sure the pda is updated and kept in sync Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/asm-x86/pda.h | 1 + kernel/panic.c | 29 +++++++---------------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 62b734986a44..a5ff5bb76299 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -131,4 +131,5 @@ do { \ #define PDA_STACKOFFSET (5*8) +#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) #endif diff --git a/kernel/panic.c b/kernel/panic.c index 28153aec7100..87445a894c3a 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -328,37 +328,21 @@ EXPORT_SYMBOL(warn_on_slowpath); #ifndef GCC_HAS_SP #warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. #endif + static unsigned long __stack_check_testing; + /* * Self test function for the stack-protector feature. * This test requires that the local variable absolutely has - * a stack slot, hence the barrier()s. + * a stack slot. */ static noinline void __stack_chk_test_func(void) { - unsigned long foo; - barrier(); - /* - * we need to make sure we're not about to clobber the return address, - * while real exploits do this, it's unhealthy on a running system. - * Besides, if we would, the test is already failed anyway so - * time to pull the emergency brake on it. - */ - if ((unsigned long)__builtin_return_address(0) == - *(((unsigned long *)&foo)+1)) { - printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - } -#ifdef CONFIG_FRAME_POINTER - /* We also don't want to clobber the frame pointer */ - if ((unsigned long)__builtin_return_address(0) == - *(((unsigned long *)&foo)+2)) { - printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - } -#endif - if (current->stack_canary != *(((unsigned long *)&foo)+1)) - printk(KERN_ERR "No -fstack-protector canary found\n"); + unsigned long dummy_buffer[64]; /* force gcc to use the canary */ current->stack_canary = ~current->stack_canary; + refresh_stack_canary(); + dummy_buffer[3] = 1; /* fool gcc into keeping the variable */ } static int __stack_chk_test(void) @@ -371,6 +355,7 @@ static int __stack_chk_test(void) WARN_ON(1); }; current->stack_canary = ~current->stack_canary; + refresh_stack_canary(); return 0; } /* From 4f962d4d65923d7b722192e729840cfb79af0a5a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Jul 2008 21:42:44 +0200 Subject: [PATCH 019/566] stackprotector: remove self-test turns out gcc generates such stackprotector-failure sequences in certain circumstances: movq -8(%rbp), %rax # D.16032, xorq %gs:40, %rax #, jne .L17 #, leave ret .L17: call __stack_chk_fail # .size __stack_chk_test_func, .-__stack_chk_test_func .section .init.text,"ax",@progbits .type panic_setup, @function panic_setup: pushq %rbp # note that there's no jump back to the failing context after the call to __stack_chk_fail - i.e. it has a ((noreturn)) attribute. Which is fair enough in the normal case but kills the self-test. (as we cannot reliably return in the self-test) Signed-off-by: Ingo Molnar --- kernel/panic.c | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 87445a894c3a..c35c9eca3eb2 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -329,62 +329,15 @@ EXPORT_SYMBOL(warn_on_slowpath); #warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. #endif -static unsigned long __stack_check_testing; - -/* - * Self test function for the stack-protector feature. - * This test requires that the local variable absolutely has - * a stack slot. - */ -static noinline void __stack_chk_test_func(void) -{ - unsigned long dummy_buffer[64]; /* force gcc to use the canary */ - - current->stack_canary = ~current->stack_canary; - refresh_stack_canary(); - dummy_buffer[3] = 1; /* fool gcc into keeping the variable */ -} - -static int __stack_chk_test(void) -{ - printk(KERN_INFO "Testing -fstack-protector-all feature\n"); - __stack_check_testing = (unsigned long)&__stack_chk_test_func; - __stack_chk_test_func(); - if (__stack_check_testing) { - printk(KERN_ERR "-fstack-protector-all test failed\n"); - WARN_ON(1); - }; - current->stack_canary = ~current->stack_canary; - refresh_stack_canary(); - return 0; -} /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ void __stack_chk_fail(void) { - if (__stack_check_testing == (unsigned long)&__stack_chk_test_func) { - long delta; - - delta = (unsigned long)__builtin_return_address(0) - - __stack_check_testing; - /* - * The test needs to happen inside the test function, so - * check if the return address is close to that function. - * The function is only 2 dozen bytes long, but keep a wide - * safety margin to avoid panic()s for normal users regardless - * of the quality of the compiler. - */ - if (delta >= 0 && delta <= 400) { - __stack_check_testing = 0; - return; - } - } panic("stack-protector: Kernel stack is corrupted in: %p\n", __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); -late_initcall(__stack_chk_test); #endif From 7760ec77ab2a9e48bdd0d13341446a8a51f0b9f1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 18:10:13 +0530 Subject: [PATCH 020/566] x86: smp.h remove obsolete function declaration Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 830b9fcb6427..83a4cc074315 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -22,7 +22,6 @@ extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_callin_map; -extern void (*mtrr_hook)(void); extern void zap_low_mappings(void); extern int __cpuinit get_local_pda(int cpu); From dacf7333571d770366bff74d10b56aa545434605 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 17:26:35 +0530 Subject: [PATCH 021/566] x86: smp.h move zap_low_mappings declartion to tlbflush.h Impact: cleanup, moving NON-SMP stuff from smp.h Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/tlbflush.h | 2 ++ arch/x86/mm/init_32.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 83a4cc074315..64c9e848f137 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -22,8 +22,6 @@ extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_callin_map; -extern void zap_low_mappings(void); - extern int __cpuinit get_local_pda(int cpu); extern int smp_num_siblings; diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb549116..aed0b700b837 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -175,4 +175,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } +extern void zap_low_mappings(void); + #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f99a6c6c432e..a9dd0b7ad618 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,7 +49,6 @@ #include #include #include -#include unsigned int __VMALLOC_RESERVE = 128 << 20; From 6e5385d44b2df05e50a8d07ba0e14d3e32685237 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 18:11:35 +0530 Subject: [PATCH 022/566] x86: smp.h move prefill_possible_map declartion to cpu.h Impact: cleanup, moving NON-SMP stuff from smp.h Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 10 ++++++++++ arch/x86/include/asm/smp.h | 6 ------ arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 1 - 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bae482df6039..29aa6d0752b9 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,16 @@ #include #include +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#endif /* CONFIG_SMP */ + struct x86_cpu { struct cpu cpu; }; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 64c9e848f137..62bd3f68269a 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -138,8 +138,6 @@ void play_dead_common(void); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -extern void prefill_possible_map(void); - void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) @@ -148,10 +146,6 @@ static inline int num_booting_cpus(void) { return cpus_weight(cpu_callout_map); } -#else -static inline void prefill_possible_map(void) -{ -} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus __cpuinitdata; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf69..f41c4486c270 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -89,7 +89,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 07576bee03ef..f8c885bed18c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include From f472cdba849cc3d838f3788469316e8572463a8c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:34:25 +0530 Subject: [PATCH 023/566] x86: smp.h move stack_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 2 ++ arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/cpu/common.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 29aa6d0752b9..f958e7e49c05 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,8 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define stack_smp_processor_id() 0 + #endif /* CONFIG_SMP */ struct x86_cpu { diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 62bd3f68269a..ed4af9a89cfd 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -173,7 +173,6 @@ extern int safe_smp_processor_id(void); #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3f95a40f718a..f7619a2eaffe 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include From 96b89dc6598a50e3aac8e2c6d826ae3795b7d030 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:35:48 +0530 Subject: [PATCH 024/566] x86: smp.h move safe_smp_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 + arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/reboot.c | 1 + arch/x86/mach-voyager/setup.c | 1 + 5 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index f958e7e49c05..4c16888ffa3f 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define safe_smp_processor_id() 0 #define stack_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ed4af9a89cfd..c92b93594ab3 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -172,7 +172,6 @@ extern int safe_smp_processor_id(void); #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ #define cpu_physical_id(cpu) boot_cpu_physical_apicid -#define safe_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c689d19e35ab..11b93cabdf78 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643b..f8536fee5c12 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 # include diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index a580b9562e76..0ade62555ff3 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -9,6 +9,7 @@ #include #include #include +#include void __init pre_intr_init_hook(void) { From af8968abf09fe5984bdd206e54e0eeb1dc1fa29c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:37:33 +0530 Subject: [PATCH 025/566] x86: smp.h move cpu_physical_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 + arch/x86/include/asm/smp.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 4c16888ffa3f..89edafb93390 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 #define stack_smp_processor_id() 0 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c92b93594ab3..c975b6f83c68 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -170,8 +170,6 @@ extern int safe_smp_processor_id(void); }) #define safe_smp_processor_id() smp_processor_id() -#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ -#define cpu_physical_id(cpu) boot_cpu_physical_apicid #endif #ifdef CONFIG_X86_LOCAL_APIC From 6d652ea1d056390a0c33db92b44ed219284b71af Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:38:59 +0530 Subject: [PATCH 026/566] x86: smp.h move boot_cpu_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 7 +++++++ arch/x86/include/asm/smp.h | 6 ------ arch/x86/kernel/io_apic.c | 1 + drivers/pci/intr_remapping.c | 1 + 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 89edafb93390..f03b23e32864 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -31,4 +31,11 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); + +#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +extern unsigned char boot_cpu_id; +#else +#define boot_cpu_id 0 +#endif + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c975b6f83c68..74ad9ef6ae02 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -212,11 +212,5 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536c..109c91db2026 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f78371b22529..5a57753ea9fc 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "intr_remapping.h" From 41401db698cbb5d1869776bf336881db267e7d19 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:42:46 +0530 Subject: [PATCH 027/566] x86: rename intel_mp_floating to mpf_intel Impact: cleanup, solve 80 columns wrap problems intel_mp_floating should be renamed to mpf_intel. The reason: the 'f' in MPF already means 'floating' which means MP Floating pointer structure - no need to repeat that in the type name. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec_def.h | 3 ++- arch/x86/kernel/mpparse.c | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 59568bc4767f..187dc9201932 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -24,7 +24,8 @@ # endif #endif -struct intel_mp_floating { +/* Intel MP Floating Pointer Structure */ +struct mpf_intel { char mpf_signature[4]; /* "_MP_" */ unsigned int mpf_physptr; /* Configuration table address */ unsigned char mpf_length; /* Our length (paragraphs) */ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c0601c2848a1..6cea941c4dbb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -569,14 +569,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } } -static struct intel_mp_floating *mpf_found; +static struct mpf_intel *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ static void __init __get_smp_config(unsigned int early) { - struct intel_mp_floating *mpf = mpf_found; + struct mpf_intel *mpf = mpf_found; if (!mpf) return; @@ -687,14 +687,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { unsigned int *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { - mpf = (struct intel_mp_floating *)bp; + mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && (mpf->mpf_length == 1) && !mpf_checksum((unsigned char *)bp, 16) && @@ -1000,7 +1000,7 @@ static int __init update_mp_table(void) { char str[16]; char oem[10]; - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; struct mpc_table *mpc, *mpc_new; if (!enable_update_mptable) @@ -1052,7 +1052,7 @@ static int __init update_mp_table(void) mpc = mpc_new; /* check if we can modify that */ if (mpc_new_phys - mpf->mpf_physptr) { - struct intel_mp_floating *mpf_new; + struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); From 1eb1b3b65dc3e3ffcc6a60e115c085c0c11c1077 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:43:26 +0530 Subject: [PATCH 028/566] x86: rename all fields of mpf_intel mpf_X to X Impact: cleanup, solve 80 columns wrap problems It would be cleaner to rename all the mpf->mpf_X fields to mpf->X - that alone would give 4 characters per usage site. (we already know that it's an 'mpf' entity - no need to duplicate that in the field too) Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec_def.h | 20 ++++++------- arch/x86/kernel/mpparse.c | 50 +++++++++++++++---------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 187dc9201932..4a7f96d7c188 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -26,16 +26,16 @@ /* Intel MP Floating Pointer Structure */ struct mpf_intel { - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ + char signature[4]; /* "_MP_" */ + unsigned int physptr; /* Configuration table address */ + unsigned char length; /* Our length (paragraphs) */ + unsigned char specification; /* Specification version */ + unsigned char checksum; /* Checksum (makes sum 0) */ + unsigned char feature1; /* Standard or configuration ? */ + unsigned char feature2; /* Bit7 set for IMCR|PIC */ + unsigned char feature3; /* Unused (0) */ + unsigned char feature4; /* Unused (0) */ + unsigned char feature5; /* Unused (0) */ }; #define MPC_SIGNATURE "PCMP" diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6cea941c4dbb..8385d4e7e15d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -597,9 +597,9 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); + mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) - if (mpf->mpf_feature2 & (1 << 7)) { + if (mpf->feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { @@ -610,7 +610,7 @@ static void __init __get_smp_config(unsigned int early) /* * Now see if we need to read further. */ - if (mpf->mpf_feature1 != 0) { + if (mpf->feature1 != 0) { if (early) { /* * local APIC has default address @@ -620,16 +620,16 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Default MP configuration #%d\n", - mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); + mpf->feature1); + construct_default_ISA_mptable(mpf->feature1); - } else if (mpf->mpf_physptr) { + } else if (mpf->physptr) { /* * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { + if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif @@ -696,10 +696,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, while (length > 0) { mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && + (mpf->length == 1) && !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4))) { + ((mpf->specification == 1) + || (mpf->specification == 4))) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; #endif @@ -712,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 1; reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); - if (mpf->mpf_physptr) { + if (mpf->physptr) { unsigned long size = PAGE_SIZE; #ifdef CONFIG_X86_32 /* @@ -721,14 +721,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * the bottom is mapped now. * PC-9800's MPC table places on the very last * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() + * PAGE_SIZE from mpf->physptr yields BUG() * in reserve_bootmem. */ unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; + if (mpf->physptr + size > end) + size = end - mpf->physptr; #endif - reserve_bootmem_generic(mpf->mpf_physptr, size, + reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); } @@ -1013,19 +1013,19 @@ static int __init update_mp_table(void) /* * Now see if we need to go further. */ - if (mpf->mpf_feature1 != 0) + if (mpf->feature1 != 0) return 0; - if (!mpf->mpf_physptr) + if (!mpf->physptr) return 0; - mpc = phys_to_virt(mpf->mpf_physptr); + mpc = phys_to_virt(mpf->physptr); if (!smp_check_mpc(mpc, oem, str)) return 0; printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); - printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); + printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { mpc_new_phys = 0; @@ -1046,23 +1046,23 @@ static int __init update_mp_table(void) } printk(KERN_INFO "use in-positon replacing\n"); } else { - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; mpc_new = phys_to_virt(mpc_new_phys); memcpy(mpc_new, mpc, mpc->length); mpc = mpc_new; /* check if we can modify that */ - if (mpc_new_phys - mpf->mpf_physptr) { + if (mpc_new_phys - mpf->physptr) { struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); memcpy(mpf_new, mpf, 16); mpf = mpf_new; - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; } - mpf->mpf_checksum = 0; - mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); + mpf->checksum = 0; + mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "physptr new: %x\n", mpf->physptr); } /* From 068790334cececc3d2d945617ccc585477da2e38 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:17:37 +0530 Subject: [PATCH 029/566] x86: smp.h move cpu_callin_mask and cpu_callin_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 19 +++++++++++++++++++ arch/x86/include/asm/smp.h | 3 --- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/setup_percpu.c | 1 + arch/x86/kernel/smpboot.c | 1 + 5 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/cpumask.h diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 000000000000..308dddd56329 --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include + +#ifdef CONFIG_X86_64 + +extern cpumask_var_t cpu_callin_mask; + +#else /* CONFIG_X86_32 */ + +extern cpumask_t cpu_callin_map; + +#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) + +#endif /* CONFIG_X86_32 */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1963e27673c9..c35aa5c0dd11 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -20,19 +20,16 @@ #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 14e543b6fd4f..f00258462444 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 55c46074eba0..bf63de72b643 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6c2b8444b830..84ac1cf46d87 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include From fb8fd077fbf0de6662acfd240e8e6b25cf3202ca Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:20:24 +0530 Subject: [PATCH 030/566] x86: smp.h move cpu_callout_mask and cpu_callout_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 4 +--- arch/x86/kernel/smpboot.c | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 308dddd56329..9933fcad3c82 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -6,12 +6,15 @@ #ifdef CONFIG_X86_64 extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; +extern cpumask_t cpu_callout_map; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c35aa5c0dd11..a3afec5cad0b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,20 +17,18 @@ #endif #include #include +#include #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 84ac1cf46d87..6c2b8444b830 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include From 493f6ca54e1ea59732dd334e35c5fe2d8e440b06 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:48:22 +0530 Subject: [PATCH 031/566] x86: smp.h move cpu_initialized_mask and cpu_initialized declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 9933fcad3c82..d4cfd120b84d 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -7,14 +7,17 @@ extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_initialized; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a3afec5cad0b..7d2a80319e82 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -21,15 +21,12 @@ #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) #endif /* CONFIG_X86_32 */ From 52811d8c9beb67da6bc4b770de3c4134376788a1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:58:50 +0530 Subject: [PATCH 032/566] x86: smp.h move cpu_sibling_setup_mask and cpu_sibling_setup_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 12 ------------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index d4cfd120b84d..26c6dad90479 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -8,16 +8,19 @@ extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; +extern cpumask_t cpu_sibling_setup_map; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 7d2a80319e82..a8cea7b09434 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -19,18 +19,6 @@ #include #include -#ifdef CONFIG_X86_64 - -extern cpumask_var_t cpu_sibling_setup_mask; - -#else /* CONFIG_X86_32 */ - -extern cpumask_t cpu_sibling_setup_map; - -#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) - -#endif /* CONFIG_X86_32 */ - extern int __cpuinit get_local_pda(int cpu); extern int smp_num_siblings; From 7106a5ab89c50c6b5aadea0850b40323804a922d Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sat, 10 Jan 2009 23:00:22 -0500 Subject: [PATCH 033/566] x86-64: remove locked instruction from switch_to() Impact: micro-optimization The patch below removes an unnecessary locked instruction from switch_to(). TIF_FORK is only ever set in copy_thread() on initial process creation, and gets cleared during the first scheduling of the process. As such, it is safe to use an unlocked test for the flag within switch_to(). Signed-off-by: Benjamin LaHaise Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 6 +++--- arch/x86/kernel/entry_64.S | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1a..fa47b1e6a866 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -96,15 +96,15 @@ do { \ "thread_return:\n\t" \ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ - LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ - "jc ret_from_fork\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "jnz ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ - [tif_fork] "i" (TIF_FORK), \ + [_tif_fork] "i" (_TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ : "memory", "cc" __EXTRA_CLOBBER) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a987793..38dd37458e44 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -408,6 +408,8 @@ END(save_paranoid) ENTRY(ret_from_fork) DEFAULT_FRAME + LOCK ; btr $TIF_FORK,TI_flags(%r8) + push kernel_eflags(%rip) CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags From 7f7ace0cda64c99599c23785f8979a072e118058 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: [PATCH 034/566] cpumask: update irq_desc to use cpumask_var_t Impact: reduce memory usage, use new cpumask API. Replace the affinity and pending_masks with cpumask_var_t's. This adds to the significant size reduction done with the SPARSE_IRQS changes. The added functions (init_alloc_desc_masks & init_copy_desc_masks) are in the include file so they can be inlined (and optimized out for the !CONFIG_CPUMASKS_OFFSTACK case.) [Naming chosen to be consistent with the other init*irq functions, as well as the backwards arg declaration of "from, to" instead of the more common "to, from" standard.] Includes a slight change to the declaration of struct irq_desc to embed the pending_mask within ifdef(CONFIG_SMP) to be consistent with other references, and some small changes to Xen. Tested: sparse/non-sparse/cpumask_offstack/non-cpumask_offstack/nonuma/nosmp on x86_64 Signed-off-by: Mike Travis Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: KOSAKI Motohiro Cc: Venkatesh Pallipadi Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Yinghai Lu --- arch/x86/kernel/io_apic.c | 20 +++++----- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- drivers/xen/events.c | 4 +- include/linux/irq.h | 81 +++++++++++++++++++++++++++++++++++++-- kernel/irq/chip.c | 5 ++- kernel/irq/handle.c | 26 +++++++------ kernel/irq/manage.c | 12 +++--- kernel/irq/migration.c | 12 +++--- kernel/irq/numa_migrate.c | 12 +++++- kernel/irq/proc.c | 4 +- 11 files changed, 135 insertions(+), 45 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536c..1337eab60ecc 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -356,7 +356,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpumask_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -579,9 +579,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - cpumask_and(&desc->affinity, cfg->domain, mask); + cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); + return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -2383,7 +2383,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) if (cfg->move_in_progress) send_cleanup_vector(cfg); - cpumask_copy(&desc->affinity, mask); + cpumask_copy(desc->affinity, mask); } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2405,11 +2405,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2434,7 +2434,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, &desc->pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2448,7 +2448,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + cpumask_copy(desc->pending_mask, mask); migrate_irq_remapped_level_desc(desc); return; } @@ -2516,7 +2516,7 @@ static void irq_complete_move(struct irq_desc **descp) /* domain has not changed, but affinity did */ me = smp_processor_id(); - if (cpu_isset(me, desc->affinity)) { + if (cpumask_test_cpu(me, desc->affinity)) { *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; @@ -4039,7 +4039,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 74b9ff7341e9..e0f29be8ab0b 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -248,7 +248,7 @@ void fixup_irqs(void) if (irq == 2) continue; - affinity = &desc->affinity; + affinity = desc->affinity; if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); affinity = cpu_all_mask; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 63c88e6ec025..0b21cb1ea11f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -100,7 +100,7 @@ void fixup_irqs(void) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; + affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index eb0dfdeaa949..e0767ff35d6c 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -125,7 +125,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) BUG_ON(irq == -1); #ifdef CONFIG_SMP - irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); @@ -142,7 +142,7 @@ static void init_evtchn_cpu_bindings(void) /* By default all event channels notify CPU#0. */ for_each_irq_desc(i, desc) { - desc->affinity = cpumask_of_cpu(0); + cpumask_copy(desc->affinity, cpumask_of(0)); } #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index f899b502f186..fa27210f1dfd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -182,11 +182,11 @@ struct irq_desc { unsigned int irqs_unhandled; spinlock_t lock; #ifdef CONFIG_SMP - cpumask_t affinity; + cpumask_var_t affinity; unsigned int cpu; -#endif #ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_t pending_mask; + cpumask_var_t pending_mask; +#endif #endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; @@ -422,4 +422,79 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #endif /* !CONFIG_S390 */ +#ifdef CONFIG_SMP +/** + * init_alloc_desc_masks - allocate cpumasks for irq_desc + * @desc: pointer to irq_desc struct + * @boot: true if need bootmem + * + * Allocates affinity and pending_mask cpumask if required. + * Returns true if successful (or not required). + * Side effect: affinity has all bits set, pending_mask has all bits clear. + */ +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + if (boot) { + alloc_bootmem_cpumask_var(&desc->affinity); + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + alloc_bootmem_cpumask_var(&desc->pending_mask); + cpumask_clear(desc->pending_mask); +#endif + return true; + } + + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) + return false; + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + free_cpumask_var(desc->affinity); + return false; + } + cpumask_clear(desc->pending_mask); +#endif + return true; +} + +/** + * init_copy_desc_masks - copy cpumasks for irq_desc + * @old_desc: pointer to old irq_desc struct + * @new_desc: pointer to new irq_desc struct + * + * Insures affinity and pending_masks are copied to new irq_desc. + * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the + * irq_desc struct so the copy is redundant. + */ + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +#ifdef CONFIG_CPUMASKS_OFFSTACK + cpumask_copy(new_desc->affinity, old_desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); +#endif +#endif +} + +#else /* !CONFIG_SMP */ + +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + return true; +} + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +} + +#endif /* CONFIG_SMP */ + #endif /* _LINUX_IRQ_H */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f63c706d25e1..c248eba98b43 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -46,7 +46,10 @@ void dynamic_irq_init(unsigned int irq) desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpumask_setall(&desc->affinity); + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif #endif spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c20db0be9173..b8fa1354f01c 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -64,9 +64,6 @@ static struct irq_desc irq_desc_init = { .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif }; void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) @@ -88,6 +85,8 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) { + int node = cpu_to_node(cpu); + memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); @@ -101,6 +100,10 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); + BUG_ON(1); + } arch_init_chip_data(desc, cpu); } @@ -119,9 +122,6 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif } }; @@ -141,7 +141,7 @@ int __init early_irq_init(void) desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy[i]; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - + init_alloc_desc_masks(&desc[i], 0, true); irq_desc_ptrs[i] = desc + i; } @@ -188,6 +188,10 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); + BUG_ON(1); + } init_one_irq_desc(irq, desc, cpu); irq_desc_ptrs[irq] = desc; @@ -207,9 +211,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif } }; @@ -222,9 +223,10 @@ int __init early_irq_init(void) desc = irq_desc; count = ARRAY_SIZE(irq_desc); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { desc[i].irq = i; - + init_alloc_desc_masks(&desc[i], 0, true); + } return arch_early_irq_init(); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index cd0cd8dcb345..b98739af4558 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -98,14 +98,14 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - cpumask_copy(&desc->affinity, cpumask); + cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, cpumask); + cpumask_copy(desc->pending_mask, cpumask); } #else - cpumask_copy(&desc->affinity, cpumask); + cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@ -127,16 +127,16 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) * one of the targets is online. */ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpumask_any_and(&desc->affinity, cpu_online_mask) + if (cpumask_any_and(desc->affinity, cpu_online_mask) < nr_cpu_ids) goto set_affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity); + cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity); set_affinity: - desc->chip->set_affinity(irq, &desc->affinity); + desc->chip->set_affinity(irq, desc->affinity); return 0; } diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index bd72329e630c..e05ad9be43b7 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -18,7 +18,7 @@ void move_masked_irq(int irq) desc->status &= ~IRQ_MOVE_PENDING; - if (unlikely(cpumask_empty(&desc->pending_mask))) + if (unlikely(cpumask_empty(desc->pending_mask))) return; if (!desc->chip->set_affinity) @@ -38,13 +38,13 @@ void move_masked_irq(int irq) * For correct operation this depends on the caller * masking the irqs. */ - if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask) + if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids)) { - cpumask_and(&desc->affinity, - &desc->pending_mask, cpu_online_mask); - desc->chip->set_affinity(irq, &desc->affinity); + cpumask_and(desc->affinity, + desc->pending_mask, cpu_online_mask); + desc->chip->set_affinity(irq, desc->affinity); } - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); } void move_native_irq(int irq) diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index ecf765c6a77a..f001a4ea6414 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -46,6 +46,7 @@ static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, desc->cpu = cpu; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + init_copy_desc_masks(old_desc, desc); arch_init_copy_chip_data(old_desc, desc, cpu); } @@ -76,11 +77,20 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); if (!desc) { - printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq); + printk(KERN_ERR "irq %d: can not get new irq_desc " + "for migration.\n", irq); /* still use old one */ desc = old_desc; goto out_unlock; } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " + "for migration.\n", irq); + /* still use old one */ + kfree(desc); + desc = old_desc; + goto out_unlock; + } init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index aae3f742bcec..692363dd591f 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -20,11 +20,11 @@ static struct proc_dir_entry *root_irq_dir; static int irq_affinity_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - const struct cpumask *mask = &desc->affinity; + const struct cpumask *mask = desc->affinity; #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PENDING) - mask = &desc->pending_mask; + mask = desc->pending_mask; #endif seq_cpumask(m, mask); seq_putc(m, '\n'); From fbd59a8d1f7cf325fdb6828659f1fb76631e87b3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: [PATCH 035/566] cpumask: Use topology_core_cpumask()/topology_thread_cpumask() Impact: reduce stack usage, use new cpumask API. This actually uses topology_core_cpumask() and topology_thread_cpumask(), removing the only users of topology_core_siblings() and topology_thread_siblings() Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: linux-net-drivers@solarflare.com --- Documentation/cputopology.txt | 6 +++--- drivers/base/topology.c | 33 ++++++++++++++++----------------- drivers/net/sfc/efx.c | 4 ++-- include/linux/topology.h | 6 ++++++ 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index 45932ec21cee..b41f3e58aefa 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -18,11 +18,11 @@ For an architecture to support this feature, it must define some of these macros in include/asm-XXX/topology.h: #define topology_physical_package_id(cpu) #define topology_core_id(cpu) -#define topology_thread_siblings(cpu) -#define topology_core_siblings(cpu) +#define topology_thread_cpumask(cpu) +#define topology_core_cpumask(cpu) The type of **_id is int. -The type of siblings is cpumask_t. +The type of siblings is (const) struct cpumask *. To be consistent on all architectures, include/linux/topology.h provides default definitions for any of the above macros that are diff --git a/drivers/base/topology.c b/drivers/base/topology.c index a778fb52b11f..bf6b13206d00 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -31,7 +31,10 @@ #include #include -#define define_one_ro(_name) \ +#define define_one_ro_named(_name, _func) \ +static SYSDEV_ATTR(_name, 0444, _func, NULL) + +#define define_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, show_##_name, NULL) #define define_id_show_func(name) \ @@ -42,8 +45,8 @@ static ssize_t show_##name(struct sys_device *dev, \ return sprintf(buf, "%d\n", topology_##name(cpu)); \ } -#if defined(topology_thread_siblings) || defined(topology_core_siblings) -static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) +#if defined(topology_thread_cpumask) || defined(topology_core_cpumask) +static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf) { ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; int n = 0; @@ -65,7 +68,7 @@ static ssize_t show_##name(struct sys_device *dev, \ struct sysdev_attribute *attr, char *buf) \ { \ unsigned int cpu = dev->id; \ - return show_cpumap(0, &(topology_##name(cpu)), buf); \ + return show_cpumap(0, topology_##name(cpu), buf); \ } #define define_siblings_show_list(name) \ @@ -74,7 +77,7 @@ static ssize_t show_##name##_list(struct sys_device *dev, \ char *buf) \ { \ unsigned int cpu = dev->id; \ - return show_cpumap(1, &(topology_##name(cpu)), buf); \ + return show_cpumap(1, topology_##name(cpu), buf); \ } #else @@ -82,9 +85,7 @@ static ssize_t show_##name##_list(struct sys_device *dev, \ static ssize_t show_##name(struct sys_device *dev, \ struct sysdev_attribute *attr, char *buf) \ { \ - unsigned int cpu = dev->id; \ - cpumask_t mask = topology_##name(cpu); \ - return show_cpumap(0, &mask, buf); \ + return show_cpumap(0, topology_##name(dev->id), buf); \ } #define define_siblings_show_list(name) \ @@ -92,9 +93,7 @@ static ssize_t show_##name##_list(struct sys_device *dev, \ struct sysdev_attribute *attr, \ char *buf) \ { \ - unsigned int cpu = dev->id; \ - cpumask_t mask = topology_##name(cpu); \ - return show_cpumap(1, &mask, buf); \ + return show_cpumap(1, topology_##name(dev->id), buf); \ } #endif @@ -107,13 +106,13 @@ define_one_ro(physical_package_id); define_id_show_func(core_id); define_one_ro(core_id); -define_siblings_show_func(thread_siblings); -define_one_ro(thread_siblings); -define_one_ro(thread_siblings_list); +define_siblings_show_func(thread_cpumask); +define_one_ro_named(thread_siblings, show_thread_cpumask); +define_one_ro_named(thread_siblings_list, show_thread_cpumask_list); -define_siblings_show_func(core_siblings); -define_one_ro(core_siblings); -define_one_ro(core_siblings_list); +define_siblings_show_func(core_cpumask); +define_one_ro_named(core_siblings, show_core_cpumask); +define_one_ro_named(core_siblings_list, show_core_cpumask_list); static struct attribute *default_attrs[] = { &attr_physical_package_id.attr, diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index 7673fd92eaf5..f2e56ceee0ea 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -863,8 +863,8 @@ static int efx_wanted_rx_queues(void) for_each_online_cpu(cpu) { if (!cpu_isset(cpu, core_mask)) { ++count; - cpus_or(core_mask, core_mask, - topology_core_siblings(cpu)); + cpumask_or(&core_mask, &core_mask, + topology_core_cpumask(cpu)); } } diff --git a/include/linux/topology.h b/include/linux/topology.h index e632d29f0544..a16b9e06f2e5 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -193,5 +193,11 @@ int arch_update_cpu_topology(void); #ifndef topology_core_siblings #define topology_core_siblings(cpu) cpumask_of_cpu(cpu) #endif +#ifndef topology_thread_cpumask +#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#endif +#ifndef topology_core_cpumask +#define topology_core_cpumask(cpu) cpumask_of(cpu) +#endif #endif /* _LINUX_TOPOLOGY_H */ From f7df8ed164996cd2c6aca9674388be6ef78d8b37 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 036/566] cpumask: convert misc driver functions Impact: use new cpumask API. Convert misc driver functions to use struct cpumask. To Do: - Convert iucv_buffer_cpumask to cpumask_var_t. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Dean Nelson Cc: Robert Richter Cc: oprofile-list@lists.sf.net Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Ursula Braun Cc: linux390@de.ibm.com Cc: linux-s390@vger.kernel.org --- drivers/base/cpu.c | 2 +- drivers/misc/sgi-xp/xpc_main.c | 2 +- drivers/oprofile/buffer_sync.c | 22 ++++++++++++++++++---- drivers/oprofile/buffer_sync.h | 4 ++++ drivers/oprofile/oprof.c | 9 ++++++++- drivers/xen/manage.c | 2 +- 6 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 719ee5c1c8d9..5b257a57bc57 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -107,7 +107,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); /* * Print cpu online, possible, present, and system maps */ -static ssize_t print_cpus_map(char *buf, cpumask_t *map) +static ssize_t print_cpus_map(char *buf, const struct cpumask *map) { int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map); diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 89218f7cfaa7..6576170de962 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -318,7 +318,7 @@ xpc_hb_checker(void *ignore) /* this thread was marked active by xpc_hb_init() */ - set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU)); + set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU)); /* set our heartbeating to other partitions into motion */ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 9da5a4b81133..c3ea5fa7d05a 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -38,7 +38,7 @@ static LIST_HEAD(dying_tasks); static LIST_HEAD(dead_tasks); -static cpumask_t marked_cpus = CPU_MASK_NONE; +static cpumask_var_t marked_cpus; static DEFINE_SPINLOCK(task_mortuary); static void process_task_mortuary(void); @@ -456,10 +456,10 @@ static void mark_done(int cpu) { int i; - cpu_set(cpu, marked_cpus); + cpumask_set_cpu(cpu, marked_cpus); for_each_online_cpu(i) { - if (!cpu_isset(i, marked_cpus)) + if (!cpumask_test_cpu(i, marked_cpus)) return; } @@ -468,7 +468,7 @@ static void mark_done(int cpu) */ process_task_mortuary(); - cpus_clear(marked_cpus); + cpumask_clear(marked_cpus); } @@ -565,6 +565,20 @@ void sync_buffer(int cpu) mutex_unlock(&buffer_mutex); } +int __init buffer_sync_init(void) +{ + if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(marked_cpus); + return 0; +} + +void __exit buffer_sync_cleanup(void) +{ + free_cpumask_var(marked_cpus); +} + /* The function can be used to add a buffer worth of data directly to * the kernel buffer. The buffer is assumed to be a circular buffer. * Take the entries from index start and end at index end, wrapping diff --git a/drivers/oprofile/buffer_sync.h b/drivers/oprofile/buffer_sync.h index 3110732c1835..0ebf5db62679 100644 --- a/drivers/oprofile/buffer_sync.h +++ b/drivers/oprofile/buffer_sync.h @@ -19,4 +19,8 @@ void sync_stop(void); /* sync the given CPU's buffer */ void sync_buffer(int cpu); +/* initialize/destroy the buffer system. */ +int buffer_sync_init(void); +void buffer_sync_cleanup(void); + #endif /* OPROFILE_BUFFER_SYNC_H */ diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 3cffce90f82a..ced39f602292 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -183,6 +183,10 @@ static int __init oprofile_init(void) { int err; + err = buffer_sync_init(); + if (err) + return err; + err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { @@ -191,8 +195,10 @@ static int __init oprofile_init(void) } err = oprofilefs_register(); - if (err) + if (err) { oprofile_arch_exit(); + buffer_sync_cleanup(); + } return err; } @@ -202,6 +208,7 @@ static void __exit oprofile_exit(void) { oprofilefs_unregister(); oprofile_arch_exit(); + buffer_sync_cleanup(); } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 9b91617b9582..e7e83b65c18f 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -100,7 +100,7 @@ static void do_suspend(void) /* XXX use normal device tree? */ xenbus_suspend(); - err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0)); + err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); if (err) { printk(KERN_ERR "failed to start xen_suspend: %d\n", err); goto out; From 2f8975fbcf07103afab0bbaea5f5b1a9967ffb86 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 037/566] cpumask: convert drivers/net/sfc Impact: reduce stack usage, use new cpumask API. Remove a cpumask from the stack. Ben Hutchings indicated that printing a warning and returning 1 was acceptable for the corner case where allocation fails. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: Ben Hutchings Cc: linux-net-drivers@solarflare.com --- drivers/net/sfc/efx.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index f2e56ceee0ea..101c00a7bb73 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -854,20 +854,27 @@ static void efx_fini_io(struct efx_nic *efx) * interrupts across them. */ static int efx_wanted_rx_queues(void) { - cpumask_t core_mask; + cpumask_var_t core_mask; int count; int cpu; - cpus_clear(core_mask); + if (!alloc_cpumask_var(&core_mask, GFP_KERNEL)) { + printk(KERN_WARNING + "efx.c: allocation failure, irq balancing hobbled\n"); + return 1; + } + + cpumask_clear(core_mask); count = 0; for_each_online_cpu(cpu) { - if (!cpu_isset(cpu, core_mask)) { + if (!cpumask_test_cpu(cpu, core_mask)) { ++count; - cpumask_or(&core_mask, &core_mask, + cpumask_or(core_mask, core_mask, topology_core_cpumask(cpu)); } } + free_cpumask_var(core_mask); return count; } From 651f8118cf0a5724f23fe1de4a3d9d36b2e01c2e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 038/566] cpumask: convert other misc kernel functions Impact: use new cpumask API. Convert other misc kernel functions to use struct cpumask. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- lib/smp_processor_id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 0f8fc22ed103..4689cb073da4 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpus_equal(current->cpus_allowed, cpumask_of_cpu(this_cpu))) + if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) goto out; /* From 802bf931f2688ad125b73db597ce63cc842fb27a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 039/566] cpumask: fix bug in use cpumask_var_t in irq_desc Impact: fix bug where new irq_desc uses old cpumask pointers which are freed. As Yinghai pointed out, init_copy_one_irq_desc() copies the old desc to the new desc overwriting the cpumask pointers. Since the old_desc and the cpumask pointers are freed, then memory corruption will occur if these old pointers are used. Move the allocation of these pointers to after the copy. Signed-off-by: Mike Travis Cc: Yinghai Lu --- include/linux/irq.h | 9 +++++++-- kernel/irq/handle.c | 8 +------- kernel/irq/numa_migrate.c | 13 ++++++++----- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index fa27210f1dfd..27a67536511e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -426,15 +426,18 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); /** * init_alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct + * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { + int node; + if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); cpumask_setall(desc->affinity); @@ -446,6 +449,8 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, return true; } + node = cpu_to_node(cpu); + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; cpumask_setall(desc->affinity); @@ -484,7 +489,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b8fa1354f01c..f01c0a30cb42 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -85,8 +85,6 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) { - int node = cpu_to_node(cpu); - memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); @@ -100,7 +98,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, node, false)) { + if (!init_alloc_desc_masks(desc, cpu, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } @@ -188,10 +186,6 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, node, false)) { - printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); - BUG_ON(1); - } init_one_irq_desc(irq, desc, cpu); irq_desc_ptrs[irq] = desc; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index f001a4ea6414..666260e4c065 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -38,16 +38,22 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->kstat_irqs = NULL; } -static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, +static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, struct irq_desc *desc, int cpu) { memcpy(desc, old_desc, sizeof(struct irq_desc)); + if (!init_alloc_desc_masks(desc, cpu, false)) { + printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " + "for migration.\n", irq); + return false; + } spin_lock_init(&desc->lock); desc->cpu = cpu; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); init_copy_desc_masks(old_desc, desc); arch_init_copy_chip_data(old_desc, desc, cpu); + return true; } static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) @@ -83,15 +89,12 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = old_desc; goto out_unlock; } - if (!init_alloc_desc_masks(desc, node, false)) { - printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " - "for migration.\n", irq); + if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { /* still use old one */ kfree(desc); desc = old_desc; goto out_unlock; } - init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; From 4595f9620cda8a1e973588e743cf5f8436dd20c6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 040/566] x86: change flush_tlb_others to take a const struct cpumask Impact: reduce stack usage, use new cpumask API. This is made a little more tricky by uv_flush_tlb_others which actually alters its argument, for an IPI to be sent to the remaining cpus in the mask. I solve this by allocating a cpumask_var_t for this case and falling back to IPI should this fail. To eliminate temporaries in the caller, all flush_tlb_others implementations now do the this-cpu-elimination step themselves. Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)" which has been there since pre-git and yet f->flush_cpumask is always zero at this point. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/include/asm/paravirt.h | 8 ++-- arch/x86/include/asm/tlbflush.h | 8 ++-- arch/x86/include/asm/uv/uv_bau.h | 3 +- arch/x86/kernel/tlb_32.c | 67 ++++++++++++++------------------ arch/x86/kernel/tlb_64.c | 61 ++++++++++++++++------------- arch/x86/kernel/tlb_uv.c | 16 ++++---- arch/x86/xen/enlighten.c | 31 ++++++--------- 7 files changed, 93 insertions(+), 101 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aedc..c26c6bf4da00 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -244,7 +244,8 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va); /* Hooks for allocating and freeing a pagetable top-level */ @@ -984,10 +985,11 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); } -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, +static inline void flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); + PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb549116..f4e1b550ce61 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask, +static inline void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { @@ -142,8 +142,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 @@ -166,7 +166,7 @@ static inline void reset_lazy_tlbstate(void) #endif /* SMP */ #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) #endif static inline void flush_tlb_kernel_range(unsigned long start, diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 50423c7b56b2..74e6393bfddb 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); +extern int uv_flush_tlb_others(struct cpumask *, + struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ce5054642247..ec53818f4e38 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) * Optimizations Manfred Spraul */ -static cpumask_t flush_cpumask; +static cpumask_var_t flush_cpumask; static struct mm_struct *flush_mm; static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); @@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) cpu = get_cpu(); - if (!cpu_isset(cpu, flush_cpumask)) + if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; /* * This was a BUG() but until someone can quote me the @@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs) } ack_APIC_irq(); smp_mb__before_clear_bit(); - cpu_clear(cpu, flush_cpumask); + cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); out: put_cpu_no_resched(); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - cpumask_t cpumask = *cpumaskp; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask * - mask must exist :) */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (unlikely(cpus_empty(cpumask))) - return; -#endif - /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, */ spin_lock(&tlbstate_lock); + cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); +#ifdef CONFIG_HOTPLUG_CPU + /* If a CPU which we ran on has gone down, OK. */ + cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); + if (unlikely(cpumask_empty(flush_cpumask))) { + spin_unlock(&tlbstate_lock); + return; + } +#endif flush_mm = mm; flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); /* * Make the above memory operations globally visible before @@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - while (!cpus_empty(flush_cpumask)) + while (!cpumask_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ cpu_relax(); @@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } EXPORT_SYMBOL(flush_tlb_page); @@ -254,3 +239,9 @@ void reset_lazy_tlbstate(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } +static int init_flush_cpumask(void) +{ + alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); + return 0; +} +early_initcall(init_flush_cpumask); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e48..38836aef51b4 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -43,10 +43,10 @@ union smp_flush_state { struct { - cpumask_t flush_cpumask; struct mm_struct *flush_mm; unsigned long flush_va; spinlock_t tlbstate_lock; + DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; char pad[SMP_CACHE_BYTES]; } ____cacheline_aligned; @@ -131,7 +131,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; f = &per_cpu(flush_state, sender); - if (!cpu_isset(cpu, f->flush_cpumask)) + if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) goto out; /* * This was a BUG() but until someone can quote me the @@ -153,19 +153,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); - cpu_clear(cpu, f->flush_cpumask); + cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +static void flush_tlb_others_ipi(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { int sender; union smp_flush_state *f; - cpumask_t cpumask = *cpumaskp; - - if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) - return; /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; @@ -180,7 +176,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_mm = mm; f->flush_va = va; - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + cpumask_andnot(to_cpumask(f->flush_cpumask), + cpumask, cpumask_of(smp_processor_id())); /* * Make the above memory operations globally visible before @@ -191,9 +188,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); - while (!cpus_empty(f->flush_cpumask)) + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); f->flush_mm = NULL; @@ -201,6 +198,24 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, spin_unlock(&f->tlbstate_lock); } +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + if (is_uv_system()) { + cpumask_var_t after_uv_flush; + + if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) { + cpumask_andnot(after_uv_flush, + cpumask, cpumask_of(smp_processor_id())); + if (!uv_flush_tlb_others(after_uv_flush, mm, va)) + flush_tlb_others_ipi(after_uv_flush, mm, va); + free_cpumask_var(after_uv_flush); + return; + } + } + flush_tlb_others_ipi(cpumask, mm, va); +} + static int __cpuinit init_smp_flush(void) { int i; @@ -215,25 +230,18 @@ core_initcall(init_smp_flush); void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -241,8 +249,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -250,11 +258,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -263,8 +268,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f885023167e0..690dcf1a27d4 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * The cpumaskp mask contains the cpus the broadcast was sent to. * * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask is left - * unchanged. + * Returns 0 if some remote flushing remains to be done. The mask will have + * some bits still set. */ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - cpumask_t *cpumaskp) + struct cpumask *cpumaskp) { int completion_status = 0; int right_shift; @@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpu_clear(bit, *cpumaskp); + cpumask_clear_cpu(bit, cpumaskp); } - if (!cpus_empty(*cpumaskp)) + if (!cpumask_empty(cpumaskp)) return 0; return 1; } @@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Returns 1 if all remote flushing was done. * Returns 0 if some remote flushing remains to be done. */ -int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, +int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, unsigned long va) { int i; @@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b20..965539ec425f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr) preempt_enable(); } -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, - unsigned long va) +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) { struct { struct mmuext_op op; - cpumask_t mask; + DECLARE_BITMAP(mask, NR_CPUS); } *args; - cpumask_t cpumask = *cpus; struct multicall_space mcs; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpus)); BUG_ON(!mm); - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (cpus_empty(cpumask)) - return; - mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; - args->mask = cpumask; - args->op.arg2.vcpumask = &args->mask; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; if (va == TLB_FLUSH_ALL) { args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; @@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); +issue: xen_mc_issue(PARAVIRT_LAZY_MMU); } From 0e21990ae7ee11af94f44f240b06e520cf1505d4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 041/566] SGI UV cpumask: use static temp cpumask in flush_tlb Impact: Improve tlb flush performance for UV Calling alloc_cpumask_var a zillion times a second does affect performance. Replace with static cpumask. Note: when CONFIG_X86_UV is defined, this extra PER_CPU memory will be optimized out for non-UV configs as is_uv_system() will then return a constant 0. Signed-off-by: Mike Travis --- arch/x86/kernel/tlb_64.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 38836aef51b4..7a3f9891302d 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -202,16 +202,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { if (is_uv_system()) { - cpumask_var_t after_uv_flush; + /* FIXME: could be an percpu_alloc'd thing */ + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask); - if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) { - cpumask_andnot(after_uv_flush, - cpumask, cpumask_of(smp_processor_id())); - if (!uv_flush_tlb_others(after_uv_flush, mm, va)) - flush_tlb_others_ipi(after_uv_flush, mm, va); - free_cpumask_var(after_uv_flush); - return; - } + cpumask_andnot(after_uv_flush, cpumask, + cpumask_of(smp_processor_id())); + if (!uv_flush_tlb_others(after_uv_flush, mm, va)) + flush_tlb_others_ipi(after_uv_flush, mm, va); + + put_cpu_var(flush_tlb_uv_cpumask); + return; } flush_tlb_others_ipi(cpumask, mm, va); } From a1c33bbeb7061f3ed39103c385844474eaa8f921 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 042/566] x86: cleanup remaining cpumask_t code in mce_amd_64.c Impact: Reduce memory usage, use new cpumask API. Use cpumask_var_t for 'cpus' cpumask in struct threshold_bank and update remaining old cpumask_t functions to new cpumask API. Signed-off-by: Mike Travis --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094d..4772e91e8246 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = { struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; - cpumask_t cpus; + cpumask_var_t cpus; }; static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); @@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(per_cpu(cpu_core_map, cpu)); + i = cpumask_first(&per_cpu(cpu_core_map, cpu)); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } + if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + kfree(b); + err = -ENOMEM; + goto out; + } b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); if (!b->kobj) goto out_free; #ifndef CONFIG_SMP - b->cpus = CPU_MASK_ALL; + cpumask_setall(b->cpus); #else - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); #endif per_cpu(threshold_banks, cpu)[bank] = b; @@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out_free; - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) out_free: per_cpu(threshold_banks, cpu)[bank] = NULL; + free_cpumask_var(b->cpus); kfree(b); out: return err; @@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #endif /* remove all sibling symlinks before unregistering */ - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) free_out: kobject_del(b->kobj); kobject_put(b->kobj); + free_cpumask_var(b->cpus); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } From f9b90566cd46e19f670a1e60a717ff243f060a8a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 043/566] x86: reduce stack usage in init_intel_cacheinfo Impact: reduce stack usage. init_intel_cacheinfo() does not use the cpumask so define a subset of struct _cpuid4_info (_cpuid4_info_regs) that can be used instead. Signed-off-by: Mike Travis --- arch/x86/kernel/cpu/intel_cacheinfo.c | 63 +++++++++++++++++++-------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d77be78..58527a9fc404 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -132,7 +132,16 @@ struct _cpuid4_info { union _cpuid4_leaf_ecx ecx; unsigned long size; unsigned long can_disable; - cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ + DECLARE_BITMAP(shared_cpu_map, NR_CPUS); +}; + +/* subset of above _cpuid4_info w/o shared_cpu_map */ +struct _cpuid4_info_regs { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + unsigned long can_disable; }; #ifdef CONFIG_PCI @@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, } static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; @@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) } static int -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +__cpuinit cpuid4_cache_lookup_regs(int index, + struct _cpuid4_info_regs *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) return 0; } +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + struct _cpuid4_info_regs *leaf_regs = + (struct _cpuid4_info_regs *)this_leaf; + + return cpuid4_cache_lookup_regs(index, leaf_regs); +} + static int __cpuinit find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; @@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info this_leaf; - + struct _cpuid4_info_regs this_leaf; int retval; - retval = cpuid4_cache_lookup(i, &this_leaf); + retval = cpuid4_cache_lookup_regs(i, &this_leaf); if (retval >= 0) { switch(this_leaf.eax.split.level) { case 1: @@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) - cpu_set(cpu, this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); else { index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { - cpu_set(i, this_leaf->shared_cpu_map); + cpumask_set_cpu(i, + to_cpumask(this_leaf->shared_cpu_map)); if (i != cpu && per_cpu(cpuid4_info, i)) { - sibling_leaf = CPUID4_INFO_IDX(i, index); - cpu_set(cpu, sibling_leaf->shared_cpu_map); + sibling_leaf = + CPUID4_INFO_IDX(i, index); + cpumask_set_cpu(cpu, to_cpumask( + sibling_leaf->shared_cpu_map)); } } } @@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) int sibling; this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { + for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpu_clear(cpu, sibling_leaf->shared_cpu_map); + cpumask_clear_cpu(cpu, + to_cpumask(sibling_leaf->shared_cpu_map)); } } #else @@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, int n = 0; if (len > 1) { - cpumask_t *mask = &this_leaf->shared_cpu_map; + const struct cpumask *mask; + mask = to_cpumask(this_leaf->shared_cpu_map); n = type? cpulist_scnprintf(buf, len-2, mask) : cpumask_scnprintf(buf, len-2, mask); @@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; ssize_t ret = 0; int i; @@ -718,7 +742,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; unsigned int ret, index, val; @@ -863,7 +888,7 @@ err_out: return -ENOMEM; } -static cpumask_t cache_dev_map = CPU_MASK_NONE; +static DECLARE_BITMAP(cache_dev_map, NR_CPUS); /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) @@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } - cpu_set(cpu, cache_dev_map); + cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); return 0; @@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (per_cpu(cpuid4_info, cpu) == NULL) return; - if (!cpu_isset(cpu, cache_dev_map)) + if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) return; - cpu_clear(cpu, cache_dev_map); + cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); From c90e785be2fd9dfaef1f030d0314e44052553736 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 044/566] cpumask: use cpumask_var_t in dcdbas.c Impact: reduce stack usage. Replace cpumask_t with cpumask_var_t in drivers/firmware/dcdbas.c. Signed-off-by: Mike Travis --- drivers/firmware/dcdbas.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 777fba48d2d3..3009e0171e54 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -244,7 +244,7 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, */ int dcdbas_smi_request(struct smi_cmd *smi_cmd) { - cpumask_t old_mask; + cpumask_var_t old_mask; int ret = 0; if (smi_cmd->magic != SMI_CMD_MAGIC) { @@ -254,8 +254,11 @@ int dcdbas_smi_request(struct smi_cmd *smi_cmd) } /* SMI requires CPU 0 */ - old_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); + if (!alloc_cpumask_var(&old_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(old_mask, ¤t->cpus_allowed); + set_cpus_allowed_ptr(current, cpumask_of(0)); if (smp_processor_id() != 0) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", __func__); @@ -275,7 +278,8 @@ int dcdbas_smi_request(struct smi_cmd *smi_cmd) ); out: - set_cpus_allowed_ptr(current, &old_mask); + set_cpus_allowed_ptr(current, old_mask); + free_cpumask_var(old_mask); return ret; } From d38b223c86db3162dc85b5a1997ac8a210e1660b Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:11 -0800 Subject: [PATCH 045/566] cpumask: reduce stack usage in find_lowest_rq Impact: reduce stack usage, cleanup Use a cpumask_var_t in find_lowest_rq() and clean up other old cpumask_t calls. Signed-off-by: Mike Travis --- kernel/sched_rt.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 954e1a81b796..da932f4c8524 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -960,16 +960,17 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); -static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) +static inline int pick_optimal_cpu(int this_cpu, + const struct cpumask *mask) { int first; /* "this_cpu" is cheaper to preempt than a remote processor */ - if ((this_cpu != -1) && cpu_isset(this_cpu, *mask)) + if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) return this_cpu; - first = first_cpu(*mask); - if (first != NR_CPUS) + first = cpumask_first(mask); + if (first < nr_cpu_ids) return first; return -1; @@ -981,6 +982,7 @@ static int find_lowest_rq(struct task_struct *task) struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); + cpumask_var_t domain_mask; if (task->rt.nr_cpus_allowed == 1) return -1; /* No other targets possible */ @@ -1013,19 +1015,25 @@ static int find_lowest_rq(struct task_struct *task) if (this_cpu == cpu) this_cpu = -1; /* Skip this_cpu opt if the same */ - for_each_domain(cpu, sd) { - if (sd->flags & SD_WAKE_AFFINE) { - cpumask_t domain_mask; - int best_cpu; + if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { + for_each_domain(cpu, sd) { + if (sd->flags & SD_WAKE_AFFINE) { + int best_cpu; - cpumask_and(&domain_mask, sched_domain_span(sd), - lowest_mask); + cpumask_and(domain_mask, + sched_domain_span(sd), + lowest_mask); - best_cpu = pick_optimal_cpu(this_cpu, - &domain_mask); - if (best_cpu != -1) - return best_cpu; + best_cpu = pick_optimal_cpu(this_cpu, + domain_mask); + + if (best_cpu != -1) { + free_cpumask_var(domain_mask); + return best_cpu; + } + } } + free_cpumask_var(domain_mask); } /* From c7a3589e7a1f8fdbd2536fe1bfa60b37f5121c69 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:11 -0800 Subject: [PATCH 046/566] Xen: reduce memory required for cpu_evtchn_mask Impact: reduce memory usage. Reduce this significant gain in the amount of memory used when NR_CPUS bumped from 128 to 4096 by allocating the array based on nr_cpu_ids: 65536 +2031616 2097152 +3100% cpu_evtchn_mask(.bss) Signed-off-by: Mike Travis Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com --- drivers/xen/events.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index e0767ff35d6c..ed7527b3745a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -75,7 +75,14 @@ enum { static int evtchn_to_irq[NR_EVENT_CHANNELS] = { [0 ... NR_EVENT_CHANNELS-1] = -1 }; -static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; +struct cpu_evtchn_s { + unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; +}; +static struct cpu_evtchn_s *cpu_evtchn_mask_p; +static inline unsigned long *cpu_evtchn_mask(int cpu) +{ + return cpu_evtchn_mask_p[cpu].bits; +} static u8 cpu_evtchn[NR_EVENT_CHANNELS]; /* Reference counts for bindings to IRQs. */ @@ -115,7 +122,7 @@ static inline unsigned long active_evtchns(unsigned int cpu, unsigned int idx) { return (sh->evtchn_pending[idx] & - cpu_evtchn_mask[cpu][idx] & + cpu_evtchn_mask(cpu)[idx] & ~sh->evtchn_mask[idx]); } @@ -128,8 +135,8 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif - __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); - __set_bit(chn, cpu_evtchn_mask[cpu]); + __clear_bit(chn, cpu_evtchn_mask(cpu_evtchn[chn])); + __set_bit(chn, cpu_evtchn_mask(cpu)); cpu_evtchn[chn] = cpu; } @@ -147,7 +154,7 @@ static void init_evtchn_cpu_bindings(void) #endif memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); - memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); + memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0))); } static inline unsigned int cpu_from_evtchn(unsigned int evtchn) @@ -822,6 +829,10 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { void __init xen_init_IRQ(void) { int i; + size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s); + + cpu_evtchn_mask_p = kmalloc(size, GFP_KERNEL); + BUG_ON(cpu_evtchn_mask == NULL); init_evtchn_cpu_bindings(); From 9594949b060efe86ecaa1a66839232a3b9800bc9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: [PATCH 047/566] irq: change references from NR_IRQS to nr_irqs Impact: preparation, cleanup, add KERN_INFO printk Modify references from NR_IRQS to nr_irqs as the later will become variable-sized based on nr_cpu_ids when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- arch/x86/kernel/io_apic.c | 2 +- kernel/irq/handle.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1337eab60ecc..ae80638012de 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3183,7 +3183,7 @@ unsigned int create_irq_nr(unsigned int irq_want) irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new < NR_IRQS; new++) { + for (new = irq_want; new < nr_irqs; new++) { if (platform_legacy_irq(new)) continue; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index f01c0a30cb42..790c5fa7ea39 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -132,6 +132,8 @@ int __init early_irq_init(void) int legacy_count; int i; + printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); + desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); @@ -143,7 +145,7 @@ int __init early_irq_init(void) irq_desc_ptrs[i] = desc + i; } - for (i = legacy_count; i < NR_IRQS; i++) + for (i = legacy_count; i < nr_irqs; i++) irq_desc_ptrs[i] = NULL; return arch_early_irq_init(); @@ -151,7 +153,7 @@ int __init early_irq_init(void) struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL; + return (irq < nr_irqs) ? irq_desc_ptrs[irq] : NULL; } struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) @@ -160,9 +162,9 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) unsigned long flags; int node; - if (irq >= NR_IRQS) { - printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", - irq, NR_IRQS); + if (irq >= nr_irqs) { + printk(KERN_WARNING "irq >= nr_irqs in irq_to_desc_alloc: %d %d\n", + irq, nr_irqs); WARN_ON(1); return NULL; } @@ -214,6 +216,8 @@ int __init early_irq_init(void) int count; int i; + printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); + desc = irq_desc; count = ARRAY_SIZE(irq_desc); From e2f4d06545ec1f29b0e838ee34cbf3500ea5b9a4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: [PATCH 048/566] irq: use WARN() instead of WARN_ON(). Impact: cleanup WARN msg. Ingo requested: > While at it, could you please also convert this to a WARN() construct > instead? (in a separate commit) ... and it shall be done. ;-) Signed-off-by: Mike Travis --- kernel/irq/handle.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 790c5fa7ea39..fd1ef16252f4 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -163,9 +163,8 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) int node; if (irq >= nr_irqs) { - printk(KERN_WARNING "irq >= nr_irqs in irq_to_desc_alloc: %d %d\n", - irq, nr_irqs); - WARN_ON(1); + WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", + irq, nr_irqs); return NULL; } From 0fa0ebbf15addc1be8f73325d809c8547a9de304 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: [PATCH 049/566] irq: allocate irq_desc_ptrs array based on nr_irqs Impact: allocate irq_desc_ptrs in preparation for making it variable-sized. This addresses this memory usage bump when NR_CPUS bumped from 128 to 4096: 34816 +229376 264192 +658% irq_desc_ptrs(.data.read_mostly) The patch is split into two parts, the first simply allocates the irq_desc_ptrs array. Then next will deal with making it variable. This is only when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- kernel/irq/handle.c | 11 +++++++++-- kernel/irq/internals.h | 7 +++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index fd1ef16252f4..d0b8f7e72790 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "internals.h" @@ -110,7 +111,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) */ DEFINE_SPINLOCK(sparse_irq_lock); -struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; +struct irq_desc **irq_desc_ptrs __read_mostly; static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { [0 ... NR_IRQS_LEGACY-1] = { @@ -137,6 +138,9 @@ int __init early_irq_init(void) desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); + /* allocate irq_desc_ptrs array based on nr_irqs */ + irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); + for (i = 0; i < legacy_count; i++) { desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy[i]; @@ -153,7 +157,10 @@ int __init early_irq_init(void) struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < nr_irqs) ? irq_desc_ptrs[irq] : NULL; + if (irq_desc_ptrs && irq < nr_irqs) + return irq_desc_ptrs[irq]; + + return NULL; } struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index e6d0a43cc125..40416a81a0f5 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -16,7 +16,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, extern struct lock_class_key irq_desc_lock_class; extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); extern spinlock_t sparse_irq_lock; + +#ifdef CONFIG_SPARSE_IRQ +/* irq_desc_ptrs allocated at boot time */ +extern struct irq_desc **irq_desc_ptrs; +#else +/* irq_desc_ptrs is a fixed size array */ extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; +#endif #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: [PATCH 050/566] irq: initialize nr_irqs based on nr_cpu_ids Impact: Reduce memory usage. This is the second half of the changes to make the irq_desc_ptrs be variable sized based on nr_cpu_ids. This is done by adding a new "max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids. This necessitated moving the define of MAX_IO_APICS to a separate file (asm/apicnum.h) so it could be included without the baggage of the other asm/apicdef.h declarations. Signed-off-by: Mike Travis --- arch/x86/include/asm/apicdef.h | 8 ++------ arch/x86/include/asm/apicnum.h | 12 ++++++++++++ arch/x86/include/asm/irq_vectors.h | 16 +++++++++++----- include/linux/irqnr.h | 7 +++++++ kernel/irq/handle.c | 3 +++ 5 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 arch/x86/include/asm/apicnum.h diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 63134e31e8b9..1a6454ef7f6c 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,12 +132,8 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif +/* get MAX_IO_APICS */ +#include /* * All x86-64 systems are xAPIC compatible. diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h new file mode 100644 index 000000000000..82f613c607ce --- /dev/null +++ b/arch/x86/include/asm/apicnum.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_APICNUM_H +#define _ASM_X86_APICNUM_H + +/* define MAX_IO_APICS */ +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index f7ff65032b9d..602361ad0e74 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -105,6 +105,8 @@ #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#include /* need MAX_IO_APICS */ + #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) @@ -112,11 +114,15 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else -# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) -# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif + +/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ +# define max_nr_irqs(nr_cpus) \ + ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ + (NR_VECTORS + (8 * NR_CPUS)) : \ + (NR_VECTORS + (32 * MAX_IO_APICS))) \ + +# define NR_IRQS max_nr_irqs(NR_CPUS) + #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 86af92e9e84c..de66e4e10406 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -20,11 +20,18 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) + #else /* CONFIG_GENERIC_HARDIRQS */ +#include /* need possible max_nr_irqs() */ + extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); +# ifndef max_nr_irqs +# define max_nr_irqs(nr_cpus) NR_IRQS +# endif + # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d0b8f7e72790..ebba7a116f14 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -133,6 +133,9 @@ int __init early_irq_init(void) int legacy_count; int i; + /* initialize nr_irqs based on nr_cpu_ids */ + nr_irqs = max_nr_irqs(nr_cpu_ids); + printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); desc = irq_desc_legacy; From 542d865bbed4ce1f050f586e53cf1cfadda93766 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: [PATCH 051/566] kstat: modify kstat_irqs_legacy to be variable sized Impact: reduce memory usage. Allocate kstat_irqs_legacy based on nr_cpu_ids to deal with this memory usage bump when NR_CPUS bumped from 128 to 4096: 8192 +253952 262144 +3100% kstat_irqs_legacy(.bss) This is only when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- kernel/irq/handle.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index ebba7a116f14..b39f32ac8f80 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -124,8 +124,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm } }; -/* FIXME: use bootmem alloc ...*/ -static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; +static unsigned int *kstat_irqs_legacy; int __init early_irq_init(void) { @@ -144,9 +143,14 @@ int __init early_irq_init(void) /* allocate irq_desc_ptrs array based on nr_irqs */ irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); + /* allocate based on nr_cpu_ids */ + /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */ + kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids * + sizeof(int)); + for (i = 0; i < legacy_count; i++) { desc[i].irq = i; - desc[i].kstat_irqs = kstat_irqs_legacy[i]; + desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); init_alloc_desc_masks(&desc[i], 0, true); irq_desc_ptrs[i] = desc + i; From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 11 Jan 2009 09:22:58 -0800 Subject: [PATCH 052/566] cpumask, irq: non-x86 build failures Ingo Molnar wrote: > All non-x86 architectures fail to build: > > In file included from /home/mingo/tip/include/linux/random.h:11, > from /home/mingo/tip/include/linux/stackprotector.h:6, > from /home/mingo/tip/init/main.c:17: > /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory Do not include asm/irq_vectors.h in generic code - it's not available on all architectures. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 8 ++++++-- include/linux/irqnr.h | 6 ------ kernel/irq/handle.c | 5 +++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 1a6454ef7f6c..63134e31e8b9 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,8 +132,12 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -/* get MAX_IO_APICS */ -#include +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif /* * All x86-64 systems are xAPIC compatible. diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index de66e4e10406..887477bc2ab0 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -23,15 +23,9 @@ #else /* CONFIG_GENERIC_HARDIRQS */ -#include /* need possible max_nr_irqs() */ - extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); -# ifndef max_nr_irqs -# define max_nr_irqs(nr_cpus) NR_IRQS -# endif - # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b39f32ac8f80..04d3e46031e5 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -58,6 +58,11 @@ int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); #ifdef CONFIG_SPARSE_IRQ + +#ifndef max_nr_irqs +#define max_nr_irqs(nr_cpus) NR_IRQS +#endif + static struct irq_desc irq_desc_init = { .irq = -1, .status = IRQ_DISABLED, From 28e08861b9afab4168b758fb7b95aa7a4da0f668 Mon Sep 17 00:00:00 2001 From: Christophe Saout Date: Sun, 11 Jan 2009 11:46:23 -0800 Subject: [PATCH 053/566] xen: fix too early kmalloc call Impact: fix bootup crash on xen guests SLAB is not yet up, with earlyprintk it is giving me an Oops in __kmalloc. Replace call to kmalloc() with alloc_bootmem(). Reported-by: Christophe Saout Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index ed7527b3745a..3141e149d595 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -831,8 +832,8 @@ void __init xen_init_IRQ(void) int i; size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s); - cpu_evtchn_mask_p = kmalloc(size, GFP_KERNEL); - BUG_ON(cpu_evtchn_mask == NULL); + cpu_evtchn_mask_p = alloc_bootmem(size); + BUG_ON(cpu_evtchn_mask_p == NULL); init_evtchn_cpu_bindings(); From dd3feda7748b4c2739de47daaaa387fb01926c15 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:44:29 +0530 Subject: [PATCH 054/566] x86: microcode_intel.c fix style problems Impact: cleanup Fix: WARNING: Use #include instead of ERROR: trailing whitespace ERROR: "(foo*)" should be "(foo *)" total: 3 errors, 1 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index b7f4c929e615..5e9f4fc51385 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -87,9 +87,9 @@ #include #include #include +#include #include -#include #include #include @@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; } -static inline int +static inline int update_match_revision(struct microcode_header_intel *mc_header, int rev) { return (mc_header->rev <= rev) ? 0 : 1; @@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } - ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, - &get_ucode_fw); + ret = generic_load_microcode(cpu, (void *)firmware->data, + firmware->size, &get_ucode_fw); release_firmware(firmware); @@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) From 448dd2fa3ec915ad4325868ef8bb9b9490d9f6a9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:45:14 +0530 Subject: [PATCH 055/566] x86: msr.c fix style problems Impact: cleanup Fix: WARNING: Use #include instead of total: 0 errors, 1 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 726266695b2c..3cf3413ec626 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -35,10 +35,10 @@ #include #include #include +#include #include #include -#include #include static struct class *msr_class; From e17029ad69c7b1518413c00f793d89bb77b8527b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:46:03 +0530 Subject: [PATCH 056/566] x86: module_32.c fix style problems Impact: cleanup Fix: ERROR: code indent should use tabs where possible ERROR: trailing whitespace ERROR: spaces required around that '=' (ctx:VxW) total: 3 errors, 0 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/module_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c index 3db0a5442eb1..0edd819050e7 100644 --- a/arch/x86/kernel/module_32.c +++ b/arch/x86/kernel/module_32.c @@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } /* We don't need anything special. */ @@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".text", secstrings + s->sh_name)) text = s; if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } From 3b9dc9f2f123286aaade54c45fef6723d587c664 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:46:48 +0530 Subject: [PATCH 057/566] x86: module_64.c fix style problems Impact: cleanup Fix: ERROR: trailing whitespace ERROR: code indent should use tabs where possible WARNING: %Ld/%Lu are not-standard C, use %lld/%llu WARNING: printk() should include KERN_ facility level ERROR: spaces required around that '=' (ctx:VxW) total: 13 errors, 2 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/module_64.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 6ba87830d4b1..c23880b90b5c 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c @@ -30,14 +30,14 @@ #include #include -#define DEBUGP(fmt...) +#define DEBUGP(fmt...) #ifndef CONFIG_UML void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } void *module_alloc(unsigned long size) @@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; Elf64_Sym *sym; void *loc; - u64 val; + u64 val; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rel[i].r_info); - DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); + DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)loc); - val = sym->st_value + rel[i].r_addend; + val = sym->st_value + rel[i].r_addend; switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: @@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if ((s64)val != *(s32 *)loc) goto overflow; break; - case R_X86_64_PC32: + case R_X86_64_PC32: val -= (u64)loc; *(u32 *)loc = val; #if 0 if ((s64)val != *(s32 *)loc) - goto overflow; + goto overflow; #endif break; default: - printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", + printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + printk(KERN_ERR "overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", me->name); @@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk("non add relocation not supported\n"); + printk(KERN_ERR "non add relocation not supported\n"); return -ENOSYS; -} +} int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) + const Elf_Shdr *sechdrs, + struct module *me) { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL; @@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } From e65e49d0f3714f4a6a42f6f6a19926ba33fcda75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 Jan 2009 15:27:13 -0800 Subject: [PATCH 058/566] irq: update all arches for new irq_desc Impact: cleanup, update to new cpumask API Irq_desc.affinity and irq_desc.pending_mask are now cpumask_var_t's so access to them should be using the new cpumask API. Signed-off-by: Mike Travis --- arch/alpha/kernel/irq.c | 2 +- arch/arm/kernel/irq.c | 18 ++++++++++++------ arch/arm/oprofile/op_model_mpcore.c | 2 +- arch/blackfin/kernel/irqchip.c | 5 +++++ arch/ia64/kernel/iosapic.c | 2 +- arch/ia64/kernel/irq.c | 4 ++-- arch/ia64/kernel/msi_ia64.c | 4 ++-- arch/ia64/sn/kernel/msi_sn.c | 2 +- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/irq-gic.c | 2 +- arch/mips/kernel/smtc.c | 2 +- arch/mips/mti-malta/malta-smtc.c | 5 +++-- arch/parisc/kernel/irq.c | 8 ++++---- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/platforms/pseries/xics.c | 5 +++-- arch/powerpc/sysdev/mpic.c | 3 ++- arch/sparc/kernel/irq_64.c | 5 +++-- 17 files changed, 44 insertions(+), 29 deletions(-) diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 703731accda6..7bc7489223f3 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq) cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); last_cpu = cpu; - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); return 0; } diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 7141cee1fab7..4bb723eadad1 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -104,6 +104,11 @@ static struct irq_desc bad_irq_desc = { .lock = SPIN_LOCK_UNLOCKED }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating bad_irq_desc.affinity or .pending_mask */ +#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + /* * do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. Instead, they should provide their @@ -161,7 +166,7 @@ void __init init_IRQ(void) irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE; #ifdef CONFIG_SMP - bad_irq_desc.affinity = CPU_MASK_ALL; + cpumask_setall(bad_irq_desc.affinity); bad_irq_desc.cpu = smp_processor_id(); #endif init_arch_irq(); @@ -191,15 +196,16 @@ void migrate_irqs(void) struct irq_desc *desc = irq_desc + i; if (desc->cpu == cpu) { - unsigned int newcpu = any_online_cpu(desc->affinity); - - if (newcpu == NR_CPUS) { + unsigned int newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); + if (newcpu >= nr_cpu_ids) { if (printk_ratelimit()) printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", i, cpu); - cpus_setall(desc->affinity); - newcpu = any_online_cpu(desc->affinity); + cpumask_setall(desc->affinity); + newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); } route_irq(desc, i, newcpu); diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 6d6bd5899240..853d42bb8682 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -263,7 +263,7 @@ static void em_route_irq(int irq, unsigned int cpu) const struct cpumask *mask = cpumask_of(cpu); spin_lock_irq(&desc->lock); - desc->affinity = *mask; + cpumask_copy(desc->affinity, mask); desc->chip->set_affinity(irq, mask); spin_unlock_irq(&desc->lock); } diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index ab8209cbbad0..5780d6df1542 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -69,6 +69,11 @@ static struct irq_desc bad_irq_desc = { #endif }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating a variable-sized bad_irq_desc.affinity */ +#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, j; diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 5cfd3d91001a..006ad366a454 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -880,7 +880,7 @@ iosapic_unregister_intr (unsigned int gsi) if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP /* Clear affinity */ - cpus_setall(idesc->affinity); + cpumask_setall(idesc->affinity); #endif /* Clear the interrupt information */ iosapic_intr_info[irq].dest = 0; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index a58f64ca9f0e..226233a6fa19 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -103,7 +103,7 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { if (irq < NR_IRQS) { - cpumask_copy(&irq_desc[irq].affinity, + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu_logical_id(hwid))); irq_redir[irq] = (char) (redir & 0xff); } @@ -148,7 +148,7 @@ static void migrate_irqs(void) if (desc->status == IRQ_PER_CPU) continue; - if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) + if (cpumask_any_and(irq_desc[irq].affinity, cpu_online_mask) >= nr_cpu_ids) { /* * Save it for phase 2 processing diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 890339339035..dcb6b7c51ea7 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -75,7 +75,7 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, msg.data = data; write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); } #endif /* CONFIG_SMP */ @@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = *mask; + cpumask_copy(irq_desc[irq].affinity, mask); } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index ca553b0429ce..81e428943d73 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -205,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = *cpu_mask; + cpumask_copy(irq_desc[irq].affinity, cpu_mask); } #endif /* CONFIG_SMP */ diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index abc62aa744ac..3214ade02d10 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -66,7 +66,7 @@ extern void smtc_forward_irq(unsigned int irq); */ #define IRQ_AFFINITY_HOOK(irq) \ do { \ - if (!cpu_isset(smp_processor_id(), irq_desc[irq].affinity)) { \ + if (!cpumask_test_cpu(smp_processor_id(), irq_desc[irq].affinity)) {\ smtc_forward_irq(irq); \ irq_exit(); \ return; \ diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 494a49a317e9..87deb8f6c458 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = *cpumask; + cpumask_copy(irq_desc[irq].affinity, cpumask); spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index b6cca01ff82b..d2c1ab12425a 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -686,7 +686,7 @@ void smtc_forward_irq(unsigned int irq) * and efficiency, we just pick the easiest one to find. */ - target = first_cpu(irq_desc[irq].affinity); + target = cpumask_first(irq_desc[irq].affinity); /* * We depend on the platform code to have correctly processed diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index aabd7274507b..5ba31888fefb 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -116,7 +116,7 @@ struct plat_smp_ops msmtc_smp_ops = { void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { - cpumask_t tmask = *affinity; + cpumask_t tmask; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,11 +139,12 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) * be made to forward to an offline "CPU". */ + cpumask_copy(&tmask, affinity); for_each_cpu(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } - irq_desc[irq].affinity = tmask; + cpumask_copy(irq_desc[irq].affinity, &tmask); if (cpus_empty(tmask)) /* diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index ac2c822928c7..49482806863f 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -120,7 +120,7 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) if (CHECK_IRQ_PER_CPU(irq)) { /* Bad linux design decision. The mask has already * been set; we must reset it */ - irq_desc[irq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[irq].affinity); return -EINVAL; } @@ -136,7 +136,7 @@ static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) if (cpu_check_affinity(irq, dest)) return; - irq_desc[irq].affinity = *dest; + cpumask_copy(irq_desc[irq].affinity, dest); } #endif @@ -295,7 +295,7 @@ int txn_alloc_irq(unsigned int bits_wide) unsigned long txn_affinity_addr(unsigned int irq, int cpu) { #ifdef CONFIG_SMP - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); #endif return per_cpu(cpu_data, cpu).txn_addr; @@ -352,7 +352,7 @@ void do_cpu_irq_mask(struct pt_regs *regs) irq = eirr_to_irq(eirr_val); #ifdef CONFIG_SMP - dest = irq_desc[irq].affinity; + cpumask_copy(&dest, irq_desc[irq].affinity); if (CHECK_IRQ_PER_CPU(irq_desc[irq].status) && !cpu_isset(smp_processor_id(), dest)) { int cpu = first_cpu(dest); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 23b8b5e36f98..ad1e5ac721d8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -231,7 +231,7 @@ void fixup_irqs(cpumask_t map) if (irq_desc[irq].status & IRQ_PER_CPU) continue; - cpus_and(mask, irq_desc[irq].affinity, map); + cpumask_and(&mask, irq_desc[irq].affinity, &map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 84e058f1e1cc..80b513449f4c 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -153,9 +153,10 @@ static int get_irq_server(unsigned int virq, unsigned int strict_check) { int server; /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = irq_desc[virq].affinity; + cpumask_t cpumask; cpumask_t tmp = CPU_MASK_NONE; + cpumask_copy(&cpumask, irq_desc[virq].affinity); if (!distribute_irqs) return default_server; @@ -869,7 +870,7 @@ void xics_migrate_irqs_away(void) virq, cpu); /* Reset affinity to all cpus */ - irq_desc[virq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[virq].affinity); desc->chip->set_affinity(virq, cpu_all_mask); unlock: spin_unlock_irqrestore(&desc->lock, flags); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3e0d89dcdba2..0afd21f9a222 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -566,9 +566,10 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index cab8e0286871..4ac5c651e00d 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -247,9 +247,10 @@ struct irq_handler_data { #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); @@ -854,7 +855,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - &irq_desc[irq].affinity); + irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 17:39:24 -0800 Subject: [PATCH 059/566] x86: arch_probe_nr_irqs Impact: save RAM with large NR_CPUS, get smaller nr_irqs Signed-off-by: Yinghai Lu Signed-off-by: Mike Travis --- arch/x86/include/asm/irq_vectors.h | 7 ++----- arch/x86/kernel/io_apic.c | 16 ++++++++++++++++ include/linux/interrupt.h | 1 + kernel/irq/handle.c | 9 ++------- kernel/softirq.c | 5 +++++ 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 602361ad0e74..a16a2ab2b429 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -115,14 +115,11 @@ # endif #else -/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ -# define max_nr_irqs(nr_cpus) \ - ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ +# define NR_IRQS \ + ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ (NR_VECTORS + (32 * MAX_IO_APICS))) \ -# define NR_IRQS max_nr_irqs(NR_CPUS) - #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ae80638012de..157986916cd1 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3850,6 +3850,22 @@ void __init probe_nr_irqs_gsi(void) nr_irqs_gsi = nr; } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? + (NR_VECTORS + (8 * nr_cpu_ids)) : + (NR_VECTORS + (32 * nr_ioapics))); + + if (nr < nr_irqs && nr > nr_irqs_gsi) + nr_irqs = nr; + + return 0; +} +#endif + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9127f6b51a39..472f11765f60 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -467,6 +467,7 @@ int show_interrupts(struct seq_file *p, void *v); struct irq_desc; extern int early_irq_init(void); +extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); extern int arch_init_chip_data(struct irq_desc *desc, int cpu); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 04d3e46031e5..375d68cd5bf0 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -59,10 +59,6 @@ EXPORT_SYMBOL_GPL(nr_irqs); #ifdef CONFIG_SPARSE_IRQ -#ifndef max_nr_irqs -#define max_nr_irqs(nr_cpus) NR_IRQS -#endif - static struct irq_desc irq_desc_init = { .irq = -1, .status = IRQ_DISABLED, @@ -137,9 +133,8 @@ int __init early_irq_init(void) int legacy_count; int i; - /* initialize nr_irqs based on nr_cpu_ids */ - nr_irqs = max_nr_irqs(nr_cpu_ids); - + /* initialize nr_irqs based on nr_cpu_ids */ + arch_probe_nr_irqs(); printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); desc = irq_desc_legacy; diff --git a/kernel/softirq.c b/kernel/softirq.c index bdbe9de9cd8d..0365b4899a3d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -795,6 +795,11 @@ int __init __weak early_irq_init(void) return 0; } +int __init __weak arch_probe_nr_irqs(void) +{ + return 0; +} + int __init __weak arch_early_irq_init(void) { return 0; From a4a0acf8e17e3d08e28b721ceceb898fbc959ceb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 13 Jan 2009 18:43:03 -0800 Subject: [PATCH 060/566] x86: fix broken flush_tlb_others_ipi() This commit broke flush_tlb_others_ipi() causing boot hangs on a 16 logical cpu system: > commit 4595f9620cda8a1e973588e743cf5f8436dd20c6 > Author: Rusty Russell > Date: Sat Jan 10 21:58:09 2009 -0800 > > x86: change flush_tlb_others to take a const struct cpumask This change resulted in sending the invalidate tlb vector to the sender itself causing the hang. flush_tlb_others_ipi() should exclude the sender itself from the destination list. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 7a3f9891302d..54ee2ecb5e26 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -188,7 +188,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(f->flush_cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); From b5ba7e6d1e7e2ac808afd21be1e56dc34caf20e6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:46:17 +0530 Subject: [PATCH 061/566] x86: replacing mp_config_ioapic with mpc_ioapic Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 10 +----- arch/x86/kernel/acpi/boot.c | 28 ++++++++-------- arch/x86/kernel/io_apic.c | 60 ++++++++++++++++------------------ arch/x86/kernel/mpparse.c | 12 +++---- 4 files changed, 50 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7a1f44ac1f17..5a56ae9b505a 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -120,14 +120,6 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_ioapic { - unsigned long mp_apicaddr; - unsigned int mp_apicid; - unsigned char mp_type; - unsigned char mp_apicver; - unsigned char mp_flags; -}; - struct mp_config_intsrc { unsigned int mp_dstapic; unsigned char mp_type; @@ -139,7 +131,7 @@ struct mp_config_intsrc { }; /* I/O APIC entries */ -extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f438..2b27019e64f8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); for (i = 0; i < nr_ioapics; i++) { - struct mp_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mp_apicid, used); + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); } if (!test_bit(id, used)) return id; @@ -945,29 +945,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) idx = nr_ioapics; - mp_ioapics[idx].mp_type = MP_IOAPIC; - mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mp_apicaddr = address; + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); + mp_ioapics[idx].apicid = uniq_ioapic_id(id); #ifdef CONFIG_X86_32 - mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); + mp_ioapics[idx].apicver = io_apic_get_version(idx); #else - mp_ioapics[idx].mp_apicver = 0; + mp_ioapics[idx].apicver = 0; #endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, - mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; @@ -1026,7 +1026,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) mp_irq.mp_irqflag = (trigger << 2) | polarity; mp_irq.mp_srcbus = MP_ISA_BUS; mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ + mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ mp_irq.mp_dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); @@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void) ioapic = mp_find_ioapic(0); if (ioapic < 0) return; - dstapic = mp_ioapics[ioapic].mp_apicid; + dstapic = mp_ioapics[ioapic].apicid; /* * Use the default configuration for the IRQs 0-15. Unless diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 109c91db2026..6c51ecdfbf49 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -83,7 +83,7 @@ static DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ @@ -387,7 +387,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -946,7 +946,7 @@ static int find_irq_entry(int apic, int pin, int type) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid || mp_irqs[i].mp_dstapic == MP_APIC_ALL) && mp_irqs[i].mp_dstirq == pin) return i; @@ -988,7 +988,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -1016,7 +1016,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic || mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; @@ -1567,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); + mp_ioapics[apic].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1605,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void) notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); continue; } if (notcon) { @@ -1700,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1720,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -2122,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].mp_apicid; + old_id = mp_ioapics[apic].apicid; - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + mp_ioapics[apic].apicid = reg_00.bits.ID; } /* @@ -2138,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { + mp_ioapics[apic].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2149,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; + mp_ioapics[apic].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2164,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mp_apicid) + if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mp_dstapic == old_id) mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; + = mp_ioapics[apic].apicid; /* * Read the right value from the MPC table and @@ -2176,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + reg_00.bits.ID = mp_ioapics[apic].apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2189,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -3118,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -4101,7 +4099,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; + ioapic_phys = mp_ioapics[i].apicaddr; #ifdef CONFIG_X86_32 if (!ioapic_phys) { printk(KERN_ERR diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 8385d4e7e15d..a86a65537433 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -143,11 +143,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) if (bad_ioapic(m->apicaddr)) return; - mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; - mp_ioapics[nr_ioapics].mp_apicid = m->apicid; - mp_ioapics[nr_ioapics].mp_type = m->type; - mp_ioapics[nr_ioapics].mp_apicver = m->apicver; - mp_ioapics[nr_ioapics].mp_flags = m->flags; + mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; + mp_ioapics[nr_ioapics].apicid = m->apicid; + mp_ioapics[nr_ioapics].type = m->type; + mp_ioapics[nr_ioapics].apicver = m->apicver; + mp_ioapics[nr_ioapics].flags = m->flags; nr_ioapics++; } @@ -416,7 +416,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.type = MP_INTSRC; intsrc.irqflag = 0; /* conforming */ intsrc.srcbus = 0; - intsrc.dstapic = mp_ioapics[0].mp_apicid; + intsrc.dstapic = mp_ioapics[0].apicid; intsrc.irqtype = mp_INT; From c2c21745ecba23c74690a124bcd371f83bd71e45 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:47:22 +0530 Subject: [PATCH 062/566] x86: replacing mp_config_intsrc with mpc_intsrc Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 16 +------- arch/x86/kernel/acpi/boot.c | 70 +++++++++++++++++----------------- arch/x86/kernel/io_apic.c | 64 +++++++++++++++---------------- arch/x86/kernel/mpparse.c | 68 ++++++++++++++++----------------- 4 files changed, 101 insertions(+), 117 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 5a56ae9b505a..08ec793aa043 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -114,22 +114,8 @@ struct IR_IO_APIC_route_entry { extern int nr_ioapics; extern int nr_ioapic_registers[MAX_IO_APICS]; -/* - * MP-BIOS irq configuration table structures: - */ - #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_intsrc { - unsigned int mp_dstapic; - unsigned char mp_type; - unsigned char mp_irqtype; - unsigned short mp_irqflag; - unsigned char mp_srcbus; - unsigned char mp_srcbusirq; - unsigned char mp_dstirq; -}; - /* I/O APIC entries */ extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; @@ -137,7 +123,7 @@ extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2b27019e64f8..4cb5964f1499 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,19 +973,19 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } -static void assign_to_mp_irq(struct mp_config_intsrc *m, - struct mp_config_intsrc *mp_irq) +static void assign_to_mp_irq(struct mpc_intsrc *m, + struct mpc_intsrc *mp_irq) { - memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); + memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); } -static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, - struct mp_config_intsrc *m) +static int mp_irq_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) { - return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); + return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); } -static void save_mp_irq(struct mp_config_intsrc *m) +static void save_mp_irq(struct mpc_intsrc *m) { int i; @@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { int ioapic; int pin; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; /* * Convert 'gsi' to 'ioapic.pin'. @@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (trigger << 2) | polarity; - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ - mp_irq.mp_dstirq = pin; /* INTIN# */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; /* IRQ */ + mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ + mp_irq.dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); } @@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void) int i; int ioapic; unsigned int dstapic; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; #if defined (CONFIG_MCA) || defined (CONFIG_EISA) /* @@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void) int idx; for (idx = 0; idx < mp_irq_entries; idx++) { - struct mp_config_intsrc *irq = mp_irqs + idx; + struct mpc_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mp_srcbus == MP_ISA_BUS - && irq->mp_srcbusirq == i) + if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ - if (irq->mp_dstapic == dstapic && - irq->mp_dstirq == i) + if (irq->dstapic == dstapic && irq->dstirq == i) break; } @@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqflag = 0; /* Conforming */ - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_dstapic = dstapic; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_srcbusirq = i; /* Identity mapped */ - mp_irq.mp_dstirq = i; + mp_irq.type = MP_INTSRC; + mp_irq.irqflag = 0; /* Conforming */ + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.dstapic = dstapic; + mp_irq.irqtype = mp_INT; + mp_irq.srcbusirq = i; /* Identity mapped */ + mp_irq.dstirq = i; save_mp_irq(&mp_irq); } @@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity) { #ifdef CONFIG_X86_MPPARSE - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; int ioapic; if (!acpi_ioapic) return 0; /* print the entry should happen on mptable identically */ - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.mp_srcbus = number; - mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); - mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; + mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; save_mp_irq(&mp_irq); #endif diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 6c51ecdfbf49..79b8c0c72d34 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -87,7 +87,7 @@ struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -945,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) + if (mp_irqs[i].irqtype == type && + (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || + mp_irqs[i].dstapic == MP_APIC_ALL) && + mp_irqs[i].dstirq == pin) return i; return -1; @@ -962,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) - return mp_irqs[i].mp_dstirq; + return mp_irqs[i].dstirq; } return -1; } @@ -978,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) return apic; } } @@ -1013,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && + !mp_irqs[i].irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) + if (pin == (mp_irqs[i].srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -1072,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -1089,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mp_irqflag & 3) + switch (mp_irqs[idx].irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -1131,13 +1131,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) + switch ((mp_irqs[idx].irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -1215,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mp_dstirq != pin) + if (mp_irqs[idx].dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; + irq = mp_irqs[idx].srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -2164,8 +2164,8 @@ static void __init setup_ioapic_ids_from_mpc(void) */ if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic + if (mp_irqs[i].dstapic == old_id) + mp_irqs[i].dstapic = mp_ioapics[apic].apicid; /* @@ -3983,8 +3983,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) + if (mp_irqs[i].irqtype == mp_INT && + mp_irqs[i].srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a86a65537433..ad36377dc935 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -159,55 +159,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m) m->srcbusirq, m->dstapic, m->dstirq); } -static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) { apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", - mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, - (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, - mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); + mp_irq->irqtype, mp_irq->irqflag & 3, + (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, + mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); } static void __init assign_to_mp_irq(struct mpc_intsrc *m, - struct mp_config_intsrc *mp_irq) + struct mpc_intsrc *mp_irq) { - mp_irq->mp_dstapic = m->dstapic; - mp_irq->mp_type = m->type; - mp_irq->mp_irqtype = m->irqtype; - mp_irq->mp_irqflag = m->irqflag; - mp_irq->mp_srcbus = m->srcbus; - mp_irq->mp_srcbusirq = m->srcbusirq; - mp_irq->mp_dstirq = m->dstirq; + mp_irq->dstapic = m->dstapic; + mp_irq->type = m->type; + mp_irq->irqtype = m->irqtype; + mp_irq->irqflag = m->irqflag; + mp_irq->srcbus = m->srcbus; + mp_irq->srcbusirq = m->srcbusirq; + mp_irq->dstirq = m->dstirq; } -static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, +static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - m->dstapic = mp_irq->mp_dstapic; - m->type = mp_irq->mp_type; - m->irqtype = mp_irq->mp_irqtype; - m->irqflag = mp_irq->mp_irqflag; - m->srcbus = mp_irq->mp_srcbus; - m->srcbusirq = mp_irq->mp_srcbusirq; - m->dstirq = mp_irq->mp_dstirq; + m->dstapic = mp_irq->dstapic; + m->type = mp_irq->type; + m->irqtype = mp_irq->irqtype; + m->irqflag = mp_irq->irqflag; + m->srcbus = mp_irq->srcbus; + m->srcbusirq = mp_irq->srcbusirq; + m->dstirq = mp_irq->dstirq; } -static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, +static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - if (mp_irq->mp_dstapic != m->dstapic) + if (mp_irq->dstapic != m->dstapic) return 1; - if (mp_irq->mp_type != m->type) + if (mp_irq->type != m->type) return 2; - if (mp_irq->mp_irqtype != m->irqtype) + if (mp_irq->irqtype != m->irqtype) return 3; - if (mp_irq->mp_irqflag != m->irqflag) + if (mp_irq->irqflag != m->irqflag) return 4; - if (mp_irq->mp_srcbus != m->srcbus) + if (mp_irq->srcbus != m->srcbus) return 5; - if (mp_irq->mp_srcbusirq != m->srcbusirq) + if (mp_irq->srcbusirq != m->srcbusirq) return 6; - if (mp_irq->mp_dstirq != m->dstirq) + if (mp_irq->dstirq != m->dstirq) return 7; return 0; @@ -808,15 +808,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) /* not legacy */ for (i = 0; i < mp_irq_entries; i++) { - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; - if (mp_irqs[i].mp_srcbus != m->srcbus) + if (mp_irqs[i].srcbus != m->srcbus) continue; - if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) + if (mp_irqs[i].srcbusirq != m->srcbusirq) continue; if (irq_used[i]) { /* already claimed */ @@ -921,10 +921,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, if (irq_used[i]) continue; - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; if (nr_m_spare > 0) { From 09b3ec7315a18d885127544204f1e389d41058d0 Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Mon, 12 Jan 2009 22:35:42 +0100 Subject: [PATCH 063/566] x86, tlb flush_data: replace per_cpu with an array Impact: micro-optimization, memory reduction On x86_64 flush tlb data is stored in per_cpu variables. This is unnecessary because only the first NUM_INVALIDATE_TLB_VECTORS entries are accessed. This patch aims at making the code less confusing (there's nothing really "per_cpu") by using a plain array. It also would save some memory on most distros out there (Ubuntu x86_64 has NR_CPUS=64 by default). [ Ravikiran G Thirumalai also pointed out that the correct alignment is ____cacheline_internodealigned_in_smp, so that there's no bouncing on vsmp. ] Signed-off-by: Frederik Deweerdt Acked-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e48..8cfea5d14517 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -33,7 +33,7 @@ * To avoid global state use 8 different call vectors. * Each CPU uses a specific vector to trigger flushes on other * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. + * the right array slot for the flush data. * * With more than 8 CPUs they are hashed to the 8 available * vectors. The limited global vector space forces us to this right now. @@ -48,13 +48,13 @@ union smp_flush_state { unsigned long flush_va; spinlock_t tlbstate_lock; }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; + char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; +} ____cacheline_internodealigned_in_smp; /* State is put into the per CPU data section, but padded to a full cache line because other CPUs can access it and we don't want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); +static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; /* * We cannot call mmdrop() because we are in interrupt context, @@ -129,7 +129,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * Use that to determine where the sender put the data. */ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; if (!cpu_isset(cpu, f->flush_cpumask)) goto out; @@ -169,7 +169,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; /* * Could avoid this lock when @@ -205,8 +205,8 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + for (i = 0; i < ARRAY_SIZE(flush_state); i++) + spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } From 0a2a18b721abc960fbcada406746877d22340a60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 23:37:16 +0100 Subject: [PATCH 064/566] x86: change the default cache size to 64 bytes Right now the generic cacheline size is 128 bytes - that is wasteful when structures are aligned, as all modern x86 CPUs have an (effective) cacheline sizes of 64 bytes. It was set to 128 bytes due to some cacheline aliasing problems on older P4 systems, but those are many years old and we dont optimize for them anymore. (They'll still get the 128 bytes cacheline size if the kernel is specifically built for Pentium 4) Signed-off-by: Ingo Molnar Acked-by: Arjan van de Ven --- arch/x86/Kconfig.cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8078955845ae..cdf4a9623237 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -307,10 +307,10 @@ config X86_CMPXCHG config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC + default "7" if MPENTIUM4 || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU config X86_XADD def_bool y From b665967979d0e990f196e7c4ba88e17c9ed9b781 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 11:54:27 -0800 Subject: [PATCH 065/566] x86: make 32bit MAX_HARDIRQS_PER_CPU to be NR_VECTORS Impact: clean up to be same as 64bit 32-bit is using per-cpu vector too, so don't use default NR_IRQS. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hardirq_32.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index cf7954d1405f..d4b5d731073f 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -19,6 +19,9 @@ typedef struct { DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + #define __ARCH_IRQ_STAT #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) From 444027031cd069ea7e48b016cb33bbf201c8a9f0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:47 +0300 Subject: [PATCH 066/566] x86: headers cleanup - prctl.h Impact: cleanup (internal kernel function exported) 'make headers_check' warn us about leaking of kernel private (mostly compile time vars) data to userspace in headers. Fix it. sys_arch_prctl is completely removed from header since frankly I don't even understand why we describe it here. It is described like __SYSCALL(__NR_arch_prctl, sys_arch_prctl) in unistd_64.h and implemented in process_64.c. User-mode linux involved? So this one in fact is suspicious. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/prctl.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h index a8894647dd9a..3ac5032fae09 100644 --- a/arch/x86/include/asm/prctl.h +++ b/arch/x86/include/asm/prctl.h @@ -6,8 +6,4 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 -#ifdef CONFIG_X86_64 -extern long sys_arch_prctl(int, unsigned long); -#endif /* CONFIG_X86_64 */ - #endif /* _ASM_X86_PRCTL_H */ From a7c4e68615e20771f279c51a2bec8980675c78c7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:49 +0300 Subject: [PATCH 067/566] x86: headers cleanup - sigcontext32.h Impact: cleanup 'make headers_check' warn us about lack of linux/types.h here. Lets add it. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sigcontext32.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/asm/sigcontext32.h index 6126188cf3a9..ad1478c4ae12 100644 --- a/arch/x86/include/asm/sigcontext32.h +++ b/arch/x86/include/asm/sigcontext32.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SIGCONTEXT32_H #define _ASM_X86_SIGCONTEXT32_H +#include + /* signal context for 32bit programs. */ #define X86_FXSR_MAGIC 0x0000 From dbca1df48e89d8aa59254fdc10ef16c16e73d94e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:50 +0300 Subject: [PATCH 068/566] x86: headers cleanup - setup.h Impact: cleanup 'make headers_check' warn us about leaking of kernel private (mostly compile time vars) data to userspace in headers. Fix it. Guard this one by __KERNEL__. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/setup.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a3..29d31c0d13d6 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SETUP_H #define _ASM_X86_SETUP_H +#ifdef __KERNEL__ + #define COMMAND_LINE_SIZE 2048 #ifndef __ASSEMBLY__ @@ -8,10 +10,8 @@ /* Interrupt control for vSMPowered x86_64 systems */ void vsmp_init(void); - void setup_bios_corruption_check(void); - #ifdef CONFIG_X86_VISWS extern void visws_early_detect(void); extern int is_visws_box(void); @@ -43,7 +43,7 @@ struct x86_quirks { void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); void (*mpc_oem_pci_bus)(struct mpc_bus *m); void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, - unsigned short oemsize); + unsigned short oemsize); int (*setup_ioapic_ids)(void); int (*update_genapic)(void); }; @@ -56,8 +56,6 @@ extern unsigned long saved_video_mode; #endif #endif /* __ASSEMBLY__ */ -#ifdef __KERNEL__ - #ifdef __i386__ #include From d2287f5ebea9ff2487d614719775f0b03fce15f6 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 14 Jan 2009 15:43:54 -0800 Subject: [PATCH 069/566] irq: update all arches for new irq_desc, fix Impact: fix build errors Since the SPARSE IRQS changes redefined how the kstat irqs are organized, arch's must use the new accessor function: kstat_incr_irqs_this_cpu(irq, DESC); If CONFIG_SPARSE_IRQS is set, then DESC is a pointer to the irq_desc which has a pointer to the kstat_irqs. If not, then the .irqs field of struct kernel_stat is used instead. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/ia64/kernel/irq_ia64.c | 12 ++++++++---- arch/mips/kernel/smtc.c | 4 +++- arch/mips/sgi-ip22/ip22-int.c | 2 +- arch/mips/sgi-ip22/ip22-time.c | 2 +- arch/mips/sibyte/bcm1480/smp.c | 3 ++- arch/mips/sibyte/sb1250/smp.c | 3 ++- arch/mn10300/kernel/mn10300-watchdog.c | 3 ++- arch/sparc/kernel/time_64.c | 2 +- 8 files changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 28d3d483db92..927ad027820c 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -493,11 +493,13 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); ia64_srlz_d(); while (vector != IA64_SPURIOUS_INT_VECTOR) { + struct irq_desc *desc = irq_to_desc(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); else { int irq = local_vector_to_irq(vector); @@ -551,11 +553,13 @@ void ia64_process_pending_intr(void) * Perform normal interrupt style processing */ while (vector != IA64_SPURIOUS_INT_VECTOR) { + struct irq_desc *desc = irq_to_desc(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); else { struct pt_regs *old_regs = set_irq_regs(NULL); int irq = local_vector_to_irq(vector); diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index d2c1ab12425a..5f5af7d4c890 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -921,11 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi) struct clock_event_device *cd; void *arg_copy = pipi->arg; int type_copy = pipi->type; + int irq = MIPS_CPU_IRQ_BASE + 1; + smtc_ipi_nq(&freeIPIq, pipi); switch (type_copy) { case SMTC_CLOCK_TICK: irq_enter(); - kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); cd = &per_cpu(mips_clockevent_device, cpu); cd->event_handler(cd); irq_exit(); diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c index f8b18af141a1..0ecd5fe9486e 100644 --- a/arch/mips/sgi-ip22/ip22-int.c +++ b/arch/mips/sgi-ip22/ip22-int.c @@ -155,7 +155,7 @@ static void indy_buserror_irq(void) int irq = SGI_BUSERR_IRQ; irq_enter(); - kstat_this_cpu.irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); ip22_be_interrupt(irq); irq_exit(); } diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c index 3dcb27ec0c53..c8f7d2328b24 100644 --- a/arch/mips/sgi-ip22/ip22-time.c +++ b/arch/mips/sgi-ip22/ip22-time.c @@ -122,7 +122,7 @@ void indy_8254timer_irq(void) char c; irq_enter(); - kstat_this_cpu.irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); printk(KERN_ALERT "Oops, got 8254 interrupt.\n"); ArcRead(0, &c, 1, &cnt); ArcEnterInteractiveMode(); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index dddfda8e8294..314691648c97 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -178,9 +178,10 @@ struct plat_smp_ops bcm1480_smp_ops = { void bcm1480_mailbox_interrupt(void) { int cpu = smp_processor_id(); + int irq = K_BCM1480_INT_MBOX_0_0; unsigned int action; - kstat_this_cpu.irqs[K_BCM1480_INT_MBOX_0_0]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); /* Load the mailbox register to figure out what we're supposed to do */ action = (__raw_readq(mailbox_0_regs[cpu]) >> 48) & 0xffff; diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index 5950a288a7da..cad14003b84f 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -166,9 +166,10 @@ struct plat_smp_ops sb_smp_ops = { void sb1250_mailbox_interrupt(void) { int cpu = smp_processor_id(); + int irq = K_INT_MBOX_0; unsigned int action; - kstat_this_cpu.irqs[K_INT_MBOX_0]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); /* Load the mailbox register to figure out what we're supposed to do */ action = (____raw_readq(mailbox_regs[cpu]) >> 48) & 0xffff; diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c index 10811e981d20..2e370d88a87a 100644 --- a/arch/mn10300/kernel/mn10300-watchdog.c +++ b/arch/mn10300/kernel/mn10300-watchdog.c @@ -130,6 +130,7 @@ void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep) * the stack NMI-atomically, it's safe to use smp_processor_id(). */ int sum, cpu = smp_processor_id(); + int irq = NMIIRQ; u8 wdt, tmp; wdt = WDCTR & ~WDCTR_WDCNE; @@ -138,7 +139,7 @@ void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep) NMICR = NMICR_WDIF; nmi_count(cpu)++; - kstat_this_cpu.irqs[NMIIRQ]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); sum = irq_stat[cpu].__irq_count; if (last_irq_sums[cpu] == sum) { diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 54405d362148..28b48f3eb256 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -727,7 +727,7 @@ void timer_interrupt(int irq, struct pt_regs *regs) irq_enter(); - kstat_this_cpu.irqs[0]++; + kstat_incr_irqs_this_cpu(0, irq_to_desc(0)); if (unlikely(!evt->event_handler)) { printk(KERN_WARNING From f11826385b63566d98c02d35f592232ee77cd791 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 14 Jan 2009 12:27:35 +0000 Subject: [PATCH 070/566] x86: fully honor "nolapic" Impact: widen the effect of the 'nolapic' boot parameter "nolapic" should not only suppress SMP and use of the LAPIC, but it also ought to have the effect of disabling all IO-APIC related activity as well as PCI MSI and HT-IRQs. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 7 ++++++- arch/x86/kernel/io_apic.c | 6 ++++++ arch/x86/kernel/smpboot.c | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f5675..c3dd64fabcf3 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1126,6 +1126,11 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; + if (disable_apic) { + disable_ioapic_setup(); + return; + } + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && esr_disable) { @@ -1566,11 +1571,11 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { -#ifdef CONFIG_X86_64 if (disable_apic) { pr_info("Apic disabled\n"); return -1; } +#ifdef CONFIG_X86_64 if (!cpu_has_apic) { disable_apic = 1; pr_info("Apic disabled by BIOS\n"); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536c..40747e58f305 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3258,6 +3258,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms int err; unsigned dest; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (err) @@ -3726,6 +3729,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct irq_cfg *cfg; int err; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb1a3b1fc87f..31f99ec2e0fd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1125,6 +1125,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); + disable_ioapic_setup(); return -1; } From a08c4743ed5b861c4fa3d75be00da7106c926296 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 14 Jan 2009 12:28:51 +0000 Subject: [PATCH 071/566] x86: avoid early crash in disable_local_APIC() E.g. when called due to an early panic. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c3dd64fabcf3..38d6aab2358d 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -895,6 +895,10 @@ void disable_local_APIC(void) { unsigned int value; + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + clear_local_APIC(); /* From 54da5b3d44238eeb7417bacf792fb416d473bf4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 13:04:58 +0100 Subject: [PATCH 072/566] x86: fix broken flush_tlb_others_ipi(), fix Impact: cleanup Use the proper type. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 54ee2ecb5e26..7f4141d3b661 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -188,7 +188,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(f->flush_cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); From e56d0cfe7790fd3218ae4f6aae1335547fea8763 Mon Sep 17 00:00:00 2001 From: Baodong Chen Date: Thu, 8 Jan 2009 19:24:29 +0800 Subject: [PATCH 073/566] Documentation/x86/boot.txt: modify fieldname Modify field names to the right ones: - start_sys was changed to start_sys_seg - iinitrd_addr_max was changed to ramdisk_max - pad2 was changed to pad2 and pad3 - readmode_swtch was changed to realmode_swtch Signed-off-by: Baodong Chen <[email]chenbdchenbd@gmail.com[email]> Acked-by: Jiri Kosina Signed-off-by: Ingo Molnar --- Documentation/x86/boot.txt | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 7b4596ac4120..12299697b7cd 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -158,7 +158,7 @@ Offset Proto Name Meaning 0202/4 2.00+ header Magic signature "HdrS" 0206/2 2.00+ version Boot protocol version supported 0208/4 2.00+ realmode_swtch Boot loader hook (see below) -020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) +020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete) 020E/2 2.00+ kernel_version Pointer to kernel version string 0210/1 2.00+ type_of_loader Boot loader identifier 0211/1 2.00+ loadflags Boot protocol option flags @@ -170,10 +170,11 @@ Offset Proto Name Meaning 0224/2 2.01+ heap_end_ptr Free memory after setup end 0226/2 N/A pad1 Unused 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line -022C/4 2.03+ initrd_addr_max Highest legal initrd address +022C/4 2.03+ ramdisk_max Highest legal initrd address 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not -0235/3 N/A pad2 Unused +0235/1 N/A pad2 Unused +0236/2 N/A pad3 Unused 0238/4 2.06+ cmdline_size Maximum size of the kernel command line 023C/4 2.07+ hardware_subarch Hardware subarchitecture 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data @@ -299,14 +300,14 @@ Protocol: 2.00+ e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version 10.17. -Field name: readmode_swtch +Field name: realmode_swtch Type: modify (optional) Offset/size: 0x208/4 Protocol: 2.00+ Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) -Field name: start_sys +Field name: start_sys_seg Type: read Offset/size: 0x20c/2 Protocol: 2.00+ @@ -468,7 +469,7 @@ Protocol: 2.02+ zero, the kernel will assume that your boot loader does not support the 2.02+ protocol. -Field name: initrd_addr_max +Field name: ramdisk_max Type: read Offset/size: 0x22c/4 Protocol: 2.03+ From 5cd7376200be7b8bab085557ff5876b04bd84191 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 15:46:08 +0100 Subject: [PATCH 074/566] fix: crash: IP: __bitmap_intersects+0x48/0x73 -tip testing found this crash: > [ 35.258515] calling acpi_cpufreq_init+0x0/0x127 @ 1 > [ 35.264127] BUG: unable to handle kernel NULL pointer dereference at (null) > [ 35.267554] IP: [] __bitmap_intersects+0x48/0x73 > [ 35.267554] PGD 0 > [ 35.267554] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c is still broken: there's no allocation of the variable mask, so we pass in an uninitialized cmd.mask field to drv_read(), which then passes it to the scheduler which then crashes ... Switch it over to the much simpler constant-cpumask-pointers approach. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 6f11e029e8c5..019276717a7f 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -145,7 +145,7 @@ typedef union { struct drv_cmd { unsigned int type; - cpumask_var_t mask; + const struct cpumask *mask; drv_addr_union addr; u32 val; }; @@ -235,8 +235,7 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - cpumask_copy(cmd.mask, mask); - + cmd.mask = mask; drv_read(&cmd); dprintk("get_cur_val = %u\n", cmd.val); @@ -403,9 +402,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return -ENODEV; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return -ENOMEM; - perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, data->freq_table, @@ -450,9 +446,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, /* cpufreq holds the hotplug lock, so we are safe from here on */ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); + cmd.mask = policy->cpus; else - cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); + cmd.mask = cpumask_of(policy->cpu); freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; @@ -479,7 +475,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, perf->state = next_perf_state; out: - free_cpumask_var(cmd.mask); return result; } From f2a082711905312dc7b6675e913fee0c4689f7ae Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 15 Jan 2009 09:19:32 -0800 Subject: [PATCH 075/566] x86: fix build warning when CONFIG_NUMA not defined. Impact: fix build warning The macro cpu_to_node did not reference it's argument, and instead simply returned a 0. This causes a "unused variable" warning if it's the only reference in a function (show_cache_disable). Replace it with the more correct inline function. Signed-off-by: Mike Travis --- arch/x86/include/asm/topology.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab27..d0c68e291635 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -192,9 +192,20 @@ extern int __node_distance(int, int); #else /* !CONFIG_NUMA */ -#define numa_node_id() 0 -#define cpu_to_node(cpu) 0 -#define early_cpu_to_node(cpu) 0 +static inline int numa_node_id(void) +{ + return 0; +} + +static inline int cpu_to_node(int cpu) +{ + return 0; +} + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} static inline const cpumask_t *cpumask_of_node(int node) { From c99dbbe9f8f6b3e9383e64710217e873431d1c31 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 15 Jan 2009 12:09:44 -0800 Subject: [PATCH 076/566] sched: fix warning on ia64 Andrew Morton reported this warning on ia64: kernel/sched.c: In function `sd_init_NODE': kernel/sched.c:7449: warning: comparison of distinct pointer types lacks a cast Using the untyped min() function produces such warnings. Fix: type the constant 32 as unsigned int to match typeof(num_online_cpus). Reported-by: Andrew Morton Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 32f3af1641c5..3193f4417e16 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -84,7 +84,7 @@ void build_cpu_to_node_map(void); .child = NULL, \ .groups = NULL, \ .min_interval = 8, \ - .max_interval = 8*(min(num_online_cpus(), 32)), \ + .max_interval = 8*(min(num_online_cpus(), 32U)), \ .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_nice_tries = 2, \ From 7de6883faad71e3a253d55b9e1a47b89ebce0a31 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: [PATCH 077/566] x86: fix pda_to_op() There's no instruction to move a 64bit immediate into memory location. Drop "i". Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2fbfff88df37..cbd3f48a8320 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -78,7 +78,7 @@ do { \ case 8: \ asm(op "q %1,%%gs:%c2": \ "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ + "r" ((T__)val), \ "i"(pda_offset(field))); \ break; \ default: \ From f10fcd47120e80f66665567dbe17f5071c7aef52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: [PATCH 078/566] x86: make early_per_cpu() a lvalue and use it Make early_per_cpu() a lvalue as per_cpu() is and use it where applicable. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 6 +++--- arch/x86/include/asm/topology.h | 5 +---- arch/x86/kernel/apic.c | 13 ++----------- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece72053ba63..df644f3e53e6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -195,9 +195,9 @@ do { \ #define early_per_cpu_ptr(_name) (_name##_early_ptr) #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) #define early_per_cpu(_name, _cpu) \ - (early_per_cpu_ptr(_name) ? \ - early_per_cpu_ptr(_name)[_cpu] : \ - per_cpu(_name, _cpu)) + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ + &per_cpu(_name, _cpu)) #else /* !CONFIG_SMP */ #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab27..87ca3fd86e88 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -102,10 +102,7 @@ static inline int cpu_to_node(int cpu) /* Same function but used if called before per_cpu areas are setup */ static inline int early_cpu_to_node(int cpu) { - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - return per_cpu(x86_cpu_to_node_map, cpu); + return early_per_cpu(x86_cpu_to_node_map, cpu); } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 38d6aab2358d..485787955834 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1877,17 +1877,8 @@ void __cpuinit generic_processor_info(int apicid, int version) #endif #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } + early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; + early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif set_cpu_possible(cpu, true); From c90aa894f0240084f2c6e42e2333b211d6cfe2b2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: [PATCH 079/566] x86: cleanup early setup_percpu references [ Based on original patch from Christoph Lameter and Mike Travis. ] * Ruggedize some calls in setup_percpu.c to prevent mishaps in early calls, particularly for non-critical functions. * Cleanup DEBUG_PER_CPU_MAPS usages and some comments. Signed-off-by: Mike Travis Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 56 ++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bf63de72b643..56c63ac62b10 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) +#else +# define DBG(x...) +#endif + #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; @@ -27,31 +33,39 @@ unsigned int max_physical_apicid; physid_mask_t phys_cpu_present_map; #endif -/* map cpu index to physical APIC ID */ +/* + * Map cpu index to physical APIC ID + */ DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 +#define X86_64_NUMA 1 /* (used later) */ -/* map cpu index to node index */ +/* + * Map cpu index to node index + */ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); -/* which logical CPUs are on which nodes */ +/* + * Which logical CPUs are on which nodes + */ cpumask_t *node_to_cpumask_map; EXPORT_SYMBOL(node_to_cpumask_map); -/* setup node_to_cpumask_map */ +/* + * Setup node_to_cpumask_map + */ static void __init setup_node_to_cpumask_map(void); #else static inline void setup_node_to_cpumask_map(void) { } #endif -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the @@ -200,6 +214,8 @@ void __init setup_per_cpu_areas(void) #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } /* Setup percpu data maps */ @@ -221,6 +237,7 @@ void __init setup_per_cpu_areas(void) * Requires node_possible_map to be valid. * * Note: node_to_cpumask() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) */ static void __init setup_node_to_cpumask_map(void) { @@ -236,6 +253,7 @@ static void __init setup_node_to_cpumask_map(void) /* allocate the map */ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); pr_debug("Node to cpumask map at %p for %d nodes\n", map, nr_node_ids); @@ -248,17 +266,23 @@ void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - if (cpu_pda(cpu) && node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; - - if (cpu_to_node_map) + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { cpu_to_node_map[cpu] = node; + return; + } - else if (per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; - else - pr_debug("Setting node for non-present cpu %d\n", cpu); + if (node != NUMA_NO_NODE) + cpu_pda(cpu)->nodenumber = node; } void __cpuinit numa_clear_node(int cpu) @@ -275,7 +299,7 @@ void __cpuinit numa_add_cpu(int cpu) void __cpuinit numa_remove_cpu(int cpu) { - cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); + cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } #else /* CONFIG_DEBUG_PER_CPU_MAPS */ @@ -285,7 +309,7 @@ void __cpuinit numa_remove_cpu(int cpu) */ static void __cpuinit numa_set_cpumask(int cpu, int enable) { - int node = cpu_to_node(cpu); + int node = early_cpu_to_node(cpu); cpumask_t *mask; char buf[64]; From a698c823e15149941b0f0281527d0c0d1daf2639 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 080/566] x86: make vmlinux_32.lds.S use PERCPU() macro Make vmlinux_32.lds.S use the generic PERCPU() macro instead of open coding it. This will ease future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_32.lds.S | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 82c67559dde7..3eba7f7bac05 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -178,14 +178,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); /* freed after init ends here */ From 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 081/566] x86: make percpu symbols zerobased on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] This patch makes percpu symbols zerobased on x86_64 SMP by adding PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on the percpu output section and using it in vmlinux_64.lds.S. A new PHDR is added as existing ones cannot contain sections near address zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which always points to the vaddr of the loaded percpu data.init region. The following adjustments have been made to accomodate the address change. * code to locate percpu gdt_page in head_64.S is updated to add the load address to the gdt_page offset. * __per_cpu_load is used in places where access to the init data area is necessary. * pda->data_offset is initialized soon after C code is entered as zero value doesn't work anymore. This patch is mostly taken from Mike Travis' "x86_64: Base percpu variables at zero" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/head_64.S | 24 ++++++++++++++- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/vmlinux_64.lds.S | 17 ++++++++++- include/asm-generic/sections.h | 2 +- include/asm-generic/vmlinux.lds.h | 51 +++++++++++++++++++++++++++---- 6 files changed, 88 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b935..bc2900ca82c7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -44,6 +44,8 @@ void __init x86_64_init_pda(void) { _cpu_pda = __cpu_pda; cpu_pda(0) = &_boot_cpu_pda; + cpu_pda(0)->data_offset = + (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d495563..7ee0363871e8 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -204,6 +204,23 @@ ENTRY(secondary_startup_64) pushq $0 popfq +#ifdef CONFIG_SMP + /* + * early_gdt_base should point to the gdt_page in static percpu init + * data area. Computing this requires two symbols - __per_cpu_load + * and per_cpu__gdt_page. As linker can't do no such relocation, do + * it by hand. As early_gdt_descr is manipulated by C code for + * secondary CPUs, this should be done only once for the boot CPU + * when early_gdt_descr_base contains zero. + */ + movq early_gdt_descr_base(%rip), %rax + testq %rax, %rax + jnz 1f + movq $__per_cpu_load, %rax + addq $per_cpu__gdt_page, %rax + movq %rax, early_gdt_descr_base(%rip) +1: +#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 - .quad per_cpu__gdt_page +#ifdef CONFIG_SMP +early_gdt_descr_base: + .quad 0x0000000000000000 +#else + .quad per_cpu__gdt_page +#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 56c63ac62b10..44845842e722 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void) } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6bef..f50280db0dfe 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -19,6 +19,9 @@ PHDRS { data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -208,14 +211,26 @@ SECTIONS __initramfs_end = .; #endif +#ifdef CONFIG_SMP + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * switch it back to data.init. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else PERCPU(PAGE_SIZE) +#endif . = ALIGN(PAGE_SIZE); __init_end = .; . = ALIGN(PAGE_SIZE); __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + *(.data.nosave) + } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 79a7ff925bf8..4ce48e878530 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char _end[]; -extern char __per_cpu_start[], __per_cpu_end[]; +extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c61fab1dd2f8..fc2f55f2dcd6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,12 +430,51 @@ *(.initcall7.init) \ *(.initcall7s.init) -#define PERCPU(align) \ - . = ALIGN(align); \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ +#define PERCPU_PROLOG(vaddr) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_start) = .; + +#define PERCPU_EPILOG(phdr) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } phdr \ + . = __per_cpu_load + SIZEOF(.data.percpu); + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. If @vaddr + * is not blank, it specifies explicit base address and all percpu + * symbols will be offset from the given address. If blank, @vaddr + * always equals @laddr + LOAD_OFFSET. + * + * @phdr defines the output PHDR to use if not blank. Be warned that + * output PHDR is sticky. If @phdr is specified, the next output + * section in the linker script will go there too. @phdr should have + * a leading colon. + * + * This macro defines three symbols, __per_cpu_load, __per_cpu_start + * and __per_cpu_end. The first one is the vaddr of loaded percpu + * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the + * end offset. + */ +#define PERCPU_VADDR(vaddr, phdr) \ + PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ - } \ - VMLINUX_SYMBOL(__per_cpu_end) = .; + PERCPU_EPILOG(phdr) + +/** + * PERCPU - define output section for percpu area, simple version + * @align: required alignment + * + * Align to @align and outputs output section for percpu area. This + * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and + * __per_cpu_start will be identical. + */ +#define PERCPU(align) \ + . = ALIGN(align); \ + PERCPU_VADDR( , ) From f32ff5388d86518c0375ccdb330d3b459b9c405e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 082/566] x86: load pointer to pda into %gs while brining up a CPU [ Based on original patch from Christoph Lameter and Mike Travis. ] CPU startup code in head_64.S loaded address of a zero page into %gs for temporary use till pda is loaded but address to the actual pda is available at the point. Load the real address directly instead. This will help unifying percpu and pda handling later on. This patch is mostly taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo --- arch/x86/include/asm/trampoline.h | 1 + arch/x86/kernel/acpi/sleep.c | 1 + arch/x86/kernel/head64.c | 4 ++-- arch/x86/kernel/head_64.S | 15 ++++++++++----- arch/x86/kernel/smpboot.c | 1 + 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 780ba0ab94f9..90f06c25221d 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -13,6 +13,7 @@ extern unsigned char *trampoline_base; extern unsigned long init_rsp; extern unsigned long initial_code; +extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) #define TRAMPOLINE_BASE 0x6000 diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6f95fa..9ff67f8dc2c0 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,6 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + initial_gs = (unsigned long)cpu_pda(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bc2900ca82c7..76ffba2aa66d 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,8 +26,8 @@ #include #include -/* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda; +/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ +struct x8664_pda _boot_cpu_pda; #ifdef CONFIG_SMP /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7ee0363871e8..2f0ab0089883 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -243,12 +243,15 @@ ENTRY(secondary_startup_64) movl %eax,%fs movl %eax,%gs - /* - * Setup up a dummy PDA. this is just for some early bootup code - * that does in_interrupt() - */ + /* Set up %gs. + * + * %gs should point to the pda. For initial boot, make %gs point + * to the _boot_cpu_pda in data section. For a secondary CPU, + * initial_gs should be set to its pda address before the CPU runs + * this code. + */ movl $MSR_GS_BASE,%ecx - movq $empty_zero_page,%rax + movq initial_gs(%rip),%rax movq %rax,%rdx shrq $32,%rdx wrmsr @@ -274,6 +277,8 @@ ENTRY(secondary_startup_64) .align 8 ENTRY(initial_code) .quad x86_64_start_kernel + ENTRY(initial_gs) + .quad _boot_cpu_pda __FINITDATA ENTRY(stack_start) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1a712da1dfa0..70d846628bbf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -854,6 +854,7 @@ do_rest: #else cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + initial_gs = (unsigned long)cpu_pda(cpu); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; From c8f3329a0ddd751241e96b4100df7eda14b2cbc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 083/566] x86: use static _cpu_pda array _cpu_pda array first uses statically allocated storage in data.init and then switches to allocated bootmem to conserve space. However, after folding pda area into percpu area, _cpu_pda array will be removed completely. Drop the reallocation part to simplify the code for soon-to-follow changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 3 ++- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/head64.c | 12 ------------ arch/x86/kernel/setup_percpu.c | 14 +++----------- 4 files changed, 6 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index cbd3f48a8320..2d5b49c3248e 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /* Per processor datastructure. %gs points to it while the kernel runs */ @@ -39,7 +40,7 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda **_cpu_pda; +extern struct x8664_pda *_cpu_pda[NR_CPUS]; extern void pda_init(int); #define cpu_pda(i) (_cpu_pda[i]) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f00258462444..c116c599326e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,7 +879,7 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; +struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; EXPORT_SYMBOL(_cpu_pda); struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 76ffba2aa66d..462d0beccb6b 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -29,20 +29,8 @@ /* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ struct x8664_pda _boot_cpu_pda; -#ifdef CONFIG_SMP -/* - * We install an empty cpu_pda pointer table to indicate to early users - * (numa_set_node) that the cpu_pda pointer table for cpus other than - * the boot cpu is not yet setup. - */ -static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; -#else -static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; -#endif - void __init x86_64_init_pda(void) { - _cpu_pda = __cpu_pda; cpu_pda(0) = &_boot_cpu_pda; cpu_pda(0)->data_offset = (unsigned long)(__per_cpu_load - __per_cpu_start); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 44845842e722..73ab01b297c5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -114,7 +114,6 @@ static inline void setup_cpu_pda_map(void) { } static void __init setup_cpu_pda_map(void) { char *pda; - struct x8664_pda **new_cpu_pda; unsigned long size; int cpu; @@ -122,28 +121,21 @@ static void __init setup_cpu_pda_map(void) /* allocate cpu_pda array and pointer table */ { - unsigned long tsize = nr_cpu_ids * sizeof(void *); unsigned long asize = size * (nr_cpu_ids - 1); - tsize = roundup(tsize, cache_line_size()); - new_cpu_pda = alloc_bootmem(tsize + asize); - pda = (char *)new_cpu_pda + tsize; + pda = alloc_bootmem(asize); } /* initialize pointer table to static pda's */ for_each_possible_cpu(cpu) { if (cpu == 0) { /* leave boot cpu pda in place */ - new_cpu_pda[0] = cpu_pda(0); continue; } - new_cpu_pda[cpu] = (struct x8664_pda *)pda; - new_cpu_pda[cpu]->in_bootmem = 1; + cpu_pda(cpu) = (struct x8664_pda *)pda; + cpu_pda(cpu)->in_bootmem = 1; pda += size; } - - /* point to new pointer table */ - _cpu_pda = new_cpu_pda; } #endif /* CONFIG_SMP && CONFIG_X86_64 */ From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 084/566] x86: fold pda into percpu area on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] Currently pdas and percpu areas are allocated separately. %gs points to local pda and percpu area can be reached using pda->data_offset. This patch folds pda into percpu area. Due to strange gcc requirement, pda needs to be at the beginning of the percpu area so that pda->stack_canary is at %gs:40. To achieve this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is added and used to reserve pda sized chunk at the start of the percpu area. After this change, for boot cpu, %gs first points to pda in the data.init area and later during setup_per_cpu_areas() gets updated to point to the actual pda. This means that setup_per_cpu_areas() need to reload %gs for CPU0 while clearing pda area for other cpus as cpu0 already has modified it when control reaches setup_per_cpu_areas(). This patch also removes now unnecessary get_local_pda() and its call sites. A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 8 +++ arch/x86/include/asm/smp.h | 2 - arch/x86/kernel/asm-offsets_64.c | 1 + arch/x86/kernel/cpu/common.c | 6 +- arch/x86/kernel/head64.c | 8 ++- arch/x86/kernel/head_64.S | 15 +++-- arch/x86/kernel/setup_percpu.c | 107 +++++++++++++----------------- arch/x86/kernel/smpboot.c | 60 +---------------- arch/x86/kernel/vmlinux_64.lds.S | 6 +- arch/x86/xen/smp.c | 10 --- include/asm-generic/vmlinux.lds.h | 25 ++++++- 11 files changed, 104 insertions(+), 144 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index df644f3e53e6..0ed77cf33f76 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,6 +1,14 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H +#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif +#endif + #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a8cea7b09434..127415402ea1 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -19,8 +19,6 @@ #include #include -extern int __cpuinit get_local_pda(int cpu); - extern int smp_num_siblings; extern unsigned int num_processors; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edbc..f8d1b047ef4f 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -56,6 +56,7 @@ int main(void) ENTRY(cpunumber); ENTRY(irqstackptr); ENTRY(data_offset); + DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c116c599326e..7041acdf5579 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -893,10 +893,8 @@ void __cpuinit pda_init(int cpu) /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); + + load_pda_offset(cpu); pda->cpunumber = cpu; pda->irqcount = -1; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 462d0beccb6b..1a311293f733 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,12 +26,18 @@ #include #include -/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ +#ifndef CONFIG_SMP +/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ struct x8664_pda _boot_cpu_pda; +#endif void __init x86_64_init_pda(void) { +#ifdef CONFIG_SMP + cpu_pda(0) = (void *)__per_cpu_load; +#else cpu_pda(0) = &_boot_cpu_pda; +#endif cpu_pda(0)->data_offset = (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2f0ab0089883..7a995d0e9f78 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -245,10 +245,13 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * %gs should point to the pda. For initial boot, make %gs point - * to the _boot_cpu_pda in data section. For a secondary CPU, - * initial_gs should be set to its pda address before the CPU runs - * this code. + * On SMP, %gs should point to the per-cpu area. For initial + * boot, make %gs point to the init data section. For a + * secondary CPU,initial_gs should be set to its pda address + * before the CPU runs this code. + * + * On UP, initial_gs points to _boot_cpu_pda and doesn't + * change. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -278,7 +281,11 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) +#ifdef CONFIG_SMP + .quad __per_cpu_load +#else .quad _boot_cpu_pda +#endif __FINITDATA ENTRY(stack_start) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 73ab01b297c5..63d462802272 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifdef CONFIG_DEBUG_PER_CPU_MAPS @@ -65,6 +66,36 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif +#ifdef CONFIG_X86_64 +void __cpuinit load_pda_offset(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); + mb(); +} + +#endif /* CONFIG_SMP && CONFIG_X86_64 */ + +#ifdef CONFIG_X86_64 + +/* correctly size the local cpu masks */ +static void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + +#else /* CONFIG_X86_32 */ + +static inline void setup_cpu_local_masks(void) +{ +} + +#endif /* CONFIG_X86_32 */ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the @@ -101,63 +132,7 @@ static void __init setup_per_cpu_maps(void) */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static inline void setup_cpu_pda_map(void) { } - -#elif !defined(CONFIG_SMP) -static inline void setup_cpu_pda_map(void) { } - -#else /* CONFIG_SMP && CONFIG_X86_64 */ - -/* - * Allocate cpu_pda pointer table and array via alloc_bootmem. - */ -static void __init setup_cpu_pda_map(void) -{ - char *pda; - unsigned long size; - int cpu; - - size = roundup(sizeof(struct x8664_pda), cache_line_size()); - - /* allocate cpu_pda array and pointer table */ - { - unsigned long asize = size * (nr_cpu_ids - 1); - - pda = alloc_bootmem(asize); - } - - /* initialize pointer table to static pda's */ - for_each_possible_cpu(cpu) { - if (cpu == 0) { - /* leave boot cpu pda in place */ - continue; - } - cpu_pda(cpu) = (struct x8664_pda *)pda; - cpu_pda(cpu)->in_bootmem = 1; - pda += size; - } -} - -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ +#endif /* * Great future plan: @@ -171,9 +146,6 @@ void __init setup_per_cpu_areas(void) int cpu; unsigned long align = 1; - /* Setup cpu_pda map */ - setup_cpu_pda_map(); - /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); @@ -204,8 +176,21 @@ void __init setup_per_cpu_areas(void) cpu, node, __pa(ptr)); } #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); +#ifdef CONFIG_X86_64 + cpu_pda(cpu) = (void *)ptr; + + /* + * CPU0 modified pda in the init data area, reload pda + * offset for CPU0 and clear the area for others. + */ + if (cpu == 0) + load_pda_offset(0); + else + memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); +#endif + per_cpu_offset(cpu) = ptr - __per_cpu_start; DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 70d846628bbf..f2f77ca494d4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -#ifdef CONFIG_X86_64 - -/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ -static void __ref free_bootmem_pda(struct x8664_pda *oldpda) -{ - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); -} - -/* - * Allocate node local memory for the AP pda. - * - * Must be called after the _cpu_pda pointer table is initialized. - */ -int __cpuinit get_local_pda(int cpu) -{ - struct x8664_pda *oldpda, *newpda; - unsigned long size = sizeof(struct x8664_pda); - int node = cpu_to_node(cpu); - - if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) - return 0; - - oldpda = cpu_pda(cpu); - newpda = kmalloc_node(size, GFP_ATOMIC, node); - if (!newpda) { - printk(KERN_ERR "Could not allocate node local PDA " - "for CPU %d on node %d\n", cpu, node); - - if (oldpda) - return 0; /* have a usable pda */ - else - return -1; - } - - if (oldpda) { - memcpy(newpda, oldpda, size); - free_bootmem_pda(oldpda); - } - - newpda->in_bootmem = 0; - cpu_pda(cpu) = newpda; - return 0; -} -#endif /* CONFIG_X86_64 */ - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - if (cpu > 0) { - boot_error = get_local_pda(cpu); - if (boot_error) - goto restore_state; - /* if can't get pda memory, can't start cpu */ - } -#endif - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -931,9 +875,7 @@ do_rest: inquire_remote_apic(apicid); } } -#ifdef CONFIG_X86_64 -restore_state: -#endif + if (boot_error) { /* Try to put things back the way they were before ... */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index f50280db0dfe..962f21f1d4d7 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -5,6 +5,7 @@ #define LOAD_OFFSET __START_KERNEL_map #include +#include #include #undef i386 /* in case the preprocessor is a 32bit one */ @@ -215,10 +216,11 @@ SECTIONS /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should - * switch it back to data.init. + * switch it back to data.init. Also, pda should be at the head of + * percpu area. Preallocate it. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) + PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) #else PERCPU(PAGE_SIZE) #endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c44e2069c7c7..83fa4236477d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -283,16 +283,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - WARN_ON(cpu == 0); - if (cpu > 0) { - rc = get_local_pda(cpu); - if (rc) - return rc; - } -#endif - #ifdef CONFIG_X86_32 init_gdt(cpu); per_cpu(current_task, cpu) = idle; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fc2f55f2dcd6..e53319cf29cb 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -441,9 +441,10 @@ . = __per_cpu_load + SIZEOF(.data.percpu); /** - * PERCPU_VADDR - define output section for percpu area + * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc * @vaddr: explicit base address (optional) * @phdr: destination PHDR (optional) + * @prealloc: the size of prealloc area * * Macro which expands to output section for percpu area. If @vaddr * is not blank, it specifies explicit base address and all percpu @@ -455,11 +456,33 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * + * If @prealloc is non-zero, the specified number of bytes will be + * reserved at the start of percpu area. As the prealloc area is + * likely to break alignment, this macro puts areas in increasing + * alignment order. + * * This macro defines three symbols, __per_cpu_load, __per_cpu_start * and __per_cpu_end. The first one is the vaddr of loaded percpu * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * end offset. */ +#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \ + PERCPU_PROLOG(vaddr) \ + . += prealloc; \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + *(.data.percpu.page_aligned) \ + PERCPU_EPILOG(segment) + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. Mostly + * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than + * using slighly different layout. + */ #define PERCPU_VADDR(vaddr, phdr) \ PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ From 9939ddaff52787b2a7c1adf1b2afc95421aa0884 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 085/566] x86: merge 64 and 32 SMP percpu handling Now that pda is allocated as part of percpu, percpu doesn't need to be accessed through pda. Unify x86_64 SMP percpu access with x86_32 SMP one. Other than the segment register, operand size and the base of percpu symbols, they behave identical now. This patch replaces now unnecessary pda->data_offset with a dummy field which is necessary to keep stack_canary at its place. This patch also moves per_cpu_offset initialization out of init_gdt() into setup_per_cpu_areas(). Note that this change also necessitates explicit per_cpu_offset initializations in voyager_smp.c. With this change, x86_OP_percpu()'s are as efficient on x86_64 as on x86_32 and also x86_64 can use assembly PER_CPU macros. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 3 +- arch/x86/include/asm/percpu.h | 127 +++++++++------------------- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/entry_64.S | 7 +- arch/x86/kernel/head64.c | 2 - arch/x86/kernel/setup_percpu.c | 15 ++-- arch/x86/kernel/smpcommon.c | 3 +- arch/x86/mach-voyager/voyager_smp.c | 2 + 8 files changed, 55 insertions(+), 105 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2d5b49c3248e..e91558e37850 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -11,8 +11,7 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { struct task_struct *pcurrent; /* 0 Current process */ - unsigned long data_offset; /* 8 Per cpu data offset from linker - address */ + unsigned long dummy; unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0ed77cf33f76..556f84b9ea96 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,62 +1,13 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_64 -extern void load_pda_offset(int cpu); +#define __percpu_seg gs +#define __percpu_mov_op movq #else -static inline void load_pda_offset(int cpu) { } +#define __percpu_seg fs +#define __percpu_mov_op movl #endif -#endif - -#ifdef CONFIG_X86_64 -#include - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include - -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - -#endif -#include - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ #ifdef __ASSEMBLY__ @@ -73,42 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda); * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__##this_cpu_off, reg; \ +#define PER_CPU(var, reg) \ + __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %fs:per_cpu__##var +#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + __percpu_mov_op $per_cpu__##var, reg #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ #else /* ...!ASSEMBLY */ -/* - * PER_CPU finds an address of a per-cpu variable. - * - * Args: - * var - variable name - * cpu - 32bit register containing the current CPU number - * - * The resulting address is stored in the "cpu" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) - */ +#include + #ifdef CONFIG_SMP - -#define __my_cpu_offset x86_read_percpu(this_cpu_off) - -/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ -#define __percpu_seg "%%fs:" - -#else /* !SMP */ - -#define __percpu_seg "" - -#endif /* SMP */ +#define __percpu_seg_str "%%"__stringify(__percpu_seg)":" +#define __my_cpu_offset x86_read_percpu(this_cpu_off) +#else +#define __percpu_seg_str +#endif #include @@ -128,20 +63,25 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg"%0" \ + asm(op "b %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg"%0" \ + asm(op "w %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg"%0" \ + asm(op "l %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ + case 8: \ + asm(op "q %1,"__percpu_seg_str"%0" \ + : "+m" (var) \ + : "r" ((T__)val)); \ + break; \ default: __bad_percpu_size(); \ } \ } while (0) @@ -151,17 +91,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg"%1,%0" \ + asm(op "b "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg"%1,%0" \ + asm(op "w "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg"%1,%0" \ + asm(op "l "__percpu_seg_str"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -175,8 +120,14 @@ do { \ #define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) + +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif + #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f8d1b047ef4f..f4cc81bfbf89 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -55,7 +55,6 @@ int main(void) ENTRY(irqcount); ENTRY(cpunumber); ENTRY(irqstackptr); - ENTRY(data_offset); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a987793..4833f3a19650 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -52,6 +52,7 @@ #include #include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -1072,10 +1073,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - movq %gs:pda_data_offset, %rbp - subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %rbp) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) call \do_sym - addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1a311293f733..e99b661a97f4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -38,8 +38,6 @@ void __init x86_64_init_pda(void) #else cpu_pda(0) = &_boot_cpu_pda; #endif - cpu_pda(0)->data_offset = - (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 63d462802272..be1ff34db112 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -125,14 +125,14 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_X86_32 -/* - * Great future not-so-futuristic plan: make i386 and x86_64 do it - * the same way - */ +#ifdef CONFIG_X86_64 +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { + [0] = (unsigned long)__per_cpu_load, +}; +#else unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(__per_cpu_offset); #endif +EXPORT_SYMBOL(__per_cpu_offset); /* * Great future plan: @@ -178,6 +178,7 @@ void __init setup_per_cpu_areas(void) #endif memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); + per_cpu_offset(cpu) = ptr - __per_cpu_start; #ifdef CONFIG_X86_64 cpu_pda(cpu) = (void *)ptr; @@ -190,7 +191,7 @@ void __init setup_per_cpu_areas(void) else memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 397e309839dd..84395fabc410 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -4,10 +4,10 @@ #include #include -#ifdef CONFIG_X86_32 DEFINE_PER_CPU(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); +#ifdef CONFIG_X86_32 /* * Initialize the CPU's GDT. This is either the boot CPU doing itself * (still using the master per-cpu area), or a CPU doing it for a @@ -24,7 +24,6 @@ __cpuinit void init_gdt(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(cpu_number, cpu) = cpu; } #endif diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 9840b7ec749a..1a48368acb09 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -531,6 +531,7 @@ static void __init do_boot_cpu(__u8 cpu) stack_start.sp = (void *)idle->thread.sp; init_gdt(cpu); + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1748,6 +1749,7 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { init_gdt(smp_processor_id()); + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); From b12d8db8fbfaed1e8222a15333a3645599636854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 086/566] x86: make pda a percpu variable [ Based on original patch from Christoph Lameter and Mike Travis. ] As pda is now allocated in percpu area, it can easily be made a proper percpu variable. Make it so by defining per cpu symbol from linker script and declaring it in C code for SMP and simply defining it for UP. This change cleans up code and brings SMP and UP closer a bit. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 5 +++-- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/head64.c | 10 ---------- arch/x86/kernel/head_64.S | 5 +++-- arch/x86/kernel/setup_percpu.c | 16 ++++++++++++++-- arch/x86/kernel/vmlinux_64.lds.S | 4 +++- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index e91558e37850..66ae1043393d 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -7,6 +7,7 @@ #include #include #include +#include /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { @@ -39,10 +40,10 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda *_cpu_pda[NR_CPUS]; +DECLARE_PER_CPU(struct x8664_pda, __pda); extern void pda_init(int); -#define cpu_pda(i) (_cpu_pda[i]) +#define cpu_pda(cpu) (&per_cpu(__pda, cpu)) /* * There is no fast way to get the base address of the PDA, all the accesses diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7041acdf5579..c49498d40830 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,9 +879,6 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e99b661a97f4..71b6f6ec96a2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,18 +26,8 @@ #include #include -#ifndef CONFIG_SMP -/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ -struct x8664_pda _boot_cpu_pda; -#endif - void __init x86_64_init_pda(void) { -#ifdef CONFIG_SMP - cpu_pda(0) = (void *)__per_cpu_load; -#else - cpu_pda(0) = &_boot_cpu_pda; -#endif pda_init(0); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7a995d0e9f78..c8ace880661b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_PARAVIRT #include @@ -250,7 +251,7 @@ ENTRY(secondary_startup_64) * secondary CPU,initial_gs should be set to its pda address * before the CPU runs this code. * - * On UP, initial_gs points to _boot_cpu_pda and doesn't + * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't * change. */ movl $MSR_GS_BASE,%ecx @@ -284,7 +285,7 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP .quad __per_cpu_load #else - .quad _boot_cpu_pda + .quad PER_CPU_VAR(__pda) #endif __FINITDATA diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index be1ff34db112..daeedf82c15f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -66,6 +66,16 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif +/* + * Define load_pda_offset() and per-cpu __pda for x86_64. + * load_pda_offset() is responsible for loading the offset of pda into + * %gs. + * + * On SMP, pda offset also duals as percpu base address and thus it + * should be at the start of per-cpu area. To achieve this, it's + * preallocated in vmlinux_64.lds.S directly instead of using + * DEFINE_PER_CPU(). + */ #ifdef CONFIG_X86_64 void __cpuinit load_pda_offset(int cpu) { @@ -74,6 +84,10 @@ void __cpuinit load_pda_offset(int cpu) wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); mb(); } +#ifndef CONFIG_SMP +DEFINE_PER_CPU(struct x8664_pda, __pda); +EXPORT_PER_CPU_SYMBOL(__pda); +#endif #endif /* CONFIG_SMP && CONFIG_X86_64 */ @@ -180,8 +194,6 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; #ifdef CONFIG_X86_64 - cpu_pda(cpu) = (void *)ptr; - /* * CPU0 modified pda in the init data area, reload pda * offset for CPU0 and clear the area for others. diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 962f21f1d4d7..d2a0baa87d1b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -217,10 +217,12 @@ SECTIONS * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should * switch it back to data.init. Also, pda should be at the head of - * percpu area. Preallocate it. + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. */ . = ALIGN(PAGE_SIZE); PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) + per_cpu____pda = __per_cpu_start; #else PERCPU(PAGE_SIZE) #endif From 49357d19e4fb31e28796eaff83499e7584c26878 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 087/566] x86: convert pda ops to wrappers around x86 percpu accessors pda is now a percpu variable and there's no reason it can't use plain x86 percpu accessors. Add x86_test_and_clear_bit_percpu() and replace pda op implementations with wrappers around x86 percpu accessors. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 88 +++----------------------------- arch/x86/include/asm/percpu.h | 10 ++++ arch/x86/kernel/vmlinux_64.lds.S | 1 - arch/x86/kernel/x8664_ksyms_64.c | 2 - 4 files changed, 16 insertions(+), 85 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 66ae1043393d..e3d3a081d798 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -45,91 +45,15 @@ extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -/* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ -extern void __bad_pda_field(void) __attribute__((noreturn)); - -/* - * proxy_pda doesn't actually exist, but tell gcc it is accessed for - * all PDA accesses so it gets read/write dependencies right. - */ -extern struct x8664_pda _proxy_pda; - -#define pda_offset(field) offsetof(struct x8664_pda, field) - -#define pda_to_op(op, field, val) \ -do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i" (pda_offset(field))); \ - break; \ - case 8: \ - asm(op "q %1,%%gs:%c2": \ - "+m" (_proxy_pda.field) : \ - "r" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ -} while (0) - -#define pda_from_op(op, field) \ -({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%gs:%c1,%0": \ - "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 8: \ - asm(op "q %%gs:%c1,%0": \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - ret__; \ -}) - -#define read_pda(field) pda_from_op("mov", field) -#define write_pda(field, val) pda_to_op("mov", field, val) -#define add_pda(field, val) pda_to_op("add", field, val) -#define sub_pda(field, val) pda_to_op("sub", field, val) -#define or_pda(field, val) pda_to_op("or", field, val) +#define read_pda(field) x86_read_percpu(__pda.field) +#define write_pda(field, val) x86_write_percpu(__pda.field, val) +#define add_pda(field, val) x86_add_percpu(__pda.field, val) +#define sub_pda(field, val) x86_sub_percpu(__pda.field, val) +#define or_pda(field, val) x86_or_percpu(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ -({ \ - int old__; \ - asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (_proxy_pda.field) \ - : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ - old__; \ -}) + x86_test_and_clear_bit_percpu(bit, __pda.field) #endif diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 556f84b9ea96..328b31a429d7 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -121,6 +121,16 @@ do { \ #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define x86_test_and_clear_bit_percpu(bit, var) \ +({ \ + int old__; \ + asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0" \ + : "=r" (old__) \ + : "dIr" (bit), "i" (&per_cpu__##var) : "memory"); \ + old__; \ +}) + #ifdef CONFIG_X86_64 extern void load_pda_offset(int cpu); #else diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index d2a0baa87d1b..a09abb8fb97f 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -14,7 +14,6 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) jiffies_64 = jiffies; -_proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 695e426aa354..3909e3ba5ce3 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); - -EXPORT_SYMBOL(_proxy_pda); From 004aa322f855a765741d9437a98dd8fe2e4f32a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 088/566] x86: misc clean up after the percpu update Do the following cleanups: * kill x86_64_init_pda() which now is equivalent to pda_init() * use per_cpu_offset() instead of cpu_pda() when initializing initial_gs Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/setup.h | 1 - arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/head64.c | 7 +------ arch/x86/kernel/smpboot.c | 2 +- arch/x86/xen/enlighten.c | 2 +- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a3..536949749bc2 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 9ff67f8dc2c0..4abff454c55b 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,7 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); - initial_gs = (unsigned long)cpu_pda(smp_processor_id()); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 71b6f6ec96a2..af67d3227ea6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,11 +26,6 @@ #include #include -void __init x86_64_init_pda(void) -{ - pda_init(0); -} - static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -96,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - x86_64_init_pda(); + pda_init(0); x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f2f77ca494d4..2f0e0f1090f6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,7 +798,7 @@ do_rest: #else cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); - initial_gs = (unsigned long)cpu_pda(cpu); + initial_gs = per_cpu_offset(cpu); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 965539ec425f..312414ef9365 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1645,7 +1645,7 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - x86_64_init_pda(); + pda_init(0); #endif xen_smp_init(); From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 22:15:53 +0900 Subject: [PATCH 089/566] percpu: add optimized generic percpu accessors It is an optimization and a cleanup, and adds the following new generic percpu methods: percpu_read() percpu_write() percpu_add() percpu_sub() percpu_and() percpu_or() percpu_xor() and implements support for them on x86. (other architectures will fall back to a default implementation) The advantage is that for example to read a local percpu variable, instead of this sequence: return __get_cpu_var(var); ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx ffffffff8102ca32: 81 ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax We can get a single instruction by using the optimized variants: return percpu_read(var); ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax I also cleaned up the x86-specific APIs and made the x86 code use these new generic percpu primitives. tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out * added percpu_and() for completeness's sake * made generic percpu ops atomic against preemption Signed-off-by: Ingo Molnar Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 2 +- arch/x86/include/asm/irq_regs_32.h | 4 +-- arch/x86/include/asm/mmu_context_32.h | 12 +++---- arch/x86/include/asm/pda.h | 10 +++--- arch/x86/include/asm/percpu.h | 24 +++++++------ arch/x86/include/asm/smp.h | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/tlb_32.c | 10 +++--- arch/x86/mach-voyager/voyager_smp.c | 4 +-- arch/x86/xen/enlighten.c | 14 ++++---- arch/x86/xen/irq.c | 8 ++--- arch/x86/xen/mmu.c | 2 +- arch/x86/xen/multicalls.h | 2 +- arch/x86/xen/smp.c | 2 +- include/asm-generic/percpu.h | 52 +++++++++++++++++++++++++++ 15 files changed, 102 insertions(+), 48 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0930b4f8d672..0728480f5c56 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -10,7 +10,7 @@ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return x86_read_percpu(current_task); + return percpu_read(current_task); } #else /* X86_32 */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h index 86afd7473457..d7ed33ee94e9 100644 --- a/arch/x86/include/asm/irq_regs_32.h +++ b/arch/x86/include/asm/irq_regs_32.h @@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return x86_read_percpu(irq_regs); + return percpu_read(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) @@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) struct pt_regs *old_regs; old_regs = get_irq_regs(); - x86_write_percpu(irq_regs, new_regs); + percpu_write(irq_regs, new_regs); return old_regs; } diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h index 7e98ce1d2c0e..08b53454f831 100644 --- a/arch/x86/include/asm/mmu_context_32.h +++ b/arch/x86/include/asm/mmu_context_32.h @@ -4,8 +4,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - x86_write_percpu(cpu_tlbstate.active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); @@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, } #ifdef CONFIG_SMP else { - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index e3d3a081d798..47f274fe6953 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -45,11 +45,11 @@ extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -#define read_pda(field) x86_read_percpu(__pda.field) -#define write_pda(field, val) x86_write_percpu(__pda.field, val) -#define add_pda(field, val) x86_add_percpu(__pda.field, val) -#define sub_pda(field, val) x86_sub_percpu(__pda.field, val) -#define or_pda(field, val) x86_or_percpu(__pda.field, val) +#define read_pda(field) percpu_read(__pda.field) +#define write_pda(field, val) percpu_write(__pda.field, val) +#define add_pda(field, val) percpu_add(__pda.field, val) +#define sub_pda(field, val) percpu_sub(__pda.field, val) +#define or_pda(field, val) percpu_or(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 328b31a429d7..03aa4b00a1c3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -40,16 +40,11 @@ #ifdef CONFIG_SMP #define __percpu_seg_str "%%"__stringify(__percpu_seg)":" -#define __my_cpu_offset x86_read_percpu(this_cpu_off) +#define __my_cpu_offset percpu_read(this_cpu_off) #else #define __percpu_seg_str #endif -#include - -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); - /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ extern void __bad_percpu_size(void); @@ -115,11 +110,13 @@ do { \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) +#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) +#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ @@ -131,6 +128,11 @@ do { \ old__; \ }) +#include + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + #ifdef CONFIG_X86_64 extern void load_pda_offset(int cpu); #else diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 127415402ea1..c7bbbbe65d3f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +#define raw_smp_processor_id() (percpu_read(cpu_number)) extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b4..77d546817d94 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -591,7 +591,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - x86_write_percpu(current_task, next_p); + percpu_write(current_task, next_p); return prev_p; } diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ec53818f4e38..e65449d0f7d9 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); */ void leave_mm(int cpu) { - BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); + BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { + if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -222,7 +222,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 1a48368acb09..96f15b09a4c5 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -402,7 +402,7 @@ void __init find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; - x86_write_percpu(cpu_number, boot_cpu_id); + percpu_write(cpu_number, boot_cpu_id); } /* @@ -1782,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - x86_write_percpu(cpu_number, hard_smp_processor_id()); + percpu_write(cpu_number, hard_smp_processor_id()); } static void voyager_send_call_func(cpumask_t callmask) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 312414ef9365..75b94139e1f2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -695,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0) static void xen_write_cr2(unsigned long cr2) { - x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; + percpu_read(xen_vcpu)->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return x86_read_percpu(xen_vcpu)->arch.cr2; + return percpu_read(xen_vcpu)->arch.cr2; } static unsigned long xen_read_cr2_direct(void) { - return x86_read_percpu(xen_vcpu_info.arch.cr2); + return percpu_read(xen_vcpu_info.arch.cr2); } static void xen_write_cr4(unsigned long cr4) @@ -718,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4) static unsigned long xen_read_cr3(void) { - return x86_read_percpu(xen_cr3); + return percpu_read(xen_cr3); } static void set_current_cr3(void *v) { - x86_write_percpu(xen_current_cr3, (unsigned long)v); + percpu_write(xen_current_cr3, (unsigned long)v); } static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -748,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); if (kernel) { - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); /* Update xen_current_cr3 once the batch has actually been submitted. */ @@ -764,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3) /* Update while interrupts are disabled, so its atomic with respect to ipis */ - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); __xen_write_cr3(true, cr3); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index bb042608c602..2e8271431e1a 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -83,7 +83,7 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } @@ -96,7 +96,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 503c240e26c7..7bc7852cc5c4 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1074,7 +1074,7 @@ static void drop_other_mm_ref(void *info) /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { load_cr3(swapper_pg_dir); arch_flush_lazy_cpu_mode(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 858938241616..e786fa7f2615 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); + local_irq_restore(percpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 83fa4236477d..3bfd6dd0b47c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -78,7 +78,7 @@ static __cpuinit void cpu_bringup(void) xen_setup_cpu_clockevents(); cpu_set(cpu, cpu_online_map); - x86_write_percpu(cpu_state, CPU_ONLINE); + percpu_write(cpu_state, CPU_ONLINE); wmb(); /* We can take interrupts now: we're officially "up". */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672ebd..00f45ff081a6 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void); #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ __typeof__(type) per_cpu_var(name) +/* + * Optional methods for optimized non-lvalue per-cpu variable access. + * + * @var can be a percpu variable or a field of it and its size should + * equal char, int or long. percpu_read() evaluates to a lvalue and + * all others to void. + * + * These operations are guaranteed to be atomic w.r.t. preemption. + * The generic versions use plain get/put_cpu_var(). Archs are + * encouraged to implement single-instruction alternatives which don't + * require preemption protection. + */ +#ifndef percpu_read +# define percpu_read(var) \ + ({ \ + typeof(per_cpu_var(var)) __tmp_var__; \ + __tmp_var__ = get_cpu_var(var); \ + put_cpu_var(var); \ + __tmp_var__; \ + }) +#endif + +#define __percpu_generic_to_op(var, val, op) \ +do { \ + get_cpu_var(var) op val; \ + put_cpu_var(var); \ +} while (0) + +#ifndef percpu_write +# define percpu_write(var, val) __percpu_generic_to_op(var, (val), =) +#endif + +#ifndef percpu_add +# define percpu_add(var, val) __percpu_generic_to_op(var, (val), +=) +#endif + +#ifndef percpu_sub +# define percpu_sub(var, val) __percpu_generic_to_op(var, (val), -=) +#endif + +#ifndef percpu_and +# define percpu_and(var, val) __percpu_generic_to_op(var, (val), &=) +#endif + +#ifndef percpu_or +# define percpu_or(var, val) __percpu_generic_to_op(var, (val), |=) +#endif + +#ifndef percpu_xor +# define percpu_xor(var, val) __percpu_generic_to_op(var, (val), ^=) +#endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ From a338af2c648f5e07c582154745a6c60cd2d8bf12 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2009 11:19:03 +0900 Subject: [PATCH 090/566] x86: fix build bug introduced during merge EXPORT_PER_CPU_SYMBOL() got misplaced during merge leading to build failure. Fix it. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index daeedf82c15f..b5c35af2011d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -86,9 +86,8 @@ void __cpuinit load_pda_offset(int cpu) } #ifndef CONFIG_SMP DEFINE_PER_CPU(struct x8664_pda, __pda); -EXPORT_PER_CPU_SYMBOL(__pda); #endif - +EXPORT_PER_CPU_SYMBOL(__pda); #endif /* CONFIG_SMP && CONFIG_X86_64 */ #ifdef CONFIG_X86_64 From cd3adf52309867955d6e2175246b526442235805 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2009 12:11:43 +0900 Subject: [PATCH 091/566] x86_64: initialize this_cpu_off to __per_cpu_load On x86_64, if get_per_cpu_var() is used before per cpu area is setup (if lockdep is turned on, it happens), it needs this_cpu_off to point to __per_cpu_load. Initialize accordingly. Signed-off-by: Tejun Heo --- arch/x86/kernel/smpcommon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 84395fabc410..7e157810062f 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -3,8 +3,13 @@ */ #include #include +#include +#ifdef CONFIG_X86_64 +DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load; +#else DEFINE_PER_CPU(unsigned long, this_cpu_off); +#endif EXPORT_PER_CPU_SYMBOL(this_cpu_off); #ifdef CONFIG_X86_32 From 68564a46976017496c2227660930d81240f82355 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: [PATCH 092/566] work_on_cpu: don't try to get_online_cpus() in work_on_cpu. Impact: remove potential circular lock dependency with cpu hotplug lock This has caused more problems than it solved, with a pile of cpu hotplug locking issues. Followup patches will get_online_cpus() in callers that need it, but if they don't do it they're no worse than before when they were using set_cpus_allowed without locking. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- kernel/workqueue.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2f445833ae37..a35afdbc0161 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -991,8 +991,8 @@ static void do_work_for_cpu(struct work_struct *w) * @fn: the function to run * @arg: the function arg * - * This will return -EINVAL in the cpu is not online, or the return value - * of @fn otherwise. + * This will return the value @fn returns. + * It is up to the caller to ensure that the cpu doesn't go offline. */ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { @@ -1001,14 +1001,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) INIT_WORK(&wfc.work, do_work_for_cpu); wfc.fn = fn; wfc.arg = arg; - get_online_cpus(); - if (unlikely(!cpu_online(cpu))) - wfc.ret = -EINVAL; - else { - schedule_work_on(cpu, &wfc.work); - flush_work(&wfc.work); - } - put_online_cpus(); + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); return wfc.ret; } From e1d9ec6246a2668a5d037f529877efb7cf176af8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: [PATCH 093/566] work_on_cpu: Use our own workqueue. Impact: remove potential clashes with generic kevent workqueue Annoyingly, some places we want to use work_on_cpu are already in workqueues. As per Ingo's suggestion, we create a different workqueue for work_on_cpu. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- kernel/workqueue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a35afdbc0161..1f0c509b40d3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -971,6 +971,8 @@ undo: } #ifdef CONFIG_SMP +static struct workqueue_struct *work_on_cpu_wq __read_mostly; + struct work_for_cpu { struct work_struct work; long (*fn)(void *); @@ -1001,7 +1003,7 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) INIT_WORK(&wfc.work, do_work_for_cpu); wfc.fn = fn; wfc.arg = arg; - schedule_work_on(cpu, &wfc.work); + queue_work_on(cpu, work_on_cpu_wq, &wfc.work); flush_work(&wfc.work); return wfc.ret; @@ -1019,4 +1021,8 @@ void __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_callback, 0); keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); +#ifdef CONFIG_SMP + work_on_cpu_wq = create_workqueue("work_on_cpu"); + BUG_ON(!work_on_cpu_wq); +#endif } From 6eb714c63ed5bd663627f7dda8c4d5258f3b64ef Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: [PATCH 094/566] cpufreq: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write Impact: use new work_on_cpu function to reduce stack usage Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with a work_on_cpu function for drv_read() and drv_write(). Basically converts do_drv_{read,write} into "work_on_cpu" functions that are now called by drv_read and drv_write. Note: This patch basically reverts 50c668d6 which reverted 7503bfba, now that the work_on_cpu() function is more stable. Signed-off-by: Mike Travis Acked-by: Rusty Russell Tested-by: Dieter Ries Tested-by: Maciej Rutecki Cc: Dave Jones Cc: --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 019276717a7f..4b1c319d30c3 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,8 +150,9 @@ struct drv_cmd { u32 val; }; -static void do_drv_read(struct drv_cmd *cmd) +static long do_drv_read(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) default: break; } + return 0; } -static void do_drv_write(struct drv_cmd *cmd) +static long do_drv_write(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd) default: break; } + return 0; } static void drv_read(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed_ptr(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed_ptr(current, &saved_mask); + work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); } static void drv_write(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - set_cpus_allowed_ptr(current, cpumask_of(i)); - do_drv_write(cmd); + work_on_cpu(i, do_drv_write, cmd); } - - set_cpus_allowed_ptr(current, &saved_mask); - return; } static u32 get_cur_val(const struct cpumask *mask) @@ -367,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return freq; } -static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, +static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; From cef30b3a84e1c7cbd49987d83c5863c001445842 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:58:13 -0800 Subject: [PATCH 095/566] x86: put trigger in to detect mismatched apic versions. Fire off one message if two apic's discovered with different apic versions. Signed-off-by: Mike Travis --- arch/x86/kernel/apic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f5675..db0998641c58 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1833,6 +1833,11 @@ void __cpuinit generic_processor_info(int apicid, int version) num_processors++; cpu = cpumask_next_zero(-1, cpu_present_mask); + if (version != apic_version[boot_cpu_physical_apicid]) + WARN_ONCE(1, + "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); + physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* From 145cd30bac885dffad9db9d487baad07b68a3d04 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 17 Jan 2009 14:42:50 +0900 Subject: [PATCH 096/566] linker script: add missing VMLINUX_SYMBOL The newly added PERCPU_*() macros define and use __per_cpu_load but VMLINUX_SYMBOL() was missing from usages causing build failures on archs where linker visible symbol is different from C symbols (e.g. blackfin). Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e53319cf29cb..aa6b9b1b30b5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -432,13 +432,14 @@ #define PERCPU_PROLOG(vaddr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; #define PERCPU_EPILOG(phdr) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = __per_cpu_load + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc From 74e7904559a10cbb9fbf9139c5c42fc87c0f62a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 17 Jan 2009 15:26:32 +0900 Subject: [PATCH 097/566] linker script: add missing .data.percpu.page_aligned arm, arm/mach-integrator and powerpc were missing .data.percpu.page_aligned in their percpu output section definitions. Add it. Signed-off-by: Tejun Heo --- arch/arm/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 00216071eaf7..85598f7da407 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -65,6 +65,7 @@ SECTIONS #endif . = ALIGN(4096); __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 10a7d47e8510..f45e4e508eca 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -219,6 +219,7 @@ SECTIONS .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 47bf15cd2c9e..04e8ecea9b40 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -182,6 +182,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; From 1b437c8c73a36daa471dd54a63c426d72af5723d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: [PATCH 098/566] x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_64.h | 24 +++++++++++++++++++----- arch/x86/include/asm/pda.h | 10 ---------- arch/x86/kernel/irq.c | 6 +----- arch/x86/kernel/irq_64.c | 3 +++ arch/x86/kernel/nmi.c | 10 +--------- arch/x86/xen/smp.c | 18 +++--------------- 6 files changed, 27 insertions(+), 44 deletions(-) diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index b5a6b5d56704..a65bab20f6ce 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,22 +3,36 @@ #include #include -#include #include +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT 1 -#define inc_irq_stat(member) add_pda(member, 1) +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -#define local_softirq_pending() read_pda(__softirq_pending) +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 47f274fe6953..69a40757e217 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,19 +25,9 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; short isidle; struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3973e2df7f87..8b30d0c2512c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq) #endif } -#ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat, x)) -#else -# define irq_stats(x) cpu_pda(x) -#endif +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing: */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0b21cb1ea11f..1db05247b47f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -19,6 +19,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + /* * Probabilistic stack overflow check: * diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 7228979f1e7f..23b6d9e6e4f5 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -61,11 +61,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { -#ifdef CONFIG_X86_64 - return cpu_pda(cpu)->__nmi_count; -#else - return nmi_count(cpu); -#endif + return per_cpu(irq_stat, cpu).__nmi_count; } static inline int mce_in_progress(void) @@ -82,12 +78,8 @@ static inline int mce_in_progress(void) */ static inline unsigned int get_timer_irqs(int cpu) { -#ifdef CONFIG_X86_64 - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -#else return per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; -#endif } #ifdef CONFIG_SMP diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 3bfd6dd0b47c..9ff3b0999cfb 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_resched_count++; -#else - add_pda(irq_resched_count, 1); -#endif + inc_irq_stat(irq_resched_count); return IRQ_HANDLED; } @@ -435,11 +431,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; @@ -449,11 +441,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; From 9eb912d1aa6b8106e06a73ea6702ec3dab0d6a1a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: [PATCH 099/566] x86-64: Move TLB state from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/mmu_context_64.h | 16 +++++++--------- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/tlbflush.h | 7 ++----- arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/tlb_32.c | 12 ++---------- arch/x86/kernel/tlb_64.c | 13 ++++++++----- arch/x86/xen/mmu.c | 6 +----- 7 files changed, 20 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h index 677d36e9540a..c4572505ab3e 100644 --- a/arch/x86/include/asm/mmu_context_64.h +++ b/arch/x86/include/asm/mmu_context_64.h @@ -1,13 +1,11 @@ #ifndef _ASM_X86_MMU_CONTEXT_64_H #define _ASM_X86_MMU_CONTEXT_64_H -#include - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); load_cr3(next->pgd); @@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } #ifdef CONFIG_SMP else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 69a40757e217..8ee835ed10e1 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,9 +25,7 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - short mmu_state; short isidle; - struct mm_struct *active_mm; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 17feaa9c7e76..d3539f998f88 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask, #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + percpu_write(cpu_tlbstate.state, 0); + percpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c49498d40830..3d0cc6f17116 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,8 +897,6 @@ void __cpuinit pda_init(int cpu) pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; if (cpu == 0) { /* others are initialized in smpboot.c */ diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e65449d0f7d9..abf0808d6fc4 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -4,8 +4,8 @@ #include -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) - ____cacheline_aligned = { &init_mm, 0, }; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; /* must come after the send_IPI functions above for inlining */ #include @@ -231,14 +231,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} - static int init_flush_cpumask(void) { alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 7f4141d3b661..e64a32c48825 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -18,6 +18,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; + #include /* * Smarter SMP flushing macros. @@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state); */ void leave_mm(int cpu) { - if (read_pda(mmu_state) == TLBSTATE_OK) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 7bc7852cc5c4..98cb9869eb24 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; -#ifdef CONFIG_X86_64 - active_mm = read_pda(active_mm); -#else - active_mm = __get_cpu_var(cpu_tlbstate).active_mm; -#endif + active_mm = percpu_read(cpu_tlbstate.active_mm); if (active_mm == mm) leave_mm(smp_processor_id()); From 26f80bd6a9ab17bc8a60b6092e7c0d05c5927ce5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 100/566] x86-64: Convert irqstacks to per-cpu Move the irqstackptr variable from the PDA to per-cpu. Make the stacks themselves per-cpu, removing some specific allocation code. Add a seperate flag (is_boot_cpu) to simplify the per-cpu boot adjustments. tj: * sprinkle some underbars around. * irq_stack_ptr is not used till traps_init(), no reason to initialize it early. On SMP, just leaving it NULL till proper initialization in setup_per_cpu_areas() works. Dropped is_boot_cpu and early irq_stack_ptr initialization. * do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) instead of (char, irq_stack[IRQ_STACK_SIZE]). Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/page_64.h | 4 ++-- arch/x86/include/asm/pda.h | 1 - arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 19 +++++++----------- arch/x86/kernel/dumpstack_64.c | 33 ++++++++++++++++---------------- arch/x86/kernel/entry_64.S | 6 +++--- arch/x86/kernel/setup_percpu.c | 4 +++- 8 files changed, 35 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 5ebca29f44f0..e27fdbe5f9e4 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -13,8 +13,8 @@ #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQSTACK_ORDER 2 -#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define STACKFAULT_STACK 1 #define DOUBLEFAULT_STACK 2 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 8ee835ed10e1..09965f7a2165 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -22,7 +22,6 @@ struct x8664_pda { /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif - char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ short isidle; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2e..f511246fa6cd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -378,6 +378,9 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +DECLARE_PER_CPU(char *, irq_stack_ptr); #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f4cc81bfbf89..5b821fbdaf7b 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -54,7 +54,6 @@ int main(void) ENTRY(pcurrent); ENTRY(irqcount); ENTRY(cpunumber); - ENTRY(irqstackptr); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3d0cc6f17116..496f0a01919b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,7 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; +DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +#ifdef CONFIG_SMP +DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ +#else +DEFINE_PER_CPU(char *, irq_stack_ptr) = + per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; +#endif void __cpuinit pda_init(int cpu) { @@ -901,18 +907,7 @@ void __cpuinit pda_init(int cpu) if (cpu == 0) { /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c302d0707048..28e26a4315df 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irq_stack_end = + (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; struct thread_info *tinfo; int graph = 0; @@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *) estack_end[-2]; continue; } - if (irqstack_end) { - unsigned long *irqstack; - irqstack = irqstack_end - - (IRQSTACKSIZE - 64) / sizeof(*irqstack); + if (irq_stack_end) { + unsigned long *irq_stack; + irq_stack = irq_stack_end - + (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irqstack && stack < irqstack_end) { + if (stack >= irq_stack && stack < irq_stack_end) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end, &graph); + ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last * pointer (index -1 to end) in the IRQ stack: */ - stack = (unsigned long *) (irqstack_end[-1]); - irqstack_end = NULL; + stack = (unsigned long *) (irq_stack_end[-1]); + irq_stack_end = NULL; ops->stack(data, "EOI"); continue; } @@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = - (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = - (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irq_stack_end = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + unsigned long *irq_stack = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* * debugging aid: "show_stack(NULL, NULL);" prints the @@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { - if (stack >= irqstack && stack <= irqstack_end) { - if (stack == irqstack_end) { - stack = (unsigned long *) (irqstack_end[-1]); + if (stack >= irq_stack && stack <= irq_stack_end) { + if (stack == irq_stack_end) { + stack = (unsigned long *) (irq_stack_end[-1]); printk(" "); } } else { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4833f3a19650..d22677a66438 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -345,7 +345,7 @@ ENTRY(save_args) 1: incl %gs:pda_irqcount jne 2f popq_cfi %rax /* move return address... */ - mov %gs:pda_irqstackptr,%rsp + mov PER_CPU_VAR(irq_stack_ptr),%rsp EMPTY_FRAME 0 pushq_cfi %rax /* ... to the new stack */ /* @@ -1261,7 +1261,7 @@ ENTRY(call_softirq) mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp incl %gs:pda_irqcount - cmove %gs:pda_irqstackptr,%rsp + cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq @@ -1300,7 +1300,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 11: incl %gs:pda_irqcount movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - cmovzq %gs:pda_irqstackptr,%rsp + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp pushq %rbp # backlink for old unwinder call xen_evtchn_do_upcall popq %rsp diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b5c35af2011d..8b53ef83c611 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -192,7 +192,10 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); #ifdef CONFIG_X86_64 + per_cpu(irq_stack_ptr, cpu) = + (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; /* * CPU0 modified pda in the init data area, reload pda * offset for CPU0 and clear the area for others. @@ -202,7 +205,6 @@ void __init setup_per_cpu_areas(void) else memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); #endif - per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } From 92d65b2371d86d40807e1dbfdccadc4d501edcde Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 101/566] x86-64: Convert exception stacks to per-cpu Move the exception stacks to per-cpu, removing specific allocation code. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 496f0a01919b..b6d7eec0be77 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -913,8 +913,9 @@ void __cpuinit pda_init(int cpu) } } -static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) + __aligned(PAGE_SIZE); extern asmlinkage void ignore_sysret(void); @@ -972,15 +973,12 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); unsigned long v; - char *estacks = NULL; struct task_struct *me; int i; /* CPU 0 is initialised in head64.c */ if (cpu != 0) pda_init(cpu); - else - estacks = boot_exception_stacks; me = current; @@ -1014,18 +1012,13 @@ void __cpuinit cpu_init(void) * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + static const unsigned int sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ }; + char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; + estacks += sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } From ea9279066de44053d0c20ea855bc9f4706652d84 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 102/566] x86-64: Move cpu number from PDA to per-cpu and consolidate with 32-bit. tj: moved cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA for voyager. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/include/asm/smp.h | 4 +--- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/process_32.c | 3 --- arch/x86/kernel/setup_percpu.c | 10 ++++++++++ arch/x86/kernel/smpcommon.c | 2 -- 7 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 09965f7a2165..668d5a5b6f70 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -16,7 +16,7 @@ struct x8664_pda { unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ - unsigned int cpunumber; /* 36 Logical CPU number */ + unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c7bbbbe65d3f..68636e767a91 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -25,9 +25,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -#ifdef CONFIG_X86_32 DECLARE_PER_CPU(int, cpu_number); -#endif static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -164,7 +162,7 @@ extern unsigned disabled_cpus __cpuinitdata; extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() read_pda(cpunumber) +#define raw_smp_processor_id() (percpu_read(cpu_number)) #define stack_smp_processor_id() \ ({ \ diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5b821fbdaf7b..cae6697c0991 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -53,7 +53,6 @@ int main(void) ENTRY(oldrsp); ENTRY(pcurrent); ENTRY(irqcount); - ENTRY(cpunumber); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b6d7eec0be77..4221e920886d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -899,7 +899,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); - pda->cpunumber = cpu; pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 77d546817d94..2c00a57ccb90 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - /* * Return saved PC of a blocked thread. */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8b53ef83c611..258497f93f4d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -22,6 +22,15 @@ # define DBG(x...) #endif +/* + * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but + * voyager wants cpu_number too. + */ +#ifdef CONFIG_SMP +DEFINE_PER_CPU(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); +#endif + #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; @@ -193,6 +202,7 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); + per_cpu(cpu_number, cpu) = cpu; #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 7e157810062f..add36b4e37c9 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -28,7 +28,5 @@ __cpuinit void init_gdt(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - - per_cpu(cpu_number, cpu) = cpu; } #endif From c6f5e0acd5d12ee23f701f15889872e67b47caa6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 103/566] x86-64: Move current task from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 24 +++--------------------- arch/x86/include/asm/pda.h | 4 ++-- arch/x86/include/asm/system.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 5 +---- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/process_64.c | 5 ++++- arch/x86/kernel/smpboot.c | 3 +-- arch/x86/xen/smp.c | 3 +-- 9 files changed, 15 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0728480f5c56..c68c361697e1 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -1,39 +1,21 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H -#ifdef CONFIG_X86_32 #include #include +#ifndef __ASSEMBLY__ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); + static __always_inline struct task_struct *get_current(void) { return percpu_read(current_task); } -#else /* X86_32 */ - -#ifndef __ASSEMBLY__ -#include - -struct task_struct; - -static __always_inline struct task_struct *get_current(void) -{ - return read_pda(pcurrent); -} - -#else /* __ASSEMBLY__ */ - -#include -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg +#define current get_current() #endif /* __ASSEMBLY__ */ -#endif /* X86_32 */ - -#define current get_current() - #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 668d5a5b6f70..7209302d9227 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -11,8 +11,8 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long dummy; + unsigned long unused1; + unsigned long unused2; unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1a..4399aac680e9 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -106,7 +106,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [current_task] "m" (per_cpu_var(current_task)) \ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cae6697c0991..4f7a210e1e58 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -51,7 +51,6 @@ int main(void) #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) ENTRY(kernelstack); ENTRY(oldrsp); - ENTRY(pcurrent); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4221e920886d..b50e38d16888 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -903,10 +903,7 @@ void __cpuinit pda_init(int cpu) pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - } else { + if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 28e26a4315df..d35db5993fd6 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -242,7 +242,7 @@ void show_registers(struct pt_regs *regs) int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = current; sp = regs->sp; printk("CPU %d ", cpu); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4f..e00c31a4b3c0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -57,6 +57,9 @@ asmlinkage extern void ret_from_fork(void); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -615,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + percpu_write(current_task, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2f0e0f1090f6..5854be0fb804 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -790,13 +790,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: -#ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; +#ifdef CONFIG_X86_32 init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 9ff3b0999cfb..72c2eb9b64cd 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -279,12 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; + per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 init_gdt(cpu); - per_cpu(current_task, cpu) = idle; irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = idle; clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); From 9af45651f1f7c89942e016a1a00a7ebddfa727f8 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 104/566] x86-64: Move kernelstack from PDA to per-cpu. Also clean up PER_CPU_VAR usage in xen-asm_64.S tj: * remove now unused stack_thread_info() * s/kernelstack/kernel_stack/ * added FIXME comment in xen-asm_64.S Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/ia32/ia32entry.S | 8 ++++---- arch/x86/include/asm/pda.h | 4 +--- arch/x86/include/asm/thread_info.h | 20 ++++++++------------ arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/smpboot.c | 3 +++ arch/x86/xen/xen-asm_64.S | 23 +++++++++++------------ 9 files changed, 35 insertions(+), 38 deletions(-) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 256b00b61892..9c79b2477008 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target) CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp SWAPGS_UNSAFE_STACK - movq %gs:pda_kernelstack, %rsp - addq $(PDA_STACKOFFSET),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp + addq $(KERNEL_STACK_OFFSET),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs, here we enable it straight after entry: @@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target) ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 7209302d9227..4d28ffba6e1b 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -13,7 +13,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; - unsigned long kernelstack; /* 16 top of kernel stack for current */ + unsigned long unused3; unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ @@ -44,6 +44,4 @@ extern void pda_init(int); #endif -#define PDA_STACKOFFSET (5*8) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 98789647baa9..b46f8ca007b5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include +#include +#define KERNEL_STACK_OFFSET (5*8) /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ +DECLARE_PER_CPU(unsigned long, kernel_stack); + static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} - -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) -{ - struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + ti = (void *)(percpu_read(kernel_stack) + + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } @@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq PER_CPU_VAR(kernel_stack),reg ; \ + subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4f7a210e1e58..cafff5f4a031 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(kernelstack); ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b50e38d16888..06b6290088f4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,6 +889,10 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; #endif +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); + void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -900,8 +904,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d22677a66438..0dd45859a7a8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -468,7 +468,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -480,7 +480,7 @@ ENTRY(system_call) ENTRY(system_call_after_swapgs) movq %rsp,%gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight * and short: diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e00c31a4b3c0..6c5f57602108 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -620,9 +620,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(oldrsp, next->usersp); percpu_write(current_task, next_p); - write_pda(kernelstack, + percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - PDA_STACKOFFSET); + THREAD_SIZE - KERNEL_STACK_OFFSET); #ifdef CONFIG_CC_STACKPROTECTOR write_pda(stack_canary, next_p->stack_canary); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5854be0fb804..869b98840fd0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,6 +798,9 @@ do_rest: #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(c_idle.idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 05794c566e87..5a23e8993678 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -28,12 +29,10 @@ #if 1 /* - x86-64 does not yet support direct access to percpu variables - via a segment override, so we just need to make sure this code - never gets used + FIXME: x86_64 now can support direct access to percpu variables + via a segment override. Update xen accordingly. */ #define BUG ud2a -#define PER_CPU_VAR(var, off) 0xdeadbeef #endif /* @@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct) BUG /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events @@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct) ENTRY(xen_irq_disable_direct) BUG - movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ENDPATCH(xen_irq_disable_direct) ret ENDPROC(xen_irq_disable_direct) @@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct) ENTRY(xen_save_fl_direct) BUG - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah,%ah ENDPATCH(xen_save_fl_direct) @@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct) BUG testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events 1: @@ -196,7 +195,7 @@ ENTRY(xen_sysret64) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp pushq $__USER_DS pushq %gs:pda_oldrsp @@ -213,7 +212,7 @@ ENTRY(xen_sysret32) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack, %rsp + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS pushq %gs:pda_oldrsp From 3d1e42a7cf945e289d6ba26159aa0e2b0645401b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 105/566] x86-64: Move oldrsp from PDA to per-cpu. tj: * in asm-offsets_64.c, pda.h inclusion shouldn't be removed as pda is still referenced in the file * s/oldrsp/old_rsp/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/entry_64.S | 10 +++++----- arch/x86/kernel/process_64.c | 8 +++++--- arch/x86/xen/xen-asm_64.S | 8 ++++---- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4d28ffba6e1b..ae23deb99559 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -14,7 +14,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; unsigned long unused3; - unsigned long oldrsp; /* 24 user rsp for system call */ + unsigned long unused4; int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cafff5f4a031..afda6deb8515 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0dd45859a7a8..7c27da407da7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64) /* %rsp:at FRAMEEND */ .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq %gs:pda_oldrsp,\tmp + movq PER_CPU_VAR(old_rsp),\tmp movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) @@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64) .macro RESTORE_TOP_OF_STACK tmp offset=0 movq RSP+\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp + movq \tmp,PER_CPU_VAR(old_rsp) movq EFLAGS+\offset(%rsp),\tmp movq \tmp,R11+\offset(%rsp) .endm @@ -479,7 +479,7 @@ ENTRY(system_call) */ ENTRY(system_call_after_swapgs) - movq %rsp,%gs:pda_oldrsp + movq %rsp,PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight @@ -523,7 +523,7 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - movq %gs:pda_oldrsp, %rsp + movq PER_CPU_VAR(old_rsp), %rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -833,7 +833,7 @@ common_interrupt: XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ - /* 0(%rsp): oldrsp-ARGOFFSET */ + /* 0(%rsp): old_rsp-ARGOFFSET */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6c5f57602108..480128918926 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(unsigned long, old_rsp); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -395,7 +397,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; - write_pda(oldrsp, new_sp); + percpu_write(old_rsp, new_sp); regs->cs = __USER_CS; regs->ss = __USER_DS; regs->flags = 0x200; @@ -616,8 +618,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - prev->usersp = read_pda(oldrsp); - write_pda(oldrsp, next->usersp); + prev->usersp = percpu_read(old_rsp); + percpu_write(old_rsp, next->usersp); percpu_write(current_task, next_p); percpu_write(kernel_stack, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5a23e8993678..d6fc51f4ce85 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -194,11 +194,11 @@ RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp + movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp pushq $__USER_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER_CS pushq %rcx @@ -211,11 +211,11 @@ RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp + movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER32_CS pushq %rcx From 5689553076c4a67b83426b076082c63085b7567a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 106/566] x86-64: Move irqcount from PDA to per-cpu. tj: s/irqcount/irq_count/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/entry_64.S | 14 +++++++------- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index ae23deb99559..4527d70314d4 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -15,7 +15,7 @@ struct x8664_pda { unsigned long unused2; unsigned long unused3; unsigned long unused4; - int irqcount; /* 32 Irq nesting counter. Starts -1 */ + int unused5; unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index afda6deb8515..64c834a39aa8 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 06b6290088f4..e2323ecce1d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -893,6 +893,8 @@ DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); +DEFINE_PER_CPU(unsigned int, irq_count) = -1; + void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -903,8 +905,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); - pda->irqcount = -1; - if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7c27da407da7..c52b60919163 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -337,12 +337,12 @@ ENTRY(save_args) je 1f SWAPGS /* - * irqcount is used to check if a CPU is already on an interrupt stack + * irq_count is used to check if a CPU is already on an interrupt stack * or not. While this is essentially redundant with preempt_count it is * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ -1: incl %gs:pda_irqcount +1: incl PER_CPU_VAR(irq_count) jne 2f popq_cfi %rax /* move return address... */ mov PER_CPU_VAR(irq_stack_ptr),%rsp @@ -837,7 +837,7 @@ common_interrupt: ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 @@ -1260,14 +1260,14 @@ ENTRY(call_softirq) CFI_REL_OFFSET rbp,0 mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - incl %gs:pda_irqcount + incl PER_CPU_VAR(irq_count) cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) ret CFI_ENDPROC END(call_softirq) @@ -1297,7 +1297,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) movq %rdi, %rsp # we don't return, adjust the stack frame CFI_ENDPROC DEFAULT_FRAME -11: incl %gs:pda_irqcount +11: incl PER_CPU_VAR(irq_count) movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp @@ -1305,7 +1305,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) call xen_evtchn_do_upcall popq %rsp CFI_DEF_CFA_REGISTER rsp - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC END(do_hypervisor_callback) From e7a22c1ebcc1caa8178df1819d05128bb5b45ab9 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: [PATCH 107/566] x86-64: Move nodenumber from PDA to per-cpu. tj: * s/nodenumber/node_number/ * removed now unused pda variable from pda_init() Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/include/asm/topology.h | 3 ++- arch/x86/kernel/cpu/common.c | 13 ++++++------- arch/x86/kernel/setup_percpu.c | 4 +++- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4527d70314d4..b30ef6bddc43 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -22,7 +22,6 @@ struct x8664_pda { /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif - short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ short isidle; } ____cacheline_aligned_in_smp; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 87ca3fd86e88..ffea1fe03a99 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map; DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); /* Returns the number of the current Node. */ -#define numa_node_id() read_pda(nodenumber) +DECLARE_PER_CPU(int, node_number); +#define numa_node_id() percpu_read(node_number) #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2323ecce1d3..7976a6a0f65c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,18 +897,11 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1; void __cpuinit pda_init(int cpu) { - struct x8664_pda *pda = cpu_pda(cpu); - /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); load_pda_offset(cpu); - - if (cpu != 0) { - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } } static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks @@ -978,6 +971,12 @@ void __cpuinit cpu_init(void) if (cpu != 0) pda_init(cpu); +#ifdef CONFIG_NUMA + if (cpu != 0 && percpu_read(node_number) == 0 && + cpu_to_node(cpu) != NUMA_NO_NODE) + percpu_write(node_number, cpu_to_node(cpu)); +#endif + me = current; if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 258497f93f4d..efbafbbff584 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -53,6 +53,8 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) #define X86_64_NUMA 1 /* (used later) */ +DEFINE_PER_CPU(int, node_number) = 0; +EXPORT_PER_CPU_SYMBOL(node_number); /* * Map cpu index to node index @@ -283,7 +285,7 @@ void __cpuinit numa_set_node(int cpu, int node) per_cpu(x86_cpu_to_node_map, cpu) = node; if (node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; + per_cpu(node_number, cpu) = node; } void __cpuinit numa_clear_node(int cpu) From c2558e0eba66b49993e619da66c95a50a97830a3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: [PATCH 108/566] x86-64: Move isidle from PDA to per-cpu. tj: s/isidle/is_idle/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/kernel/process_64.c | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index b30ef6bddc43..c31ca048a901 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -23,7 +23,6 @@ struct x8664_pda { offset 40!!! */ #endif short in_bootmem; /* pda lives in bootmem */ - short isidle; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 480128918926..4523ff88a69d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -61,6 +61,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(unsigned long, old_rsp); +static DEFINE_PER_CPU(unsigned char, is_idle); unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; @@ -80,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); void enter_idle(void) { - write_pda(isidle, 1); + percpu_write(is_idle, 1); atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { - if (test_and_clear_bit_pda(0, isidle) == 0) + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } From 87b264065880fa696c121dad8498a60524e0f6de Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: [PATCH 109/566] x86-64: Use absolute displacements for per-cpu accesses. Accessing memory through %gs should not use rip-relative addressing. Adding a P prefix for the argument tells gcc to not add (%rip) to the memory references. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 26 +++++++++++++------------- arch/x86/include/asm/system.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 03aa4b00a1c3..165d5272ece1 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -39,10 +39,10 @@ #include #ifdef CONFIG_SMP -#define __percpu_seg_str "%%"__stringify(__percpu_seg)":" +#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) #else -#define __percpu_seg_str +#define __percpu_arg(x) "%" #x #endif /* For arch-specific code, we can use direct single-insn ops (they @@ -58,22 +58,22 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg_str"%0" \ + asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg_str"%0" \ + asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg_str"%0" \ + asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 8: \ - asm(op "q %1,"__percpu_seg_str"%0" \ + asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ : "r" ((T__)val)); \ break; \ @@ -86,22 +86,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg_str"%1,%0" \ + asm(op "b "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg_str"%1,%0" \ + asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg_str"%1,%0" \ + asm(op "l "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_seg_str"%1,%0" \ + asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -122,9 +122,9 @@ do { \ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ int old__; \ - asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0" \ - : "=r" (old__) \ - : "dIr" (bit), "i" (&per_cpu__##var) : "memory"); \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (per_cpu__##var) \ + : "dIr" (bit)); \ old__; \ }) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 4399aac680e9..d1dc27dba36d 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ From 5662a2f8e7313f78d6b17ab383f3e4f04971c335 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 18 Jan 2009 19:37:21 +0100 Subject: [PATCH 110/566] x86, rdc321x: remove/move leftover files Impact: cleanup Move/remove leftover RDC321 files. Now that it's not a subarch anymore, arch/x86/mach-rdc321x and arch/x86/include/asm/mach-rdc321x/ are not needed. One include file was still in use: rdc321x_defs.h, move that to the generic x86 asm header directory. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-rdc321x/gpio.h | 60 ------ .../asm/{mach-rdc321x => }/rdc321x_defs.h | 0 arch/x86/mach-rdc321x/Makefile | 5 - arch/x86/mach-rdc321x/gpio.c | 194 ------------------ arch/x86/mach-rdc321x/platform.c | 69 ------- drivers/watchdog/rdc321x_wdt.c | 2 +- 6 files changed, 1 insertion(+), 329 deletions(-) delete mode 100644 arch/x86/include/asm/mach-rdc321x/gpio.h rename arch/x86/include/asm/{mach-rdc321x => }/rdc321x_defs.h (100%) delete mode 100644 arch/x86/mach-rdc321x/Makefile delete mode 100644 arch/x86/mach-rdc321x/gpio.c delete mode 100644 arch/x86/mach-rdc321x/platform.c diff --git a/arch/x86/include/asm/mach-rdc321x/gpio.h b/arch/x86/include/asm/mach-rdc321x/gpio.h deleted file mode 100644 index c210ab5788b0..000000000000 --- a/arch/x86/include/asm/mach-rdc321x/gpio.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _ASM_X86_MACH_RDC321X_GPIO_H -#define _ASM_X86_MACH_RDC321X_GPIO_H - -#include - -extern int rdc_gpio_get_value(unsigned gpio); -extern void rdc_gpio_set_value(unsigned gpio, int value); -extern int rdc_gpio_direction_input(unsigned gpio); -extern int rdc_gpio_direction_output(unsigned gpio, int value); -extern int rdc_gpio_request(unsigned gpio, const char *label); -extern void rdc_gpio_free(unsigned gpio); -extern void __init rdc321x_gpio_setup(void); - -/* Wrappers for the arch-neutral GPIO API */ - -static inline int gpio_request(unsigned gpio, const char *label) -{ - return rdc_gpio_request(gpio, label); -} - -static inline void gpio_free(unsigned gpio) -{ - might_sleep(); - rdc_gpio_free(gpio); -} - -static inline int gpio_direction_input(unsigned gpio) -{ - return rdc_gpio_direction_input(gpio); -} - -static inline int gpio_direction_output(unsigned gpio, int value) -{ - return rdc_gpio_direction_output(gpio, value); -} - -static inline int gpio_get_value(unsigned gpio) -{ - return rdc_gpio_get_value(gpio); -} - -static inline void gpio_set_value(unsigned gpio, int value) -{ - rdc_gpio_set_value(gpio, value); -} - -static inline int gpio_to_irq(unsigned gpio) -{ - return gpio; -} - -static inline int irq_to_gpio(unsigned irq) -{ - return irq; -} - -/* For cansleep */ -#include - -#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */ diff --git a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h similarity index 100% rename from arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h rename to arch/x86/include/asm/rdc321x_defs.h diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile deleted file mode 100644 index 8325b4ca431c..000000000000 --- a/arch/x86/mach-rdc321x/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the RDC321x specific parts of the kernel -# -obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o - diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c deleted file mode 100644 index 247f33d3a407..000000000000 --- a/arch/x86/mach-rdc321x/gpio.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * GPIO support for RDC SoC R3210/R8610 - * - * Copyright (C) 2007, Florian Fainelli - * Copyright (C) 2008, Volker Weiss - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - - -#include -#include -#include -#include - -#include -#include - - -/* spin lock to protect our private copy of GPIO data register plus - the access to PCI conf registers. */ -static DEFINE_SPINLOCK(gpio_lock); - -/* copy of GPIO data registers */ -static u32 gpio_data_reg1; -static u32 gpio_data_reg2; - -static u32 gpio_request_data[2]; - - -static inline void rdc321x_conf_write(unsigned addr, u32 value) -{ - outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); - outl(value, RDC3210_CFGREG_DATA); -} - -static inline void rdc321x_conf_or(unsigned addr, u32 value) -{ - outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); - value |= inl(RDC3210_CFGREG_DATA); - outl(value, RDC3210_CFGREG_DATA); -} - -static inline u32 rdc321x_conf_read(unsigned addr) -{ - outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); - - return inl(RDC3210_CFGREG_DATA); -} - -/* configure pin as GPIO */ -static void rdc321x_configure_gpio(unsigned gpio) -{ - unsigned long flags; - - spin_lock_irqsave(&gpio_lock, flags); - rdc321x_conf_or(gpio < 32 - ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2, - 1 << (gpio & 0x1f)); - spin_unlock_irqrestore(&gpio_lock, flags); -} - -/* initially setup the 2 copies of the gpio data registers. - This function must be called by the platform setup code. */ -void __init rdc321x_gpio_setup() -{ - /* this might not be, what others (BIOS, bootloader, etc.) - wrote to these registers before, but it's a good guess. Still - better than just using 0xffffffff. */ - - gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1); - gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2); -} - -/* determine, if gpio number is valid */ -static inline int rdc321x_is_gpio(unsigned gpio) -{ - return gpio <= RDC321X_MAX_GPIO; -} - -/* request GPIO */ -int rdc_gpio_request(unsigned gpio, const char *label) -{ - unsigned long flags; - - if (!rdc321x_is_gpio(gpio)) - return -EINVAL; - - spin_lock_irqsave(&gpio_lock, flags); - if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f))) - goto inuse; - gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f)); - spin_unlock_irqrestore(&gpio_lock, flags); - - return 0; -inuse: - spin_unlock_irqrestore(&gpio_lock, flags); - return -EINVAL; -} -EXPORT_SYMBOL(rdc_gpio_request); - -/* release previously-claimed GPIO */ -void rdc_gpio_free(unsigned gpio) -{ - unsigned long flags; - - if (!rdc321x_is_gpio(gpio)) - return; - - spin_lock_irqsave(&gpio_lock, flags); - gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f)); - spin_unlock_irqrestore(&gpio_lock, flags); -} -EXPORT_SYMBOL(rdc_gpio_free); - -/* read GPIO pin */ -int rdc_gpio_get_value(unsigned gpio) -{ - u32 reg; - unsigned long flags; - - spin_lock_irqsave(&gpio_lock, flags); - reg = rdc321x_conf_read(gpio < 32 - ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2); - spin_unlock_irqrestore(&gpio_lock, flags); - - return (1 << (gpio & 0x1f)) & reg ? 1 : 0; -} -EXPORT_SYMBOL(rdc_gpio_get_value); - -/* set GPIO pin to value */ -void rdc_gpio_set_value(unsigned gpio, int value) -{ - unsigned long flags; - u32 reg; - - reg = 1 << (gpio & 0x1f); - if (gpio < 32) { - spin_lock_irqsave(&gpio_lock, flags); - if (value) - gpio_data_reg1 |= reg; - else - gpio_data_reg1 &= ~reg; - rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1); - spin_unlock_irqrestore(&gpio_lock, flags); - } else { - spin_lock_irqsave(&gpio_lock, flags); - if (value) - gpio_data_reg2 |= reg; - else - gpio_data_reg2 &= ~reg; - rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2); - spin_unlock_irqrestore(&gpio_lock, flags); - } -} -EXPORT_SYMBOL(rdc_gpio_set_value); - -/* configure GPIO pin as input */ -int rdc_gpio_direction_input(unsigned gpio) -{ - if (!rdc321x_is_gpio(gpio)) - return -EINVAL; - - rdc321x_configure_gpio(gpio); - - return 0; -} -EXPORT_SYMBOL(rdc_gpio_direction_input); - -/* configure GPIO pin as output and set value */ -int rdc_gpio_direction_output(unsigned gpio, int value) -{ - if (!rdc321x_is_gpio(gpio)) - return -EINVAL; - - gpio_set_value(gpio, value); - rdc321x_configure_gpio(gpio); - - return 0; -} -EXPORT_SYMBOL(rdc_gpio_direction_output); diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c deleted file mode 100644 index 4f4e50c3ad3b..000000000000 --- a/arch/x86/mach-rdc321x/platform.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Generic RDC321x platform devices - * - * Copyright (C) 2007 Florian Fainelli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#include -#include -#include -#include -#include -#include - -#include - -/* LEDS */ -static struct gpio_led default_leds[] = { - { .name = "rdc:dmz", .gpio = 1, }, -}; - -static struct gpio_led_platform_data rdc321x_led_data = { - .num_leds = ARRAY_SIZE(default_leds), - .leds = default_leds, -}; - -static struct platform_device rdc321x_leds = { - .name = "leds-gpio", - .id = -1, - .dev = { - .platform_data = &rdc321x_led_data, - } -}; - -/* Watchdog */ -static struct platform_device rdc321x_wdt = { - .name = "rdc321x-wdt", - .id = -1, - .num_resources = 0, -}; - -static struct platform_device *rdc321x_devs[] = { - &rdc321x_leds, - &rdc321x_wdt -}; - -static int __init rdc_board_setup(void) -{ - rdc321x_gpio_setup(); - - return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs)); -} - -arch_initcall(rdc_board_setup); diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c index bf92802f2bbe..36e221beedcd 100644 --- a/drivers/watchdog/rdc321x_wdt.c +++ b/drivers/watchdog/rdc321x_wdt.c @@ -37,7 +37,7 @@ #include #include -#include +#include #define RDC_WDT_MASK 0x80000000 /* Mask */ #define RDC_WDT_EN 0x00800000 /* Enable bit */ From 8d69abb08343706f88380edb83927ffc0fc83098 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 18 Jan 2009 11:55:19 +0200 Subject: [PATCH 111/566] [ARM] pxa: fix NAND and MMC clock initialization for pxa3xx After commit 8c3abc7d903df492a7394b0adae4349d9a381aaf ("[ARM] pxa: convert to clkdev and match clocks by struct device where possible") get_clk in pxa3xx_nand fails with -ENOENT. Apparently, clk_get in pxamci will also fail for MCI2 on PXA310. The 'clk_find' and therefore 'clk_get' require driver to supply both 'dev_id' and 'con_id' if they are not NULL in the 'strcut clk_lookup', but neither pxa3xx_nand nor pxamci supply 'con_id'. This patch sets 'con_id' to NULL in NAND clock and MCI2 clock registration. Signed-off-by: Mike Rapoport Signed-off-by: Eric Miao --- arch/arm/mach-pxa/pxa300.c | 4 ++-- arch/arm/mach-pxa/pxa320.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-pxa/pxa300.c b/arch/arm/mach-pxa/pxa300.c index f735e58e6669..83fb609b6eb7 100644 --- a/arch/arm/mach-pxa/pxa300.c +++ b/arch/arm/mach-pxa/pxa300.c @@ -88,13 +88,13 @@ static struct pxa3xx_mfp_addr_map pxa310_mfp_addr_map[] __initdata = { static DEFINE_PXA3_CKEN(common_nand, NAND, 156000000, 0); static struct clk_lookup common_clkregs[] = { - INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", "NANDCLK"), + INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", NULL), }; static DEFINE_PXA3_CKEN(pxa310_mmc3, MMC3, 19500000, 0); static struct clk_lookup pxa310_clkregs[] = { - INIT_CLKREG(&clk_pxa310_mmc3, "pxa2xx-mci.2", "MMCCLK"), + INIT_CLKREG(&clk_pxa310_mmc3, "pxa2xx-mci.2", NULL), }; static int __init pxa300_init(void) diff --git a/arch/arm/mach-pxa/pxa320.c b/arch/arm/mach-pxa/pxa320.c index effe408c186f..36f066196fa2 100644 --- a/arch/arm/mach-pxa/pxa320.c +++ b/arch/arm/mach-pxa/pxa320.c @@ -83,7 +83,7 @@ static struct pxa3xx_mfp_addr_map pxa320_mfp_addr_map[] __initdata = { static DEFINE_PXA3_CKEN(pxa320_nand, NAND, 104000000, 0); static struct clk_lookup pxa320_clkregs[] = { - INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", "NANDCLK"), + INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", NULL), }; static int __init pxa320_init(void) From ec971c91c55b8e2e2609f8d61eb34da13aedb37d Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Mon, 19 Jan 2009 11:39:36 +0800 Subject: [PATCH 112/566] [ARM] pxa: fix missing of __REG() definition for ac97 registers access This currently happens for 'drivers/input/touch/mainstone-wm97xx.c'. Signed-off-by: Eric Miao --- arch/arm/mach-pxa/include/mach/regs-ac97.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-pxa/include/mach/regs-ac97.h b/arch/arm/mach-pxa/include/mach/regs-ac97.h index e41b9d202b8c..b8d14bd9ae59 100644 --- a/arch/arm/mach-pxa/include/mach/regs-ac97.h +++ b/arch/arm/mach-pxa/include/mach/regs-ac97.h @@ -1,6 +1,8 @@ #ifndef __ASM_ARCH_REGS_AC97_H #define __ASM_ARCH_REGS_AC97_H +#include + /* * AC97 Controller registers */ From b6729deb26a131083add5b7238c7b7478ef6b502 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Mon, 19 Jan 2009 11:42:32 +0800 Subject: [PATCH 113/566] [ARM] pxa: make more SSCR0 bit definitions visible on multiple processors The only exclusive definitions are SSCR0_SCR and SSCR0_SerClkDiv(), loosen that exclusive #ifdef .. #else .. #endif to allow other definitions to be visible when slected multiple processors. This helps to pass the building of pxa-ssp.c. Signed-off-by: Eric Miao --- arch/arm/mach-pxa/include/mach/regs-ssp.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-pxa/include/mach/regs-ssp.h b/arch/arm/mach-pxa/include/mach/regs-ssp.h index 3c04cde2cf1f..cf31986f6f05 100644 --- a/arch/arm/mach-pxa/include/mach/regs-ssp.h +++ b/arch/arm/mach-pxa/include/mach/regs-ssp.h @@ -41,6 +41,9 @@ #elif defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx) #define SSCR0_SCR (0x000fff00) /* Serial Clock Rate (mask) */ #define SSCR0_SerClkDiv(x) (((x) - 1) << 8) /* Divisor [1..4096] */ +#endif + +#if defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx) #define SSCR0_EDSS (1 << 20) /* Extended data size select */ #define SSCR0_NCS (1 << 21) /* Network clock select */ #define SSCR0_RIM (1 << 22) /* Receive FIFO overrrun interrupt mask */ From 422e79a8b39d9ac73e410dc3cd099aecea82afd2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Jan 2009 17:06:42 +1100 Subject: [PATCH 114/566] x86: Remove never-called arch_setup_msi_irq() Since commit 75c46fa, "x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure", x86 has had an implementation of arch_setup_msi_irqs(). That implementation does not call arch_setup_msi_irq(), instead it calls setup_irq(). No other x86 code calls arch_setup_msi_irq(). That leaves only arch_setup_msi_irqs() in drivers/pci/msi.c, but that routine is overridden by the x86 version of arch_setup_msi_irqs(). So arch_setup_msi_irq() is dead code, remove it. Signed-off-by: Michael Ellerman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/io_apic.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 79b8c0c72d34..157aafa45583 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3462,40 +3462,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, msidesc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; From 5cdc5e9e69d4dc3a3630ae1fa666401b2a8dcde6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Jan 2009 20:49:37 +0100 Subject: [PATCH 115/566] x86: fully honor "nolapic", fix Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 485787955834..9ca12af6c876 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1131,7 +1131,9 @@ void __cpuinit setup_local_APIC(void) int i, j; if (disable_apic) { +#ifdef CONFIG_X86_IO_APIC disable_ioapic_setup(); +#endif return; } From c6e50f93db5bd0895ec7c7d1b6f3886c6e1f11b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jan 2009 12:29:19 +0900 Subject: [PATCH 116/566] x86: cleanup stack protector Impact: cleanup Make the following cleanups. * remove duplicate comment from boot_init_stack_canary() which fits better in the other place - cpu_idle(). * move stack_canary offset check from __switch_to() to boot_init_stack_canary(). Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/stackprotector.h | 13 ++++++------- arch/x86/kernel/process_64.c | 7 ------- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 5976cd803e9a..4a8c9d382c93 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -40,6 +40,4 @@ extern void pda_init(int); #endif -#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c7f0d10bae7b..2383e5bb475c 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -16,13 +16,12 @@ static __always_inline void boot_init_stack_canary(void) u64 tsc; /* - * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us - and if we are the boot CPU we have - * a 0 stack canary. This is a good place for updating - * it, as we wont ever return from this function (so the - * invalid canaries already on the stack wont ever - * trigger). - * + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement + */ + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); + + /* * We both use the random pool and the current TSC as a source * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index aa89eabf09e0..088bc9a0f82c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -638,13 +638,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -#ifdef CONFIG_CC_STACKPROTECTOR - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); -#endif /* * Now maybe reload the debug registers and handle I/O bitmaps From b4a8f7a262e79ecb0b39beb1449af524a78887f8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jan 2009 12:29:19 +0900 Subject: [PATCH 117/566] x86: conditionalize stack canary handling in hot path Impact: no unnecessary stack canary swapping during context switch There's no point in moving stack_canary around during context switch if it's not enabled. Conditionalize it. Signed-off-by: Tejun Heo --- arch/x86/include/asm/system.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8cadfe9b1194..b77bd8bd3cc2 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -86,17 +86,28 @@ do { \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ "r12", "r13", "r14", "r15" +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" +#define __switch_canary_param \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ + , [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_param +#endif /* CC_STACKPROTECTOR */ + /* Save restore flags to clear handle leaking NT */ #define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ + asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ - "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,%%gs:%P[pda_canary]\n\t" \ + __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -108,9 +119,8 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ - [current_task] "m" (per_cpu_var(current_task)), \ - [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ + [current_task] "m" (per_cpu_var(current_task)) \ + __switch_canary_param \ : "memory", "cc" __EXTRA_CLOBBER) #endif From 8ce031972b40da58c268caba8c5ea3c0856d7131 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: [PATCH 118/566] x86: remove pda_init() Impact: cleanup Copy the code to cpu_init() to satisfy the requirement that the cpu be reinitialized. Remove all other calls, since the segments are already initialized in head_64.S. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/kernel/cpu/common.c | 15 +++------------ arch/x86/kernel/head64.c | 2 -- arch/x86/xen/enlighten.c | 1 - 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4a8c9d382c93..b473e952439a 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -24,7 +24,6 @@ struct x8664_pda { } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); -extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7976a6a0f65c..f83a4d6160f0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(unsigned int, irq_count) = -1; -void __cpuinit pda_init(int cpu) -{ - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - - load_pda_offset(cpu); -} - static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) __aligned(PAGE_SIZE); @@ -967,9 +958,9 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); + loadsegment(fs, 0); + loadsegment(gs, 0); + load_pda_offset(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index af67d3227ea6..f5b272247690 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - pda_init(0); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 75b94139e1f2..bef941f61451 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - pda_init(0); #endif xen_smp_init(); From 0bd74fa8e29dcad98f7e8ffe01ec05fb3326abaf Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: [PATCH 119/566] percpu: refactor percpu.h Impact: cleanup Refactor the DEFINE_PER_CPU_* macros and add .data.percpu.first section. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/percpu.h | 43 +++++++++++++++++-------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index aa6b9b1b30b5..32bbf50d3055 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -486,6 +486,7 @@ */ #define PERCPU_VADDR(vaddr, phdr) \ PERCPU_PROLOG(vaddr) \ + *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f2a3751873a..0e24202b5a4e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,34 +9,39 @@ #include #ifdef CONFIG_SMP -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define PER_CPU_BASE_SECTION ".data.percpu" #ifdef MODULE -#define SHARED_ALIGNED_SECTION ".data.percpu" +#define PER_CPU_SHARED_ALIGNED_SECTION "" #else -#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" #endif +#define PER_CPU_FIRST_SECTION ".first" + +#else + +#define PER_CPU_BASE_SECTION ".data" +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif + +#define DEFINE_PER_CPU_SECTION(type, name, section) \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "") #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.page_aligned"))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name -#else -#define DEFINE_PER_CPU(type, name) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) - -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) -#endif +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) From 8c7e58e690ae60ab4215b025f433ed4af261e103 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: [PATCH 120/566] x86: rework __per_cpu_load adjustments Impact: cleanup Use cpu_number to determine if the adjustment is necessary. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/head_64.S | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c8ace880661b..98ea26a2fca1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -207,19 +207,15 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP /* - * early_gdt_base should point to the gdt_page in static percpu init - * data area. Computing this requires two symbols - __per_cpu_load - * and per_cpu__gdt_page. As linker can't do no such relocation, do - * it by hand. As early_gdt_descr is manipulated by C code for - * secondary CPUs, this should be done only once for the boot CPU - * when early_gdt_descr_base contains zero. + * Fix up static pointers that need __per_cpu_load added. The assembler + * is unable to do this directly. This is only needed for the boot cpu. + * These values are set up with the correct base addresses by C code for + * secondary cpus. */ - movq early_gdt_descr_base(%rip), %rax - testq %rax, %rax - jnz 1f - movq $__per_cpu_load, %rax - addq $per_cpu__gdt_page, %rax - movq %rax, early_gdt_descr_base(%rip) + movq initial_gs(%rip), %rax + cmpl $0, per_cpu__cpu_number(%rax) + jne 1f + addq %rax, early_gdt_descr_base(%rip) 1: #endif /* @@ -431,12 +427,8 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 -#ifdef CONFIG_SMP early_gdt_descr_base: - .quad 0x0000000000000000 -#else .quad per_cpu__gdt_page -#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ From 947e76cdc34c782fc947313d4331380686eebbad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: [PATCH 121/566] x86: move stack_canary into irq_stack Impact: x86_64 percpu area layout change, irq_stack now at the beginning Now that the PDA is empty except for the stack canary, it can be removed. The irqstack is moved to the start of the per-cpu section. If the stack protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack. tj: * updated subject * dropped asm relocation of irq_stack_ptr * updated comments a bit * rebased on top of stack canary changes Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 3 --- arch/x86/include/asm/percpu.h | 6 ----- arch/x86/include/asm/processor.h | 23 +++++++++++++++++- arch/x86/include/asm/stackprotector.h | 6 ++--- arch/x86/include/asm/system.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 4 ---- arch/x86/kernel/cpu/common.c | 7 +++--- arch/x86/kernel/head_64.S | 13 ++++------ arch/x86/kernel/setup_percpu.c | 34 ++++----------------------- arch/x86/kernel/vmlinux_64.lds.S | 8 +++++-- 10 files changed, 46 insertions(+), 62 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index b473e952439a..ba46416634f0 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -17,9 +17,6 @@ struct x8664_pda { unsigned long unused4; int unused5; unsigned int unused6; /* 36 was cpunumber */ - unsigned long stack_canary; /* 40 stack canary value */ - /* gcc-ABI: this canary MUST be at - offset 40!!! */ short in_bootmem; /* pda lives in bootmem */ } ____cacheline_aligned_in_smp; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 165d5272ece1..ce980db5e59d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -133,12 +133,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_X86_64 -extern void load_pda_offset(int cpu); -#else -static inline void load_pda_offset(int cpu) { } -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f511246fa6cd..48676b943b92 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -379,8 +379,29 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); -DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +union irq_stack_union { + char irq_stack[IRQ_STACK_SIZE]; + /* + * GCC hardcodes the stack canary as %gs:40. Since the + * irq_stack is the object at %gs:0, we reserve the bottom + * 48 bytes of the irq stack for the canary. + */ + struct { + char gs_base[40]; + unsigned long stack_canary; + }; +}; + +DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); + +static inline void load_gs_base(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); + mb(); +} #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 2383e5bb475c..36a700acaf2b 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -2,7 +2,7 @@ #define _ASM_STACKPROTECTOR_H 1 #include -#include +#include /* * Initialize the stackprotector canary value. @@ -19,7 +19,7 @@ static __always_inline void boot_init_stack_canary(void) * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); + BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); /* * We both use the random pool and the current TSC as a source @@ -32,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void) canary += tsc + (tsc << 32UL); current->stack_canary = canary; - write_pda(stack_canary, canary); + percpu_write(irq_stack_union.stack_canary, canary); } #endif diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index b77bd8bd3cc2..52eb748a68af 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -89,10 +89,10 @@ do { \ #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,%%gs:%P[pda_canary]\n\t" + "movq %%r8,%%gs:%P[gs_canary]\n\t" #define __switch_canary_param \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ - , [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary)) + , [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary)) #else /* CC_STACKPROTECTOR */ #define __switch_canary #define __switch_canary_param diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 64c834a39aa8..94f9c8b39d20 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -48,10 +48,6 @@ int main(void) #endif BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - DEFINE(pda_size, sizeof(struct x8664_pda)); - BLANK(); -#undef ENTRY #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f83a4d6160f0..098934e72a16 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,12 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +DEFINE_PER_CPU_FIRST(union irq_stack_union, + irq_stack_union) __aligned(PAGE_SIZE); #ifdef CONFIG_SMP DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ #else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; + per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; #endif DEFINE_PER_CPU(unsigned long, kernel_stack) = @@ -960,7 +961,7 @@ void __cpuinit cpu_init(void) loadsegment(fs, 0); loadsegment(gs, 0); - load_pda_offset(cpu); + load_gs_base(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 98ea26a2fca1..a0a2b5ca9b7d 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -242,13 +242,10 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * On SMP, %gs should point to the per-cpu area. For initial - * boot, make %gs point to the init data section. For a - * secondary CPU,initial_gs should be set to its pda address - * before the CPU runs this code. - * - * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't - * change. + * The base of %gs always points to the bottom of the irqstack + * union. If the stack protector canary is enabled, it is + * located at %gs:40. Note that, on SMP, the boot cpu uses + * init data section till per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -281,7 +278,7 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP .quad __per_cpu_load #else - .quad PER_CPU_VAR(__pda) + .quad PER_CPU_VAR(irq_stack_union) #endif __FINITDATA diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index efbafbbff584..90b8e154bb53 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif -/* - * Define load_pda_offset() and per-cpu __pda for x86_64. - * load_pda_offset() is responsible for loading the offset of pda into - * %gs. - * - * On SMP, pda offset also duals as percpu base address and thus it - * should be at the start of per-cpu area. To achieve this, it's - * preallocated in vmlinux_64.lds.S directly instead of using - * DEFINE_PER_CPU(). - */ -#ifdef CONFIG_X86_64 -void __cpuinit load_pda_offset(int cpu) -{ - /* Memory clobbers used to order pda/percpu accesses */ - mb(); - wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); - mb(); -} -#ifndef CONFIG_SMP -DEFINE_PER_CPU(struct x8664_pda, __pda); -#endif -EXPORT_PER_CPU_SYMBOL(__pda); -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ @@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void) per_cpu(cpu_number, cpu) = cpu; #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = - (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; + per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; /* - * CPU0 modified pda in the init data area, reload pda - * offset for CPU0 and clear the area for others. + * Up to this point, CPU0 has been using .data.init + * area. Reload %gs offset for CPU0. */ if (cpu == 0) - load_pda_offset(0); - else - memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); + load_gs_base(cpu); #endif DBG("PERCPU: cpu %4d %p\n", cpu, ptr); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index a09abb8fb97f..c9740996430a 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -220,8 +220,7 @@ SECTIONS * so that it can be accessed as a percpu variable. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) - per_cpu____pda = __per_cpu_start; + PERCPU_VADDR(0, :percpu) #else PERCPU(PAGE_SIZE) #endif @@ -262,3 +261,8 @@ SECTIONS */ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif From 0d974d4592708f85044751817da4b7016e1b0602 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 18 Jan 2009 19:52:25 -0500 Subject: [PATCH 122/566] x86: remove pda.h Impact: cleanup Signed-off-by: Brian Gerst --- arch/x86/include/asm/pda.h | 39 ------------------------------- arch/x86/include/asm/pgtable_64.h | 1 - arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/process_64.c | 1 - arch/x86/kernel/traps.c | 1 - 7 files changed, 45 deletions(-) delete mode 100644 arch/x86/include/asm/pda.h diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h deleted file mode 100644 index ba46416634f0..000000000000 --- a/arch/x86/include/asm/pda.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef _ASM_X86_PDA_H -#define _ASM_X86_PDA_H - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include -#include -#include - -/* Per processor datastructure. %gs points to it while the kernel runs */ -struct x8664_pda { - unsigned long unused1; - unsigned long unused2; - unsigned long unused3; - unsigned long unused4; - int unused5; - unsigned int unused6; /* 36 was cpunumber */ - short in_bootmem; /* pda lives in bootmem */ -} ____cacheline_aligned_in_smp; - -DECLARE_PER_CPU(struct x8664_pda, __pda); - -#define cpu_pda(cpu) (&per_cpu(__pda, cpu)) - -#define read_pda(field) percpu_read(__pda.field) -#define write_pda(field, val) percpu_write(__pda.field, val) -#define add_pda(field, val) percpu_add(__pda.field, val) -#define sub_pda(field, val) percpu_sub(__pda.field, val) -#define or_pda(field, val) percpu_or(__pda.field, val) - -/* This is not atomic against other CPUs -- CPU preemption needs to be off */ -#define test_and_clear_bit_pda(bit, field) \ - x86_test_and_clear_bit_percpu(bit, __pda.field) - -#endif - -#endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ba09289accaa..1df9637dfda3 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -11,7 +11,6 @@ #include #include #include -#include extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 68636e767a91..45ef8a1b9d7c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -15,7 +15,6 @@ # include # endif #endif -#include #include #include diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 94f9c8b39d20..8793ab33e2c1 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 098934e72a16..3887fcf6e519 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -30,7 +30,6 @@ #include #endif -#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 088bc9a0f82c..c422eebb0c58 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055284b..ed5aee5f3fcc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -59,7 +59,6 @@ #ifdef CONFIG_X86_64 #include #include -#include #else #include #include From 6b7c38d55587f43bcd2cbce3a98b1c0826982090 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: [PATCH 123/566] linker script: kill PERCPU_VADDR_PREALLOC() Impact: cleanup With .data.percpu.first in place, PERCPU_VADDR_PREALLOC() is no longer necessary. Kill it. Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 45 ++++++------------------------- 1 file changed, 8 insertions(+), 37 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32bbf50d3055..53e21f36a802 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,22 +430,10 @@ *(.initcall7.init) \ *(.initcall7s.init) -#define PERCPU_PROLOG(vaddr) \ - VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__per_cpu_start) = .; - -#define PERCPU_EPILOG(phdr) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ - } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); - /** - * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc + * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) * @phdr: destination PHDR (optional) - * @prealloc: the size of prealloc area * * Macro which expands to output section for percpu area. If @vaddr * is not blank, it specifies explicit base address and all percpu @@ -457,40 +445,23 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * - * If @prealloc is non-zero, the specified number of bytes will be - * reserved at the start of percpu area. As the prealloc area is - * likely to break alignment, this macro puts areas in increasing - * alignment order. - * * This macro defines three symbols, __per_cpu_load, __per_cpu_start * and __per_cpu_end. The first one is the vaddr of loaded percpu * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * end offset. */ -#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \ - PERCPU_PROLOG(vaddr) \ - . += prealloc; \ - *(.data.percpu) \ - *(.data.percpu.shared_aligned) \ - *(.data.percpu.page_aligned) \ - PERCPU_EPILOG(segment) - -/** - * PERCPU_VADDR - define output section for percpu area - * @vaddr: explicit base address (optional) - * @phdr: destination PHDR (optional) - * - * Macro which expands to output section for percpu area. Mostly - * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than - * using slighly different layout. - */ #define PERCPU_VADDR(vaddr, phdr) \ - PERCPU_PROLOG(vaddr) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ - PERCPU_EPILOG(phdr) + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } phdr \ + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version From 5766b842b23c6b40935a5f3bd435b2bcdaff2143 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jan 2009 09:13:15 +0100 Subject: [PATCH 124/566] x86, cpumask: fix tlb flush race Impact: fix bootup crash The cpumask is now passed in as a reference to mm->cpu_vm_mask, not on the stack - hence it is not constant anymore during the TLB flush. That way it could race and some static sanity checks would trigger: [ 238.154287] ------------[ cut here ]------------ [ 238.156039] kernel BUG at arch/x86/kernel/tlb_32.c:130! [ 238.156039] invalid opcode: 0000 [#1] SMP [ 238.156039] last sysfs file: /sys/class/net/eth2/address [ 238.156039] Modules linked in: [ 238.156039] [ 238.156039] Pid: 6493, comm: ifup-eth Not tainted (2.6.29-rc2-tip #1) P4DC6 [ 238.156039] EIP: 0060:[] EFLAGS: 00010202 CPU: 2 [ 238.156039] EIP is at native_flush_tlb_others+0x35/0x158 [ 238.156039] EAX: c0ef972c EBX: f6143301 ECX: 00000000 EDX: 00000000 [ 238.156039] ESI: f61433a8 EDI: f6143200 EBP: f34f3e00 ESP: f34f3df0 [ 238.156039] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 238.156039] Process ifup-eth (pid: 6493, ti=f34f2000 task=f399ab00 task.ti=f34f2000) [ 238.156039] Stack: [ 238.156039] ffffffff f61433a8 ffffffff f6143200 f34f3e18 c0118e9c 00000000 f6143200 [ 238.156039] f61433a8 f5bec738 f34f3e28 c0119435 c2b5b830 f6143200 f34f3e34 c01c2dc3 [ 238.156039] bffd9000 f34f3e60 c01c3051 00000000 ffffffff f34f3e4c 00000000 00000071 [ 238.156039] Call Trace: [ 238.156039] [] ? flush_tlb_others+0x52/0x5b [ 238.156039] [] ? flush_tlb_mm+0x7f/0x8b [ 238.156039] [] ? tlb_finish_mmu+0x2d/0x55 [ 238.156039] [] ? exit_mmap+0x124/0x170 [ 238.156039] [] ? mmput+0x40/0xf5 [ 238.156039] [] ? flush_old_exec+0x640/0x94b [ 238.156039] [] ? fsnotify_access+0x37/0x39 [ 238.156039] [] ? kernel_read+0x39/0x4b [ 238.156039] [] ? load_elf_binary+0x4a1/0x11bb [ 238.156039] [] ? might_fault+0x51/0x9c [ 238.156039] [] ? paravirt_read_tsc+0x20/0x4f [ 238.156039] [] ? native_sched_clock+0x5d/0x60 [ 238.156039] [] ? search_binary_handler+0xab/0x2c4 [ 238.156039] [] ? load_elf_binary+0x0/0x11bb [ 238.156039] [] ? _raw_read_unlock+0x21/0x46 [ 238.156039] [] ? load_elf_binary+0x0/0x11bb [ 238.156039] [] ? search_binary_handler+0xb2/0x2c4 [ 238.156039] [] ? do_execve+0x21c/0x2ee [ 238.156039] [] ? sys_execve+0x51/0x8c [ 238.156039] [] ? sysenter_do_call+0x12/0x43 Fix it by not assuming that the cpumask is constant. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_32.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ec53818f4e38..d37bbfcb813d 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -125,9 +125,8 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { /* - * - mask must exist :) + * mm must exist :) */ - BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm); /* @@ -138,14 +137,18 @@ void native_flush_tlb_others(const struct cpumask *cpumask, spin_lock(&tlbstate_lock); cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); + + /* + * If a task whose mm mask we are looking at has descheduled and + * has cleared its presence from the mask, or if a CPU which we ran + * on has gone down then there might be no flush work left: + */ if (unlikely(cpumask_empty(flush_cpumask))) { spin_unlock(&tlbstate_lock); return; } -#endif + flush_mm = mm; flush_va = va; From 92181f190b649f7ef2b79cbf5c00f26ccc66da2a Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 20 Jan 2009 04:24:26 +0100 Subject: [PATCH 125/566] x86: optimise x86's do_page_fault (C entry point for the page fault path) Impact: cleanup, restructure code to improve assembly gcc isn't _all_ that smart about spilling registers to stack or reusing stack slots, even with branch annotations. do_page_fault contained a lot of functionality, so split unlikely paths into their own functions, and mark them as noinline just to be sure. I consider this actually to be somewhat of a cleanup too: the main function now contains about half the number of lines so the normal path is easier to read, while the error cases are also nicely split away. Also, ensure the order of arguments to functions is always the same: regs, addr, error_code. This can reduce code size a tiny bit, and just looks neater too. And add a couple of branch annotations. Before: do_page_fault: subq $360, %rsp #, After: do_page_fault: subq $56, %rsp #, bloat-o-meter: add/remove: 8/0 grow/shrink: 0/1 up/down: 2222/-1680 (542) function old new delta __bad_area_nosemaphore - 506 +506 no_context - 474 +474 vmalloc_fault - 424 +424 spurious_fault - 358 +358 mm_fault_error - 272 +272 bad_area_access_error - 89 +89 bad_area - 89 +89 bad_area_nosemaphore - 10 +10 do_page_fault 2464 784 -1680 Yes, the total size increases by 542 bytes, due to the extra function calls. But these will very rarely be called (except for vmalloc_fault) in a normal workload. Importantly, do_page_fault is less than 1/3rd it's original size, and touches far less stack. Existing gotos and branch hints did move a lot of the infrequently used text out of the fastpath, but that's even further improved after this patch. Signed-off-by: Nick Piggin Acked-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 438 ++++++++++++++++++++++++++------------------ 1 file changed, 256 insertions(+), 182 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 90dfae511a41..033292dc9e21 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -91,8 +91,8 @@ static inline int notify_page_fault(struct pt_regs *regs) * * Opcode checker based on code by Richard Brunner */ -static int is_prefetch(struct pt_regs *regs, unsigned long addr, - unsigned long error_code) +static int is_prefetch(struct pt_regs *regs, unsigned long error_code, + unsigned long addr) { unsigned char *instr; int scan_more = 1; @@ -409,15 +409,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, } #ifdef CONFIG_X86_64 -static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static noinline void pgtable_bad(struct pt_regs *regs, + unsigned long error_code, unsigned long address) { unsigned long flags = oops_begin(); int sig = SIGKILL; - struct task_struct *tsk; + struct task_struct *tsk = current; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", - current->comm, address); + tsk->comm, address); dump_pagetable(address); tsk = current; tsk->thread.cr2 = address; @@ -429,6 +429,190 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, } #endif +static noinline void no_context(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; +#ifdef CONFIG_X86_64 + unsigned long flags; + int sig; +#endif + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * X86_32 + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + * + * X86_64 + * Hall of shame of CPU/BIOS bugs. + */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata93(regs, address)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ +#ifdef CONFIG_X86_32 + bust_spinlocks(1); +#else + flags = oops_begin(); +#endif + + show_fault_oops(regs, error_code, address); + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + +#ifdef CONFIG_X86_32 + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); +#else + sig = SIGKILL; + if (__die("Oops", regs, error_code)) + sig = 0; + /* Executive summary in case the body of the oops scrolled away */ + printk(KERN_EMERG "CR2: %016lx\n", address); + oops_end(flags, regs, sig); +#endif +} + +static void __bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct task_struct *tsk = current; + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { + /* + * It's possible to have interrupts off here. + */ + local_irq_enable(); + + /* + * Valid to do another page fault here because this one came + * from user space. + */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata100(regs, address)) + return; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk( + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *) regs->ip, (void *) regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } + + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGSEGV, si_code, address, tsk); + return; + } + + if (is_f00f_bug(regs, address)) + return; + + no_context(regs, error_code, address); +} + +static noinline void bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); +} + +static void __bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct mm_struct *mm = current->mm; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ + up_read(&mm->mmap_sem); + + __bad_area_nosemaphore(regs, error_code, address, si_code); +} + +static noinline void bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_MAPERR); +} + +static noinline void bad_area_access_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_ACCERR); +} + +/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ +static void out_of_memory(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(¤t->mm->mmap_sem); + pagefault_out_of_memory(); +} + +static void do_sigbus(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); +#ifdef CONFIG_X86_32 + /* User space => ok to do another page fault */ + if (is_prefetch(regs, error_code, address)) + return; +#endif + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +} + +static noinline void mm_fault_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address, unsigned int fault) +{ + if (fault & VM_FAULT_OOM) + out_of_memory(regs, error_code, address); + else if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); +} + static int spurious_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & PF_WRITE) && !pte_write(*pte)) @@ -448,8 +632,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static int spurious_fault(unsigned long address, - unsigned long error_code) +static noinline int spurious_fault(unsigned long error_code, + unsigned long address) { pgd_t *pgd; pud_t *pud; @@ -494,7 +678,7 @@ static int spurious_fault(unsigned long address, * * This assumes no large pages in there. */ -static int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { #ifdef CONFIG_X86_32 unsigned long pgd_paddr; @@ -573,6 +757,25 @@ static int vmalloc_fault(unsigned long address) int show_unhandled_signals = 1; +static inline int access_error(unsigned long error_code, int write, + struct vm_area_struct *vma) +{ + if (write) { + /* write, present and write, not present */ + if (unlikely(!(vma->vm_flags & VM_WRITE))) + return 1; + } else if (unlikely(error_code & PF_PROT)) { + /* read, present */ + return 1; + } else { + /* read, not present */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + return 1; + } + + return 0; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -583,16 +786,12 @@ asmlinkage #endif void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { + unsigned long address; struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; - unsigned long address; - int write, si_code; + int write; int fault; -#ifdef CONFIG_X86_64 - unsigned long flags; - int sig; -#endif tsk = current; mm = tsk->mm; @@ -601,9 +800,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* get the address */ address = read_cr2(); - si_code = SEGV_MAPERR; - - if (notify_page_fault(regs)) + if (unlikely(notify_page_fault(regs))) return; if (unlikely(kmmio_fault(regs, address))) return; @@ -631,17 +828,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) return; /* Can handle a stale RO->RW TLB */ - if (spurious_fault(address, error_code)) + if (spurious_fault(error_code, address)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. */ - goto bad_area_nosemaphore; + bad_area_nosemaphore(regs, error_code, address); + return; } - /* * It's safe to allow irq's after cr2 has been saved and the * vmalloc fault has been handled. @@ -657,15 +854,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) #ifdef CONFIG_X86_64 if (unlikely(error_code & PF_RSVD)) - pgtable_bad(address, regs, error_code); + pgtable_bad(regs, error_code, address); #endif /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault. */ - if (unlikely(in_atomic() || !mm)) - goto bad_area_nosemaphore; + if (unlikely(in_atomic() || !mm)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } /* * When running in the kernel we expect faults to occur only to @@ -683,20 +882,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. */ - if (!down_read_trylock(&mm->mmap_sem)) { + if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && - !search_exception_tables(regs->ip)) - goto bad_area_nosemaphore; + !search_exception_tables(regs->ip)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } down_read(&mm->mmap_sem); } vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) + if (unlikely(!vma)) { + bad_area(regs, error_code, address); + return; + } + if (likely(vma->vm_start <= address)) goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, error_code, address); + return; + } if (error_code & PF_USER) { /* * Accessing the stack below %sp is always a bug. @@ -704,31 +909,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * and pusha to work. ("enter $65535,$31" pushes * 32 pointers and then decrements %sp by 65535.) */ - if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) - goto bad_area; + if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { + bad_area(regs, error_code, address); + return; + } } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ + if (unlikely(expand_stack(vma, address))) { + bad_area(regs, error_code, address); + return; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ good_area: - si_code = SEGV_ACCERR; - write = 0; - switch (error_code & (PF_PROT|PF_WRITE)) { - default: /* 3: write, present */ - /* fall through */ - case PF_WRITE: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case PF_PROT: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; + write = error_code & PF_WRITE; + if (unlikely(access_error(error_code, write, vma))) { + bad_area_access_error(regs, error_code, address); + return; } /* @@ -738,11 +937,8 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); + mm_fault_error(regs, error_code, address, fault); + return; } if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; @@ -760,128 +956,6 @@ good_area: } #endif up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & PF_USER) { - /* - * It's possible to have interrupts off here. - */ - local_irq_enable(); - - /* - * Valid to do another page fault here because this one came - * from user space. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata100(regs, address)) - return; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk( - "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, - (void *) regs->ip, (void *) regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } - - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); - return; - } - - if (is_f00f_bug(regs, address)) - return; - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - /* - * X86_32 - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. - * - * X86_64 - * Hall of shame of CPU/BIOS bugs. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata93(regs, address)) - return; - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ -#ifdef CONFIG_X86_32 - bust_spinlocks(1); -#else - flags = oops_begin(); -#endif - - show_fault_oops(regs, error_code, address); - - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; - -#ifdef CONFIG_X86_32 - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); -#else - sig = SIGKILL; - if (__die("Oops", regs, error_code)) - sig = 0; - /* Executive summary in case the body of the oops scrolled away */ - printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(flags, regs, sig); -#endif - -out_of_memory: - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ - up_read(&mm->mmap_sem); - pagefault_out_of_memory(); - return; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & PF_USER)) - goto no_context; -#ifdef CONFIG_X86_32 - /* User space => ok to do another page fault */ - if (is_prefetch(regs, address, error_code)) - return; -#endif - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); } DEFINE_SPINLOCK(pgd_lock); From afb33f8c0d7dea8c48ae1c2e3af5b437aa8dd7bb Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 12 Jan 2009 12:53:45 +0100 Subject: [PATCH 126/566] x86: remove byte locks Impact: cleanup Remove byte locks implementation, which was introduced by Jeremy in 8efcbab6 ("paravirt: introduce a "lock-byte" spinlock implementation"), but turned out to be dead code that is not used by any in-kernel virtualization guest (Xen uses its own variant of spinlocks implementation and KVM is not planning to move to byte locks). Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 2 - arch/x86/include/asm/spinlock.h | 66 +--------------------------- arch/x86/kernel/paravirt-spinlocks.c | 10 ----- 3 files changed, 2 insertions(+), 76 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aedc..32bc6c2c1386 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1389,8 +1389,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) -void paravirt_use_bytelocks(void); - #ifdef CONFIG_SMP static inline int __raw_spin_is_locked(struct raw_spinlock *lock) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da2..2bd6b111a414 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -172,70 +172,8 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; } -#ifdef CONFIG_PARAVIRT -/* - * Define virtualization-friendly old-style lock byte lock, for use in - * pv_lock_ops if desired. - * - * This differs from the pre-2.6.24 spinlock by always using xchgb - * rather than decb to take the lock; this allows it to use a - * zero-initialized lock structure. It also maintains a 1-byte - * contention counter, so that we can implement - * __byte_spin_is_contended. - */ -struct __byte_spinlock { - s8 lock; - s8 spinners; -}; +#ifndef CONFIG_PARAVIRT -static inline int __byte_spin_is_locked(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->lock != 0; -} - -static inline int __byte_spin_is_contended(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->spinners != 0; -} - -static inline void __byte_spin_lock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - s8 val = 1; - - asm("1: xchgb %1, %0\n" - " test %1,%1\n" - " jz 3f\n" - " " LOCK_PREFIX "incb %2\n" - "2: rep;nop\n" - " cmpb $1, %0\n" - " je 2b\n" - " " LOCK_PREFIX "decb %2\n" - " jmp 1b\n" - "3:" - : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); -} - -static inline int __byte_spin_trylock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - u8 old = 1; - - asm("xchgb %1,%0" - : "+m" (bl->lock), "+q" (old) : : "memory"); - - return old == 0; -} - -static inline void __byte_spin_unlock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - smp_wmb(); - bl->lock = 0; -} -#else /* !CONFIG_PARAVIRT */ static inline int __raw_spin_is_locked(raw_spinlock_t *lock) { return __ticket_spin_is_locked(lock); @@ -267,7 +205,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, __raw_spin_lock(lock); } -#endif /* CONFIG_PARAVIRT */ +#endif static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 95777b0faa73..3a7c5a44082e 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = { }; EXPORT_SYMBOL(pv_lock_ops); -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} From 26a552264bc92d2ec4747b1babb7cb1264908018 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 21 Jan 2009 11:29:19 +0800 Subject: [PATCH 127/566] [ARM] pxa: stop and disable IRQ for each DMA channels at startup Some broken bootloaders will leave the DMA channel state unclean, which we should really initialize correctly here. Signed-off-by: Eric Miao --- arch/arm/mach-pxa/dma.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-pxa/dma.c b/arch/arm/mach-pxa/dma.c index b1514fb20d3a..7de17fc5d54b 100644 --- a/arch/arm/mach-pxa/dma.c +++ b/arch/arm/mach-pxa/dma.c @@ -121,6 +121,16 @@ int __init pxa_init_dma(int num_ch) if (dma_channels == NULL) return -ENOMEM; + /* dma channel priorities on pxa2xx processors: + * ch 0 - 3, 16 - 19 <--> (0) DMA_PRIO_HIGH + * ch 4 - 7, 20 - 23 <--> (1) DMA_PRIO_MEDIUM + * ch 8 - 15, 24 - 31 <--> (2) DMA_PRIO_LOW + */ + for (i = 0; i < num_ch; i++) { + DCSR(i) = 0; + dma_channels[i].prio = min((i & 0xf) >> 2, DMA_PRIO_LOW); + } + ret = request_irq(IRQ_DMA, dma_irq_handler, IRQF_DISABLED, "DMA", NULL); if (ret) { printk (KERN_CRIT "Wow! Can't register IRQ for DMA\n"); @@ -128,14 +138,6 @@ int __init pxa_init_dma(int num_ch) return ret; } - /* dma channel priorities on pxa2xx processors: - * ch 0 - 3, 16 - 19 <--> (0) DMA_PRIO_HIGH - * ch 4 - 7, 20 - 23 <--> (1) DMA_PRIO_MEDIUM - * ch 8 - 15, 24 - 31 <--> (2) DMA_PRIO_LOW - */ - for (i = 0; i < num_ch; i++) - dma_channels[i].prio = min((i & 0xf) >> 2, DMA_PRIO_LOW); - num_dma_channels = num_ch; return 0; } From 67e68bde02fe783efc2ce2ca31bdb992f5235f8d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 128/566] x86: update canary handling during switch Impact: cleanup In switch_to(), instead of taking offset to irq_stack_union.stack, make it a proper percpu access using __percpu_arg() and per_cpu_var(). Signed-off-by: Tejun Heo --- arch/x86/include/asm/system.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 52eb748a68af..2fcc70bc85f3 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -89,13 +89,15 @@ do { \ #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,%%gs:%P[gs_canary]\n\t" -#define __switch_canary_param \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ - , [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary)) + "movq %%r8,"__percpu_arg([gs_canary])"\n\t" +#define __switch_canary_oparam \ + , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ #define __switch_canary -#define __switch_canary_param +#define __switch_canary_oparam +#define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ /* Save restore flags to clear handle leaking NT */ @@ -114,13 +116,14 @@ do { \ "jc ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ + __switch_canary_oparam \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [current_task] "m" (per_cpu_var(current_task)) \ - __switch_canary_param \ + __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) #endif From 06deef892c7327992434917fb6592c233430803d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 129/566] x86: clean up gdt_page definition Impact: cleanup && more compact percpu area layout with future changes Move 64-bit GDT to page-aligned section and clean up comment formatting. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3887fcf6e519..a8f0dedcb0cc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -63,23 +63,23 @@ cpumask_t cpu_sibling_setup_map; static struct cpu_dev *this_cpu __cpuinitdata; +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + /* + * We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + * + * The TLS descriptors are currently at a different place compared to i386. + * Hopefully nobody expects them at a fixed place (Wine?) + */ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; #else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, @@ -112,8 +112,8 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; #endif +} }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); #ifdef CONFIG_X86_32 From 299e26992a737804e13e74fdb97cdab470ed19ac Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 130/566] x86: fix percpu_write with 64-bit constants Impact: slightly better code generation for percpu_to_op() The processor will sign-extend 32-bit immediate values in 64-bit operations. Use the 'e' constraint ("32-bit signed integer constant, or a symbolic reference known to fit that range") for 64-bit constants. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ce980db5e59d..0b64af4f13ac 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -75,7 +75,7 @@ do { \ case 8: \ asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ - : "r" ((T__)val)); \ + : "re" ((T__)val)); \ break; \ default: __bad_percpu_size(); \ } \ From 0dd76d736eeb3e0ef86c5b103b47ae0e15edebad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 131/566] x86: set %fs to __KERNEL_PERCPU unconditionally for x86_32 Impact: cleanup %fs is currently set to __KERNEL_DS at boot, and conditionally switched to __KERNEL_PERCPU for secondary cpus. Instead, initialize GDT_ENTRY_PERCPU to the same attributes as GDT_ENTRY_KERNEL_DS and set %fs to __KERNEL_PERCPU unconditionally. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/head_32.S | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8f0dedcb0cc..fbebbcec001b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -111,7 +111,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70b..24c0e5cd71e3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. - movl %eax,%fs # gets reset once there's real percpu movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es + movl $(__KERNEL_PERCPU), %eax + movl %eax,%fs # set this cpu's percpu + xorl %eax,%eax # Clear GS and LDT movl %eax,%gs lldt %ax @@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP movb $1, ready cmpb $0,%cl # the first CPU calls start_kernel je 1f - movl $(__KERNEL_PERCPU), %eax - movl %eax,%fs # set this cpu's percpu movl (stack_start), %esp 1: #endif /* CONFIG_SMP */ From 6826c8ff07b5f95df0473a748a9831707079b940 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 132/566] x86: merge mmu_context.h Impact: cleanup tj: * changed cpu to unsigned as was done on mmu_context_64.h as cpu id is officially unsigned int * added missing ';' to 32bit version of deactivate_mm() Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/mmu_context.h | 63 +++++++++++++++++++++++++-- arch/x86/include/asm/mmu_context_32.h | 55 ----------------------- arch/x86/include/asm/mmu_context_64.h | 52 ---------------------- 3 files changed, 59 insertions(+), 111 deletions(-) delete mode 100644 arch/x86/include/asm/mmu_context_32.h delete mode 100644 arch/x86/include/asm/mmu_context_64.h diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 8aeeb3fd73db..52948df9cd1d 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); -#ifdef CONFIG_X86_32 -# include "mmu_context_32.h" -#else -# include "mmu_context_64.h" + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +#ifdef CONFIG_SMP + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +#ifdef CONFIG_SMP + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); +#endif + cpu_set(cpu, next->cpu_vm_mask); + + /* Re-load page tables */ + load_cr3(next->pgd); + + /* + * load the LDT, if the LDT is different: + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_LDT_nolock(&next->context); + } +#ifdef CONFIG_SMP + else { + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload CR3 + * to make sure to use no freed page tables. + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); + } + } +#endif +} #define activate_mm(prev, next) \ do { \ @@ -33,5 +76,17 @@ do { \ switch_mm((prev), (next), NULL); \ } while (0); +#ifdef CONFIG_X86_32 +#define deactivate_mm(tsk, mm) \ +do { \ + loadsegment(gs, 0); \ +} while (0) +#else +#define deactivate_mm(tsk, mm) \ +do { \ + load_gs_index(0); \ + loadsegment(fs, 0); \ +} while (0) +#endif #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h deleted file mode 100644 index 08b53454f831..000000000000 --- a/arch/x86/include/asm/mmu_context_32.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_32_H -#define _ASM_X86_MMU_CONTEXT_32_H - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif -} - -static inline void switch_mm(struct mm_struct *prev, - struct mm_struct *next, - struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - percpu_write(cpu_tlbstate.active_mm, next); -#endif - cpu_set(cpu, next->cpu_vm_mask); - - /* Re-load page tables */ - load_cr3(next->pgd); - - /* - * load the LDT, if the LDT is different: - */ - if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); - - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload %cr3. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ - asm("movl %0,%%gs": :"r" (0)); - -#endif /* _ASM_X86_MMU_CONTEXT_32_H */ diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h deleted file mode 100644 index c4572505ab3e..000000000000 --- a/arch/x86/include/asm/mmu_context_64.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_64_H -#define _ASM_X86_MMU_CONTEXT_64_H - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif -} - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned cpu = smp_processor_id(); - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - percpu_write(cpu_tlbstate.active_mm, next); -#endif - cpu_set(cpu, next->cpu_vm_mask); - load_cr3(next->pgd); - - if (unlikely(next->context.ldt != prev->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); - - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload CR3 - * to make sure to use no freed page tables. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ -do { \ - load_gs_index(0); \ - asm volatile("movl %0,%%fs"::"r"(0)); \ -} while (0) - -#endif /* _ASM_X86_MMU_CONTEXT_64_H */ From d650a5148593b65a3c3f9a344f46b91b7dfe7713 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 133/566] x86: merge irq_regs.h Impact: cleanup, better irq_regs code generation for x86_64 Make 64-bit use the same optimizations as 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/irq_regs.h | 36 +++++++++++++++++++++++++----- arch/x86/include/asm/irq_regs_32.h | 31 ------------------------- arch/x86/include/asm/irq_regs_64.h | 1 - arch/x86/kernel/irq_64.c | 3 +++ 4 files changed, 34 insertions(+), 37 deletions(-) delete mode 100644 arch/x86/include/asm/irq_regs_32.h delete mode 100644 arch/x86/include/asm/irq_regs_64.h diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h index 89c898ab298b..77843225b7ea 100644 --- a/arch/x86/include/asm/irq_regs.h +++ b/arch/x86/include/asm/irq_regs.h @@ -1,5 +1,31 @@ -#ifdef CONFIG_X86_32 -# include "irq_regs_32.h" -#else -# include "irq_regs_64.h" -#endif +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the per-cpu area. + * + * Jeremy Fitzhardinge + */ +#ifndef _ASM_X86_IRQ_REGS_H +#define _ASM_X86_IRQ_REGS_H + +#include + +#define ARCH_HAS_OWN_IRQ_REGS + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); + +static inline struct pt_regs *get_irq_regs(void) +{ + return percpu_read(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + percpu_write(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h deleted file mode 100644 index d7ed33ee94e9..000000000000 --- a/arch/x86/include/asm/irq_regs_32.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. - * - * Jeremy Fitzhardinge - */ -#ifndef _ASM_X86_IRQ_REGS_32_H -#define _ASM_X86_IRQ_REGS_32_H - -#include - -#define ARCH_HAS_OWN_IRQ_REGS - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return percpu_read(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - percpu_write(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/x86/include/asm/irq_regs_64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1db05247b47f..0b254de84083 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -22,6 +22,9 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + /* * Probabilistic stack overflow check: * From bdbcdd48883940bbd8d17eb01172d58a261a413a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 134/566] x86: uv cleanup Impact: cleanup Make the following uv related cleanups. * collect visible uv related definitions and interfaces into uv/uv.h and use it. this cleans up the messy situation where on 64bit, uv is defined properly, on 32bit generic it's dummy and on the rest undefined. after this clean up, uv is defined on 64 and dummy on 32. * update uv_flush_tlb_others() such that it takes cpumask of to-be-flushed cpus as argument, instead of that minus self, and returns yet-to-be-flushed cpumask, instead of modifying the passed in parameter. this interface change will ease dummy implementation of uv_flush_tlb_others() and makes uv tlb flush related stuff defined in tlb_uv proper. Signed-off-by: Tejun Heo --- arch/x86/include/asm/genapic_32.h | 7 ---- arch/x86/include/asm/genapic_64.h | 6 --- arch/x86/include/asm/uv/uv.h | 33 +++++++++++++++ arch/x86/include/asm/uv/uv_bau.h | 2 - arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/genx2apic_uv_x.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/tlb_64.c | 18 ++++---- arch/x86/kernel/tlb_uv.c | 68 ++++++++++++++++++------------- 9 files changed, 83 insertions(+), 54 deletions(-) create mode 100644 arch/x86/include/asm/uv/uv.h diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 2c05b737ee22..4334502d3664 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -138,11 +138,4 @@ struct genapic { extern struct genapic *genapic; extern void es7000_update_genapic_to_cluster(void); -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -#define get_uv_system_type() UV_NONE -#define is_uv_system() 0 -#define uv_wakeup_secondary(a, b) 1 -#define uv_system_init() do {} while (0) - - #endif /* _ASM_X86_GENAPIC_32_H */ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index adf32fb56aa6..7bb092c59055 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -extern enum uv_system_type get_uv_system_type(void); -extern int is_uv_system(void); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); -extern void uv_cpu_init(void); -extern void uv_system_init(void); -extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern void setup_apic_routing(void); diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h new file mode 100644 index 000000000000..dce5fe350134 --- /dev/null +++ b/arch/x86/include/asm/uv/uv.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_UV_UV_H +#define _ASM_X86_UV_UV_H + +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; + +#ifdef CONFIG_X86_64 + +extern enum uv_system_type get_uv_system_type(void); +extern int is_uv_system(void); +extern void uv_cpu_init(void); +extern void uv_system_init(void); +extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, + unsigned int cpu); + +#else /* X86_64 */ + +static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline int is_uv_system(void) { return 0; } +static inline void uv_cpu_init(void) { } +static inline void uv_system_init(void) { } +static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) +{ return 1; } +static inline const struct cpumask * +uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, + unsigned long va, unsigned int cpu) +{ return cpumask; } + +#endif /* X86_64 */ + +#endif /* _ASM_X86_UV_UV_H */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 74e6393bfddb..9b0e61bf7a88 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(struct cpumask *, - struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fbebbcec001b..99904f288d6a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -28,6 +28,7 @@ #include #include #include +#include #endif #include diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b193e082f6ce..bfe36249145c 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 869b98840fd0..def770b57b5a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index e64a32c48825..b8bed841ad67 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -15,8 +15,7 @@ #include #include #include -#include -#include +#include DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; @@ -206,16 +205,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { if (is_uv_system()) { - /* FIXME: could be an percpu_alloc'd thing */ - static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask); + unsigned int cpu; - cpumask_andnot(after_uv_flush, cpumask, - cpumask_of(smp_processor_id())); - if (!uv_flush_tlb_others(after_uv_flush, mm, va)) - flush_tlb_others_ipi(after_uv_flush, mm, va); - - put_cpu_var(flush_tlb_uv_cpumask); + cpu = get_cpu(); + cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); + if (cpumask) + flush_tlb_others_ipi(cpumask, mm, va); + put_cpu(); return; } flush_tlb_others_ipi(cpumask, mm, va); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 690dcf1a27d4..aae15dd72604 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * * Send a broadcast and wait for a broadcast message to complete. * - * The cpumaskp mask contains the cpus the broadcast was sent to. + * The flush_mask contains the cpus the broadcast was sent to. * - * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask will have - * some bits still set. + * Returns NULL if all remote flushing was done. The mask is zeroed. + * Returns @flush_mask if some remote flushing remains to be done. The + * mask will have some bits still set. */ -int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - struct cpumask *cpumaskp) +const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, + struct bau_desc *bau_desc, + struct cpumask *flush_mask) { int completion_status = 0; int right_shift; @@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu(bit, cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpumask_clear_cpu(bit, cpumaskp); + cpumask_clear_cpu(bit, flush_mask); } - if (!cpumask_empty(cpumaskp)) - return 0; - return 1; + if (!cpumask_empty(flush_mask)) + return flush_mask; + return NULL; } /** * uv_flush_tlb_others - globally purge translation cache of a virtual * address or all TLB's - * @cpumaskp: mask of all cpu's in which the address is to be removed + * @cpumask: mask of all cpu's in which the address is to be removed * @mm: mm_struct containing virtual address range * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * @cpu: the current cpu * * This is the entry point for initiating any UV global TLB shootdown. * * Purges the translation caches of all specified processors of the given * virtual address, or purges all TLB's on specified processors. * - * The caller has derived the cpumaskp from the mm_struct and has subtracted - * the local cpu from the mask. This function is called only if there - * are bits set in the mask. (e.g. flush_tlb_page()) + * The caller has derived the cpumask from the mm_struct. This function + * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) * - * The cpumaskp is converted into a nodemask of the nodes containing + * The cpumask is converted into a nodemask of the nodes containing * the cpus. * - * Returns 1 if all remote flushing was done. - * Returns 0 if some remote flushing remains to be done. + * Note that this function should be called with preemption disabled. + * + * Returns NULL if all remote flushing was done. + * Returns pointer to cpumask if some remote flushing remains to be + * done. The returned pointer is valid till preemption is re-enabled. */ -int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, - unsigned long va) +const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, unsigned int cpu) { + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask); int i; int bit; int blade; - int cpu; + int uv_cpu; int this_blade; int locals = 0; struct bau_desc *bau_desc; - cpu = uv_blade_processor_id(); + WARN_ON(!in_atomic()); + + cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + + uv_cpu = uv_blade_processor_id(); this_blade = uv_numa_blade_id(); bau_desc = __get_cpu_var(bau_control).descriptor_base; - bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; + bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu(bit, cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { @@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, * no off_node flushing; return status for local node */ if (locals) - return 0; + return flush_mask; else - return 1; + return NULL; } __get_cpu_var(ptcstats).requestor++; __get_cpu_var(ptcstats).ntargeted += i; bau_desc->payload.address = va; - bau_desc->payload.sending_cpu = smp_processor_id(); + bau_desc->payload.sending_cpu = cpu; - return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); + return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); } /* From 6dd01bedee6c3191643db303a1dc530bad56ec55 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 135/566] x86: prepare for tlb merge Impact: clean up, ipi vector number reordering for x86_32 Make the following changes to prepare for tlb merge. * reorder x86_32 ip vectors * adjust tlb_32.c and tlb_64.c such that their logics coincide exactly - on spurious invalidate ipi, tlb_32 acks the irq - tlb_64 now has proper memory barriers around clearing flush_cpumask (no change in generated code) * unexport flush_tlb_page from tlb_32.c, there's no user * use unsigned int for cpu id * drop unnecessary includes from tlb_64.c Signed-off-by: Tejun Heo --- arch/x86/include/asm/irq_vectors.h | 33 +++++++++++++++--------------- arch/x86/kernel/tlb_32.c | 10 ++++----- arch/x86/kernel/tlb_64.c | 18 +++++++--------- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index a16a2ab2b429..4ee8f800504b 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -49,31 +49,30 @@ * some of the following vectors are 'rare', they are merged * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). */ #ifdef CONFIG_X86_32 # define SPURIOUS_APIC_VECTOR 0xff # define ERROR_APIC_VECTOR 0xfe -# define INVALIDATE_TLB_VECTOR 0xfd -# define RESCHEDULE_VECTOR 0xfc -# define CALL_FUNCTION_VECTOR 0xfb -# define CALL_FUNCTION_SINGLE_VECTOR 0xfa -# define THERMAL_APIC_VECTOR 0xf0 +# define RESCHEDULE_VECTOR 0xfd +# define CALL_FUNCTION_VECTOR 0xfc +# define CALL_FUNCTION_SINGLE_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xfa +/* 0xf1 - 0xf9 : free */ +# define INVALIDATE_TLB_VECTOR 0xf0 #else -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define RESCHEDULE_VECTOR 0xfd -#define CALL_FUNCTION_VECTOR 0xfc -#define CALL_FUNCTION_SINGLE_VECTOR 0xfb -#define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -#define UV_BAU_MESSAGE 0xf8 -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ +# define SPURIOUS_APIC_VECTOR 0xff +# define ERROR_APIC_VECTOR 0xfe +# define RESCHEDULE_VECTOR 0xfd +# define CALL_FUNCTION_VECTOR 0xfc +# define CALL_FUNCTION_SINGLE_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xfa +# define THRESHOLD_APIC_VECTOR 0xf9 +# define UV_BAU_MESSAGE 0xf8 +# define INVALIDATE_TLB_VECTOR_END 0xf7 +# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ #define NUM_INVALIDATE_TLB_VECTORS 8 diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index abf0808d6fc4..93fcb05c7d43 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -84,13 +84,15 @@ EXPORT_SYMBOL_GPL(leave_mm); * * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. * 2) Leave the mm if we are in the lazy tlb mode. + * + * Interrupts are disabled. */ void smp_invalidate_interrupt(struct pt_regs *regs) { - unsigned long cpu; + unsigned int cpu; - cpu = get_cpu(); + cpu = smp_processor_id(); if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; @@ -112,12 +114,11 @@ void smp_invalidate_interrupt(struct pt_regs *regs) } else leave_mm(cpu); } +out: ack_APIC_irq(); smp_mb__before_clear_bit(); cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); -out: - put_cpu_no_resched(); inc_irq_stat(irq_tlb_count); } @@ -215,7 +216,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } -EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void *info) { diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index b8bed841ad67..19ac661422f7 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -1,20 +1,14 @@ #include #include -#include #include #include -#include -#include #include +#include -#include -#include #include #include -#include -#include -#include +#include #include DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) @@ -121,8 +115,8 @@ EXPORT_SYMBOL_GPL(leave_mm); asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) { - int cpu; - int sender; + unsigned int cpu; + unsigned int sender; union smp_flush_state *f; cpu = smp_processor_id(); @@ -155,14 +149,16 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); + smp_mb__before_clear_bit(); cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); + smp_mb__after_clear_bit(); inc_irq_stat(irq_tlb_count); } static void flush_tlb_others_ipi(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { - int sender; + unsigned int sender; union smp_flush_state *f; /* Caller has disabled preemption */ From 02cf94c370e0dc9bf408fe45eb86fe9ad58eaf7f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 136/566] x86: make x86_32 use tlb_64.c Impact: less contention when issuing invalidate IPI, cleanup Make x86_32 use the same tlb code as 64bit. The 64bit code uses multiple IPI vectors for tlb shootdown to reduce contention. This patch makes x86_32 allocate the same 8 IPIs as x86_64 and share the code paths. Note that the usage of asmlinkage is inconsistent for x86_32 and 64 and calls for further cleanup. This has been noted with a FIXME comment in tlb_64.c. Signed-off-by: Tejun Heo --- arch/x86/include/asm/irq_vectors.h | 7 +- .../x86/include/asm/mach-default/entry_arch.h | 18 +- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/entry_32.S | 6 +- arch/x86/kernel/irqinit_32.c | 11 +- arch/x86/kernel/tlb_32.c | 239 ------------------ arch/x86/kernel/tlb_64.c | 12 +- 7 files changed, 47 insertions(+), 248 deletions(-) delete mode 100644 arch/x86/kernel/tlb_32.c diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4ee8f800504b..9a83a10a5d51 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -58,8 +58,11 @@ # define CALL_FUNCTION_VECTOR 0xfc # define CALL_FUNCTION_SINGLE_VECTOR 0xfb # define THERMAL_APIC_VECTOR 0xfa -/* 0xf1 - 0xf9 : free */ -# define INVALIDATE_TLB_VECTOR 0xf0 +/* 0xf8 - 0xf9 : free */ +# define INVALIDATE_TLB_VECTOR_END 0xf7 +# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +# define NUM_INVALIDATE_TLB_VECTORS 8 #else diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h index 6b1add8e31dd..6fa399ad1de2 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/mach-default/entry_arch.h @@ -11,10 +11,26 @@ */ #ifdef CONFIG_X86_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) + +BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, + smp_invalidate_interrupt) #endif /* diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index eb074530c7d3..a62a15c22227 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_64.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 46469029e9d3..a0b91aac72a1 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -672,7 +672,7 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -#define BUILD_INTERRUPT(name, nr) \ +#define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ pushl $~(nr); \ @@ -680,11 +680,13 @@ ENTRY(name) \ SAVE_ALL; \ TRACE_IRQS_OFF \ movl %esp,%eax; \ - call smp_##name; \ + call fn; \ jmp ret_from_intr; \ CFI_ENDPROC; \ ENDPROC(name) +#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) + /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 1507ad4e674d..bf629cadec1a 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -149,8 +149,15 @@ void __init native_init_IRQ(void) */ alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c deleted file mode 100644 index 93fcb05c7d43..000000000000 --- a/arch/x86/kernel/tlb_32.c +++ /dev/null @@ -1,239 +0,0 @@ -#include -#include -#include - -#include - -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) - = { &init_mm, 0, }; - -/* must come after the send_IPI functions above for inlining */ -#include - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - */ - -static cpumask_var_t flush_cpumask; -static struct mm_struct *flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -void leave_mm(int cpu) -{ - BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu_tlbstate to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu_tlbstate[].active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu_tlbstate[].active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. - */ - -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned int cpu; - - cpu = smp_processor_id(); - - if (!cpumask_test_cpu(cpu, flush_cpumask)) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - leave_mm(cpu); - } -out: - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpumask_clear_cpu(cpu, flush_cpumask); - smp_mb__after_clear_bit(); - inc_irq_stat(irq_tlb_count); -} - -void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - /* - * - mask must exist :) - */ - BUG_ON(cpumask_empty(cpumask)); - BUG_ON(!mm); - - /* - * i'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * AK: x86-64 has a faster method that could be ported. - */ - spin_lock(&tlbstate_lock); - - cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); - if (unlikely(cpumask_empty(flush_cpumask))) { - spin_unlock(&tlbstate_lock); - return; - } -#endif - flush_mm = mm; - flush_va = va; - - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - - while (!cpumask_empty(flush_cpumask)) - /* nothing. lockup detection does not belong here */ - cpu_relax(); - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - local_flush_tlb(); - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, va); - preempt_enable(); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1); -} - -static int init_flush_cpumask(void) -{ - alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); - return 0; -} -early_initcall(init_flush_cpumask); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 19ac661422f7..b3ca1b940654 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -113,7 +113,17 @@ EXPORT_SYMBOL_GPL(leave_mm); * Interrupts are disabled. */ -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) +/* + * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop + * but still used for documentation purpose but the usage is slightly + * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt + * entry calls in with the first parameter in %eax. Maybe define + * intrlinkage? + */ +#ifdef CONFIG_X86_64 +asmlinkage +#endif +void smp_invalidate_interrupt(struct pt_regs *regs) { unsigned int cpu; unsigned int sender; From 16c2d3f895a3bc8d8e4c76c2646a6b750c181299 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 137/566] x86: rename tlb_64.c to tlb.c Impact: file rename tlb_64.c is now the tlb code for both 32 and 64. Rename it to tlb.c. Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/{tlb_64.c => tlb.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename arch/x86/kernel/{tlb_64.c => tlb.c} (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a62a15c22227..0626a88fbb46 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_64.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb.c similarity index 100% rename from arch/x86/kernel/tlb_64.c rename to arch/x86/kernel/tlb.c From 55f4949f5765e7a29863b6d17a774601810732f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 10:08:53 +0100 Subject: [PATCH 138/566] x86, mm: move tlb.c to arch/x86/mm/ Impact: cleanup Now that it's unified, move the (SMP) TLB flushing code from arch/x86/kernel/ to arch/x86/mm/, where it belongs logically. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/mm/Makefile | 2 ++ arch/x86/{kernel => mm}/tlb.c | 0 3 files changed, 3 insertions(+), 1 deletion(-) rename arch/x86/{kernel => mm}/tlb.c (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0626a88fbb46..0b3272f58bd9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index d8cc96a2738f..9f05157220f5 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,6 +1,8 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o +obj-$(CONFIG_X86_SMP) += tlb.o + obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/x86/kernel/tlb.c b/arch/x86/mm/tlb.c similarity index 100% rename from arch/x86/kernel/tlb.c rename to arch/x86/mm/tlb.c From 4ec71fa2d2c3f1040348f2604f4b8ccc833d1c2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 10:24:27 +0100 Subject: [PATCH 139/566] x86: uv cleanup, build fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: arch/x86/mm/srat_64.c: In function ‘acpi_numa_processor_affinity_init’: arch/x86/mm/srat_64.c:141: error: implicit declaration of function ‘get_uv_system_type’ arch/x86/mm/srat_64.c:141: error: ‘UV_X2APIC’ undeclared (first use in this function) arch/x86/mm/srat_64.c:141: error: (Each undeclared identifier is reported only once arch/x86/mm/srat_64.c:141: error: for each function it appears in.) A couple of UV definitions were moved to asm/uv/uv.h, but srat_64.c did not include that header. Add it. Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 09737c8af074..15df1baee100 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -21,6 +21,7 @@ #include #include #include +#include int acpi_numa __initdata; From ace6c6c840878342f698f0da6588dd5ded755369 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 10:32:44 +0100 Subject: [PATCH 140/566] x86: make x86_32 use tlb_64.c, build fix, clean up X86_L1_CACHE_BYTES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: arch/x86/mm/tlb.c:47: error: ‘CONFIG_X86_INTERNODE_CACHE_BYTES’ undeclared here (not in a function) The CONFIG_X86_INTERNODE_CACHE_BYTES symbol is only defined on 64-bit, because vsmp support is 64-bit only. Define it on 32-bit too - where it will always be equal to X86_L1_CACHE_BYTES. Also move the default of X86_L1_CACHE_BYTES (which is separate from the more commonly used L1_CACHE_SHIFT kconfig symbol) from 128 bytes to 64 bytes. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index cdf4a9623237..8eb50ba9161e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -292,15 +292,13 @@ config X86_CPU # Define implied options from the CPU selection here config X86_L1_CACHE_BYTES int - default "128" if GENERIC_CPU || MPSC - default "64" if MK8 || MCORE2 - depends on X86_64 + default "128" if MPSC + default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 config X86_INTERNODE_CACHE_BYTES int default "4096" if X86_VSMP default X86_L1_CACHE_BYTES if !X86_VSMP - depends on X86_64 config X86_CMPXCHG def_bool X86_64 || (X86_32 && !M386) From 5b221278d61e3907a5e4104a844b63bc8bb3d43a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 11:30:07 +0100 Subject: [PATCH 141/566] x86: uv cleanup, build fix #2 Fix more build-failure fallout from the UV cleanup - the UV drivers were not updated to include . Signed-off-by: Ingo Molnar --- drivers/misc/sgi-gru/gru.h | 2 ++ drivers/misc/sgi-xp/xp.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index f93f03a9e6e9..1b5f579df15f 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -19,6 +19,8 @@ #ifndef __GRU_H__ #define __GRU_H__ +#include + /* * GRU architectural definitions */ diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index 7b4cbd5e03e9..069ad3a1c2ac 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -15,6 +15,8 @@ #include +#include + #ifdef CONFIG_IA64 #include #include /* defines is_shub1() and is_shub2() */ From 4d5d783896fc8c37be88ee5837ca9b3c13fcd55b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 19 Jan 2009 16:34:26 -0800 Subject: [PATCH 142/566] x86: uaccess: fix style problems Impact: cleanup Fix coding style problems in arch/x86/include/asm/uaccess.h. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 4340055b7559..aeb3c1b074c2 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -121,7 +121,7 @@ extern int __get_user_bad(void); #define __get_user_x(size, ret, x, ptr) \ asm volatile("call __get_user_" #size \ - : "=a" (ret),"=d" (x) \ + : "=a" (ret), "=d" (x) \ : "0" (ptr)) \ /* Careful: we have to cast the result to the type of the pointer @@ -181,7 +181,7 @@ extern int __get_user_bad(void); #define __put_user_x(size, x, ptr, __ret_pu) \ asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ - :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") @@ -276,7 +276,7 @@ do { \ __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ break; \ case 4: \ - __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\ + __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ From cc86c9e0dc1a41451240b948bb39d46bb2536ae8 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 19 Jan 2009 16:37:41 -0800 Subject: [PATCH 143/566] x86: uaccess: rename __put_user_u64() to __put_user_asm_u64() Impact: cleanup rename __put_user_u64() to __put_user_asm_u64() like __get_user_asm_u64(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index aeb3c1b074c2..69d2757cca9b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -186,7 +186,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 -#define __put_user_u64(x, addr, err) \ +#define __put_user_asm_u64(x, addr, err) \ asm volatile("1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ "3:\n" \ @@ -203,7 +203,7 @@ extern int __get_user_bad(void); asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else -#define __put_user_u64(x, ptr, retval) \ +#define __put_user_asm_u64(x, ptr, retval) \ __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #endif @@ -279,7 +279,7 @@ do { \ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ + __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \ break; \ default: \ __put_user_bad(); \ From 03b486322e994dde49e67aedb391867b7cf28822 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 20 Jan 2009 04:36:04 +0100 Subject: [PATCH 144/566] x86: make UV support configurable Make X86 SGI Ultraviolet support configurable. Saves about 13K of text size on my modest config. text data bss dec hex filename 6770537 1158680 694356 8623573 8395d5 vmlinux 6757492 1157664 694228 8609384 835e68 vmlinux.nouv Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 7 +++++++ arch/x86/include/asm/uv/uv.h | 6 +++--- arch/x86/kernel/Makefile | 5 +++-- arch/x86/kernel/efi.c | 2 ++ arch/x86/kernel/entry_64.S | 2 ++ arch/x86/kernel/genapic_64.c | 2 ++ arch/x86/kernel/io_apic.c | 2 +- drivers/misc/Kconfig | 4 ++-- 8 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ef27aed6ff74..5a29b792cb84 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -391,6 +391,13 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. +config X86_UV + bool "SGI Ultraviolet" + depends on X86_64 + help + This option is needed in order to support SGI Ultraviolet systems. + If you don't have one of these, you should say N here. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index dce5fe350134..8ac1d7e312f3 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -3,7 +3,7 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_UV extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); @@ -15,7 +15,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, unsigned long va, unsigned int cpu); -#else /* X86_64 */ +#else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline int is_uv_system(void) { return 0; } @@ -28,6 +28,6 @@ uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, unsigned int cpu) { return cpumask; } -#endif /* X86_64 */ +#endif /* X86_UV */ #endif /* _ASM_X86_UV_UV_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0b3272f58bd9..a99437c965cc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -115,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o uv_irq.o uv_sysfs.o + obj-y += genapic_64.o genapic_flat_64.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o + obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o + obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe11..b205272ad394 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -366,10 +366,12 @@ void __init efi_init(void) SMBIOS_TABLE_GUID)) { efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx ", config_tables[i].table); +#ifdef CONFIG_X86_UV } else if (!efi_guidcmp(config_tables[i].guid, UV_SYSTEM_TABLE_GUID)) { efi.uv_systab = config_tables[i].table; printk(" UVsystab=0x%lx ", config_tables[i].table); +#endif } else if (!efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID)) { efi.hcdp = config_tables[i].table; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c52b60919163..a52703864a16 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt #endif +#ifdef CONFIG_X86_UV apicinterrupt UV_BAU_MESSAGE \ uv_bau_message_intr1 uv_bau_message_interrupt +#endif apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 2bced78b0b8e..e656c2721154 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_UV &apic_x2apic_uv_x, +#endif &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f79660390724..e4d36bd56b62 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_UV /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 419c378bd24b..abcb8459254e 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -170,7 +170,7 @@ config ENCLOSURE_SERVICES config SGI_XP tristate "Support communication between SGI SSIs" depends on NET - depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP + depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP @@ -197,7 +197,7 @@ config HP_ILO config SGI_GRU tristate "SGI GRU driver" - depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP + depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP default n select MMU_NOTIFIER ---help--- From fb746d0e1365b7472ccc4c3d5b0672b34a092d0b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Jan 2009 20:45:41 +0100 Subject: [PATCH 145/566] x86: optimise page fault entry, cleanup tsk is already assigned to current, drop the redundant second assignment. Signed-off-by: Johannes Weiner Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 033292dc9e21..8f4b859a04b3 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -419,7 +419,6 @@ static noinline void pgtable_bad(struct pt_regs *regs, printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", tsk->comm, address); dump_pagetable(address); - tsk = current; tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; From d639bab8da86d330493487e8c0fea8ca31f53427 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 9 Jan 2009 16:13:13 -0800 Subject: [PATCH 146/566] x86 PAT: ioremap_wc should take resource_size_t parameter Impact: fix/extend ioremap_wc() beyond 4GB aperture on 32-bit ioremap_wc() was taking in unsigned long parameter, where as it should take 64-bit resource_size_t parameter like other ioremap variants. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 2 +- arch/x86/mm/ioremap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 05cfed4485fa..bdbb4b961605 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -91,7 +91,7 @@ extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); -extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); +extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); /* * early_ioremap() and early_iounmap() are for temporary early boot-time diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index bd85d42819e1..2ddb1e79a195 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -367,7 +367,7 @@ EXPORT_SYMBOL(ioremap_nocache); * * Must be freed with iounmap. */ -void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { if (pat_enabled) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, From ab897d2013128f470240a541b31cf5e636984e71 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Jan 2009 14:24:16 -0800 Subject: [PATCH 147/566] x86/pvops: remove pte_flags pvop pte_flags() was introduced as a new pvop in order to extract just the flags portion of a pte, which is a potentially cheaper operation than extracting the page number as well. It turns out this operation is not needed, because simply using a mask to extract the flags from a pte is sufficient for all current users. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 3 +-- arch/x86/include/asm/paravirt.h | 18 ------------------ arch/x86/kernel/paravirt.c | 1 - arch/x86/xen/enlighten.c | 1 - 4 files changed, 1 insertion(+), 22 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e8695..6b9810859daf 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -147,7 +147,7 @@ static inline pteval_t native_pte_val(pte_t pte) return pte.pte; } -static inline pteval_t native_pte_flags(pte_t pte) +static inline pteval_t pte_flags(pte_t pte) { return native_pte_val(pte) & PTE_FLAGS_MASK; } @@ -173,7 +173,6 @@ static inline pteval_t native_pte_flags(pte_t pte) #endif #define pte_val(x) native_pte_val(x) -#define pte_flags(x) native_pte_flags(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aedc..e25c410f3d8c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -279,7 +279,6 @@ struct pv_mmu_ops { pte_t *ptep, pte_t pte); pteval_t (*pte_val)(pte_t); - pteval_t (*pte_flags)(pte_t); pte_t (*make_pte)(pteval_t pte); pgdval_t (*pgd_val)(pgd_t); @@ -1084,23 +1083,6 @@ static inline pteval_t pte_val(pte_t pte) return ret; } -static inline pteval_t pte_flags(pte_t pte) -{ - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, - pte.pte, (u64)pte.pte >> 32); - else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, - pte.pte); - -#ifdef CONFIG_PARAVIRT_DEBUG - BUG_ON(ret & PTE_PFN_MASK); -#endif - return ret; -} - static inline pgd_t __pgd(pgdval_t val) { pgdval_t ret; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb608873..202514be5923 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -435,7 +435,6 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, - .pte_flags = native_pte_flags, .pgd_val = native_pgd_val, .make_pte = native_make_pte, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b20..6f1bb71aa13a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1314,7 +1314,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_commit = __ptep_modify_prot_commit, .pte_val = xen_pte_val, - .pte_flags = native_pte_flags, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, From 6522869c34664dd5f05a0a327e93915b1281c90d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Jan 2009 14:24:22 -0800 Subject: [PATCH 148/566] x86: add pte_set_flags/clear_flags for pte flag manipulation It's not necessary to deconstruct and reconstruct a pte every time its flags are being updated. Introduce pte_set_flags and pte_clear_flags to set and clear flags in a pte. This allows the flag manipulation code to be inlined, and avoids calls via paravirt-ops. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 38 +++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 06bbcbd66e9c..6ceaef08486f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -240,64 +240,78 @@ static inline int pmd_large(pmd_t pte) (_PAGE_PSE | _PAGE_PRESENT); } +static inline pte_t pte_set_flags(pte_t pte, pteval_t set) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v | set); +} + +static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v & ~clear); +} + static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_DIRTY); + return pte_clear_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_ACCESSED); + return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_RW); + return pte_clear_flags(pte, _PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_NX); + return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY); + return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_ACCESSED); + return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_RW); + return pte_set_flags(pte, _PAGE_RW); } static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_PSE); + return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_PSE); + return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_GLOBAL); + return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_GLOBAL); + return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPECIAL); + return pte_set_flags(pte, _PAGE_SPECIAL); } extern pteval_t __supported_pte_mask; From 03d2989df9c1c7df5b33c7a87e0790465461836a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:28 +0900 Subject: [PATCH 149/566] x86: remove idle_timestamp from 32bit irq_cpustat_t Impact: bogus irq_cpustat field removed idle_timestamp is left over from the removed irqbalance code. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_32.h | 1 - arch/x86/kernel/process_32.c | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index d4b5d731073f..a70ed050fdef 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -6,7 +6,6 @@ typedef struct { unsigned int __softirq_pending; - unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq0_irqs; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2c00a57ccb90..1a1ae8edc40c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -108,7 +108,6 @@ void cpu_idle(void) play_dead(); local_irq_disable(); - __get_cpu_var(irq_stat).idle_timestamp = jiffies; /* Don't trace irqs off for idle */ stop_critical_timings(); pm_idle(); From 3819cd489ec5d18a4cbd2f05acdc516473caa105 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:29 +0900 Subject: [PATCH 150/566] x86: remove include of apic.h from hardirq_64.h Impact: cleanup APIC definitions aren't needed here. Remove the include and fix up the fallout. tj: added include to mce_intel_64.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_64.h | 1 - arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 1 + arch/x86/kernel/efi_64.c | 1 + arch/x86/kernel/irq_64.c | 1 + 4 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index a65bab20f6ce..873c3c7bffcc 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,7 +3,6 @@ #include #include -#include typedef struct { unsigned int __softirq_pending; diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 4b48f251fd39..5e8c79e748a6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215f..a4ee29127fdf 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -36,6 +36,7 @@ #include #include #include +#include static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0b254de84083..018963aa6ee3 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,6 +18,7 @@ #include #include #include +#include DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); From 658a9a2c34914e8114eea9c4d85d4ecd3ee33b98 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:31 +0900 Subject: [PATCH 151/566] x86: sync hardirq_{32,64}.h Impact: better code generation and removal of unused field for 32bit In general, use the 64-bit version. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_32.h | 14 ++++++++++---- arch/x86/include/asm/hardirq_64.h | 10 +++++----- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index a70ed050fdef..e5a332c28c98 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -14,6 +14,7 @@ typedef struct { unsigned int irq_tlb_count; unsigned int irq_thermal_count; unsigned int irq_spurious_count; + unsigned int irq_threshold_count; } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU(irq_cpustat_t, irq_stat); @@ -22,11 +23,16 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT -#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) -#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -void ack_bad_irq(unsigned int irq); -#include +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); #endif /* _ASM_X86_HARDIRQ_32_H */ diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index 873c3c7bffcc..392e7d614579 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -22,16 +22,16 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS -#define __ARCH_IRQ_STAT 1 +#define __ARCH_IRQ_STAT #define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) -#define __ARCH_SET_SOFTIRQ_PENDING 1 +#define __ARCH_SET_SOFTIRQ_PENDING -#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); From 22da7b3df3a2e26a87a8581575dbf26e465a6ac7 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:31 +0900 Subject: [PATCH 152/566] x86: merge hardirq_{32,64}.h into hardirq.h Impact: cleanup Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq.h | 43 +++++++++++++++++++++++++++---- arch/x86/include/asm/hardirq_32.h | 38 --------------------------- arch/x86/include/asm/hardirq_64.h | 38 --------------------------- 3 files changed, 38 insertions(+), 81 deletions(-) delete mode 100644 arch/x86/include/asm/hardirq_32.h delete mode 100644 arch/x86/include/asm/hardirq_64.h diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 000787df66e6..f4a95f20f8ec 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -1,11 +1,44 @@ -#ifdef CONFIG_X86_32 -# include "hardirq_32.h" -#else -# include "hardirq_64.h" -#endif +#ifndef _ASM_X86_HARDIRQ_H +#define _ASM_X86_HARDIRQ_H + +#include +#include + +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) + +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); extern u64 arch_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu arch_irq_stat_cpu extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat + +#endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h deleted file mode 100644 index e5a332c28c98..000000000000 --- a/arch/x86/include/asm/hardirq_32.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_32_H -#define _ASM_X86_HARDIRQ_32_H - -#include -#include - -typedef struct { - unsigned int __softirq_pending; - unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ - unsigned int irq0_irqs; - unsigned int irq_resched_count; - unsigned int irq_call_count; - unsigned int irq_tlb_count; - unsigned int irq_thermal_count; - unsigned int irq_spurious_count; - unsigned int irq_threshold_count; -} ____cacheline_aligned irq_cpustat_t; - -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); - -/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ -#define MAX_HARDIRQS_PER_CPU NR_VECTORS - -#define __ARCH_IRQ_STAT - -#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) - -#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING - -#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) - -extern void ack_bad_irq(unsigned int irq); - -#endif /* _ASM_X86_HARDIRQ_32_H */ diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h deleted file mode 100644 index 392e7d614579..000000000000 --- a/arch/x86/include/asm/hardirq_64.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_64_H -#define _ASM_X86_HARDIRQ_64_H - -#include -#include - -typedef struct { - unsigned int __softirq_pending; - unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ - unsigned int irq0_irqs; - unsigned int irq_resched_count; - unsigned int irq_call_count; - unsigned int irq_tlb_count; - unsigned int irq_thermal_count; - unsigned int irq_spurious_count; - unsigned int irq_threshold_count; -} ____cacheline_aligned irq_cpustat_t; - -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); - -/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ -#define MAX_HARDIRQS_PER_CPU NR_VECTORS - -#define __ARCH_IRQ_STAT - -#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) - -#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING - -#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) - -extern void ack_bad_irq(unsigned int irq); - -#endif /* _ASM_X86_HARDIRQ_64_H */ From 2de3a5f7956eb81447feea3aec68193ddd8534bb Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:32 +0900 Subject: [PATCH 153/566] x86: make irq_cpustat_t fields conditional Impact: shrink size of irq_cpustat_t when possible Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index f4a95f20f8ec..176f058e7159 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -7,14 +7,22 @@ typedef struct { unsigned int __softirq_pending; unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq0_irqs; +#ifdef CONFIG_X86_LOCAL_APIC + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq_spurious_count; +#endif +#ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; unsigned int irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE unsigned int irq_thermal_count; - unsigned int irq_spurious_count; +# ifdef CONFIG_X86_64 unsigned int irq_threshold_count; +# endif +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU(irq_cpustat_t, irq_stat); From 99d0000f710f3432182761f65f9658f1cf0bf455 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 23 Jan 2009 11:09:15 +0100 Subject: [PATCH 154/566] x86, xen: fix hardirq.h merge fallout Impact: build fix This build error: arch/x86/xen/suspend.c:22: error: implicit declaration of function 'fix_to_virt' arch/x86/xen/suspend.c:22: error: 'FIX_PARAVIRT_BOOTMAP' undeclared (first use in this function) arch/x86/xen/suspend.c:22: error: (Each undeclared identifier is reported only once arch/x86/xen/suspend.c:22: error: for each function it appears in.) triggers because the hardirq.h unification removed an implicit fixmap.h include - on which arch/x86/xen/suspend.c depended. Add the fixmap.h include explicitly. Signed-off-by: Ingo Molnar --- arch/x86/xen/suspend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 212ffe012b76..95be7b434724 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -6,6 +6,7 @@ #include #include +#include #include "xen-ops.h" #include "mmu.h" From fe40c0af3cff3ea461cf25bddb979abc7279d4df Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 23 Jan 2009 15:49:41 -0800 Subject: [PATCH 155/566] x86: uaccess: introduce try and catch framework Impact: introduce new uaccess exception handling framework Introduce {get|put}_user_try and {get|put}_user_catch as new uaccess exception handling framework. {get|put}_user_try begins exception block and {get|put}_user_catch(err) ends the block and gets err if an exception occured in {get|put}_user_ex() in the block. The exception is stored thread_info->uaccess_err. The example usage of this framework is below; int func() { int err = 0; get_user_try { get_user_ex(...); get_user_ex(...); : } get_user_catch(err); return err; } Note: get_user_ex() is not clear the value when an exception occurs, it's different from the behavior of __get_user(), but I think it doesn't matter. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/thread_info.h | 1 + arch/x86/include/asm/uaccess.h | 103 +++++++++++++++++++++++++++++ arch/x86/mm/extable.c | 6 ++ 3 files changed, 110 insertions(+) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 98789647baa9..3f90aeb456bc 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif + int uaccess_err; }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 69d2757cca9b..0ec6de4bcb0b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -199,12 +199,22 @@ extern int __get_user_bad(void); : "=r" (err) \ : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) +#define __put_user_asm_ex_u64(x, addr) \ + asm volatile("1: movl %%eax,0(%1)\n" \ + "2: movl %%edx,4(%1)\n" \ + "3:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + _ASM_EXTABLE(2b, 3b - 2b) \ + : : "A" (x), "r" (addr)) + #define __put_user_x8(x, ptr, __ret_pu) \ asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else #define __put_user_asm_u64(x, ptr, retval) \ __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_asm_ex_u64(x, addr) \ + __put_user_asm_ex(x, addr, "q", "", "Zr") #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #endif @@ -286,6 +296,27 @@ do { \ } \ } while (0) +#define __put_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ + break; \ + case 2: \ + __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ + break; \ + case 4: \ + __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ + break; \ + case 8: \ + __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ + break; \ + default: \ + __put_user_bad(); \ + } \ +} while (0) + #else #define __put_user_size(x, ptr, size, retval, errret) \ @@ -311,9 +342,12 @@ do { \ #ifdef CONFIG_X86_32 #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else #define __get_user_asm_u64(x, ptr, retval, errret) \ __get_user_asm(x, ptr, retval, "q", "", "=r", errret) +#define __get_user_asm_ex_u64(x, ptr) \ + __get_user_asm_ex(x, ptr, "q", "", "=r") #endif #define __get_user_size(x, ptr, size, retval, errret) \ @@ -350,6 +384,33 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) +#define __get_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ + break; \ + case 2: \ + __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ + break; \ + case 4: \ + __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ + break; \ + case 8: \ + __get_user_asm_ex_u64(x, ptr); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %1,%"rtype"0\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : ltype(x) : "m" (__m(addr))) + #define __put_user_nocheck(x, ptr, size) \ ({ \ int __pu_err; \ @@ -385,6 +446,26 @@ struct __large_struct { unsigned long buf[100]; }; _ASM_EXTABLE(1b, 3b) \ : "=r"(err) \ : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) + +#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %"rtype"0,%1\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : : ltype(x), "m" (__m(addr))) + +/* + * uaccess_try and catch + */ +#define uaccess_try do { \ + int prev_err = current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = 0; \ + barrier(); + +#define uaccess_catch(err) \ + (err) |= current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = prev_err; \ +} while (0) + /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -408,6 +489,7 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + /** * __put_user: - Write a simple value into user space, with less checking. * @x: Value to copy to user space. @@ -434,6 +516,27 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user_unaligned __get_user #define __put_user_unaligned __put_user +/* + * {get|put}_user_try and catch + * + * get_user_try { + * get_user_ex(...); + * } get_user_catch(err) + */ +#define get_user_try uaccess_try +#define get_user_catch(err) uaccess_catch(err) +#define put_user_try uaccess_try +#define put_user_catch(err) uaccess_catch(err) + +#define get_user_ex(x, ptr) do { \ + unsigned long __gue_val; \ + __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ + (x) = (__force __typeof__(*(ptr)))__gue_val; \ +} while (0) + +#define put_user_ex(x, ptr) \ + __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + /* * movsl can be slow when source and dest are not both 8-byte aligned */ diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 7e8db53528a7..61b41ca3b5a2 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -23,6 +23,12 @@ int fixup_exception(struct pt_regs *regs) fixup = search_exception_tables(regs->ip); if (fixup) { + /* If fixup is less than 16, it means uaccess error */ + if (fixup->fixup < 16) { + current_thread_info()->uaccess_err = -EFAULT; + regs->ip += fixup->fixup; + return 1; + } regs->ip = fixup->fixup; return 1; } From 98e3d45edad207b4358948d6e2cac4e482c3bb5d Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 23 Jan 2009 15:50:10 -0800 Subject: [PATCH 156/566] x86: signal: use {get|put}_user_try and catch Impact: use new framework Use {get|put}_user_try, catch, and _ex in arch/x86/kernel/signal.c. Note: this patch contains "WARNING: line over 80 characters", because when introducing new block I insert an indent to avoid mistakes by edit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/signal.c | 289 +++++++++++++++++++++------------------ 1 file changed, 153 insertions(+), 136 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 89bb7668041d..cf34eb37fbee 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -51,24 +51,24 @@ #endif #define COPY(x) { \ - err |= __get_user(regs->x, &sc->x); \ + get_user_ex(regs->x, &sc->x); \ } #define COPY_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp; \ } #define COPY_SEG_CPL3(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp | 3; \ } #define GET_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ loadsegment(seg, tmp); \ } @@ -83,45 +83,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; + get_user_try { + #ifdef CONFIG_X86_32 - GET_SEG(gs); - COPY_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); + GET_SEG(gs); + COPY_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); #endif /* CONFIG_X86_32 */ - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); #ifdef CONFIG_X86_64 - COPY(r8); - COPY(r9); - COPY(r10); - COPY(r11); - COPY(r12); - COPY(r13); - COPY(r14); - COPY(r15); + COPY(r8); + COPY(r9); + COPY(r10); + COPY(r11); + COPY(r12); + COPY(r13); + COPY(r14); + COPY(r15); #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_32 - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); #else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); #endif /* CONFIG_X86_32 */ - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + get_user_ex(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); + + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); - err |= __get_user(*pax, &sc->ax); return err; } @@ -131,57 +135,60 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, { int err = 0; -#ifdef CONFIG_X86_32 - { - unsigned int tmp; + put_user_try { - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - } - err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); - err |= __put_user(regs->es, (unsigned int __user *)&sc->es); - err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); +#ifdef CONFIG_X86_32 + { + unsigned int tmp; + + savesegment(gs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->gs); + } + put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); + put_user_ex(regs->es, (unsigned int __user *)&sc->es); + put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); #endif /* CONFIG_X86_32 */ - err |= __put_user(regs->di, &sc->di); - err |= __put_user(regs->si, &sc->si); - err |= __put_user(regs->bp, &sc->bp); - err |= __put_user(regs->sp, &sc->sp); - err |= __put_user(regs->bx, &sc->bx); - err |= __put_user(regs->dx, &sc->dx); - err |= __put_user(regs->cx, &sc->cx); - err |= __put_user(regs->ax, &sc->ax); + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); #ifdef CONFIG_X86_64 - err |= __put_user(regs->r8, &sc->r8); - err |= __put_user(regs->r9, &sc->r9); - err |= __put_user(regs->r10, &sc->r10); - err |= __put_user(regs->r11, &sc->r11); - err |= __put_user(regs->r12, &sc->r12); - err |= __put_user(regs->r13, &sc->r13); - err |= __put_user(regs->r14, &sc->r14); - err |= __put_user(regs->r15, &sc->r15); + put_user_ex(regs->r8, &sc->r8); + put_user_ex(regs->r9, &sc->r9); + put_user_ex(regs->r10, &sc->r10); + put_user_ex(regs->r11, &sc->r11); + put_user_ex(regs->r12, &sc->r12); + put_user_ex(regs->r13, &sc->r13); + put_user_ex(regs->r14, &sc->r14); + put_user_ex(regs->r15, &sc->r15); #endif /* CONFIG_X86_64 */ - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->ip, &sc->ip); + put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); #ifdef CONFIG_X86_32 - err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->sp, &sc->sp_at_signal); - err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); #else /* !CONFIG_X86_32 */ - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->cs, &sc->cs); - err |= __put_user(0, &sc->gs); - err |= __put_user(0, &sc->fs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->cs, &sc->cs); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ - err |= __put_user(fpstate, &sc->fpstate); + put_user_ex(fpstate, &sc->fpstate); - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); return err; } @@ -336,43 +343,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= __put_user(sig, &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - return -EFAULT; + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(&frame->info, &frame->pinfo); + put_user_ex(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - return -EFAULT; + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - /* Set up to return from userspace. */ - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(restorer, &frame->pretcode); + /* Set up to return from userspace. */ + restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + put_user_ex(restorer, &frame->pretcode); - /* - * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 - * - * WE DO NOT USE IT ANY MORE! It's only left here for historical - * reasons and because gdb uses it as a signature to notice - * signal handler stack frames. - */ - err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + /* + * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + } put_user_catch(err); if (err) return -EFAULT; @@ -436,28 +441,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return -EFAULT; } - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - /* could use a vstub here */ - return -EFAULT; - } + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + /* x86-64 should always use SA_RESTORER. */ + if (ka->sa.sa_flags & SA_RESTORER) { + put_user_ex(ka->sa.sa_restorer, &frame->pretcode); + } else { + /* could use a vstub here */ + err |= -EFAULT; + } + } put_user_catch(err); if (err) return -EFAULT; @@ -509,31 +516,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { struct k_sigaction new_ka, old_ka; - int ret; + int ret = 0; if (act) { old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + if (!access_ok(VERIFY_READ, act, sizeof(*act))) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); + get_user_try { + get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); + get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); + get_user_ex(mask, &act->sa_mask); + get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); + } get_user_catch(ret); + + if (ret) + return -EFAULT; siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_try { + put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); + put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); + put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); + } put_user_catch(ret); + + if (ret) + return -EFAULT; } return ret; From 3b4b75700a245d0d48fc52a4d2f67d3155812aba Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 23 Jan 2009 15:50:38 -0800 Subject: [PATCH 157/566] x86: ia32_signal: use {get|put}_user_try and catch Impact: use new framework Use {get|put}_user_try, catch, and _ex in arch/x86/ia32/ia32_signal.c. Note: this patch contains "WARNING: line over 80 characters", because when introducing new block I insert an indent to avoid mistakes by edit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/ia32/ia32_signal.c | 343 +++++++++++++++++++----------------- 1 file changed, 184 insertions(+), 159 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 9dabd00e9805..dd77ac0cac46 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -46,78 +46,83 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where); int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { - int err; + int err = 0; if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; - /* If you change siginfo_t structure, please make sure that - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + put_user_try { + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + put_user_ex(from->si_signo, &to->si_signo); + put_user_ex(from->si_errno, &to->si_errno); + put_user_ex((short)from->si_code, &to->si_code); - if (from->si_code < 0) { - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); - } else { - /* - * First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) - */ - err |= __put_user(from->_sifields._pad[0], - &to->_sifields._pad[0]); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: - break; - case __SI_CHLD >> 16: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - /* FALL THROUGH */ - default: - case __SI_KILL >> 16: - err |= __put_user(from->si_uid, &to->si_uid); - break; - case __SI_POLL >> 16: - err |= __put_user(from->si_fd, &to->si_fd); - break; - case __SI_TIMER >> 16: - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(ptr_to_compat(from->si_ptr), - &to->si_ptr); - break; - /* This is not generated by the kernel as of now. */ - case __SI_RT >> 16: - case __SI_MESGQ >> 16: - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - break; + if (from->si_code < 0) { + put_user_ex(from->si_pid, &to->si_pid); + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + put_user_ex(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + put_user_ex(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + put_user_ex(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + put_user_ex(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + put_user_ex(from->si_overrun, &to->si_overrun); + put_user_ex(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(from->si_int, &to->si_int); + break; + } } - } + } put_user_catch(err); + return err; } int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - int err; + int err = 0; u32 ptr32; if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) return -EFAULT; - err = __get_user(to->si_signo, &from->si_signo); - err |= __get_user(to->si_errno, &from->si_errno); - err |= __get_user(to->si_code, &from->si_code); + get_user_try { + get_user_ex(to->si_signo, &from->si_signo); + get_user_ex(to->si_errno, &from->si_errno); + get_user_ex(to->si_code, &from->si_code); - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - err |= __get_user(ptr32, &from->si_ptr); - to->si_ptr = compat_ptr(ptr32); + get_user_ex(to->si_pid, &from->si_pid); + get_user_ex(to->si_uid, &from->si_uid); + get_user_ex(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + } get_user_catch(err); return err; } @@ -142,17 +147,23 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, struct pt_regs *regs) { stack_t uss, uoss; - int ret; + int ret, err = 0; mm_segment_t seg; if (uss_ptr) { u32 ptr; memset(&uss, 0, sizeof(stack_t)); - if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) || - __get_user(ptr, &uss_ptr->ss_sp) || - __get_user(uss.ss_flags, &uss_ptr->ss_flags) || - __get_user(uss.ss_size, &uss_ptr->ss_size)) + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + get_user_try { + get_user_ex(ptr, &uss_ptr->ss_sp); + get_user_ex(uss.ss_flags, &uss_ptr->ss_flags); + get_user_ex(uss.ss_size, &uss_ptr->ss_size); + } get_user_catch(err); + + if (err) return -EFAULT; uss.ss_sp = compat_ptr(ptr); } @@ -161,10 +172,16 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) || - __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || - __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || - __put_user(uoss.ss_size, &uoss_ptr->ss_size)) + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + put_user_try { + put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp); + put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags); + put_user_ex(uoss.ss_size, &uoss_ptr->ss_size); + } put_user_catch(err); + + if (err) ret = -EFAULT; } return ret; @@ -174,18 +191,18 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, * Do a signal return; undo the signal stack. */ #define COPY(x) { \ - err |= __get_user(regs->x, &sc->x); \ + get_user_ex(regs->x, &sc->x); \ } #define COPY_SEG_CPL3(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp | 3; \ } #define RELOAD_SEG(seg) { \ unsigned int cur, pre; \ - err |= __get_user(pre, &sc->seg); \ + get_user_ex(pre, &sc->seg); \ savesegment(seg, cur); \ pre |= 3; \ if (pre != cur) \ @@ -209,39 +226,42 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, sc, sc->err, sc->ip, sc->cs, sc->flags); #endif - /* - * Reload fs and gs if they have changed in the signal - * handler. This does not handle long fs/gs base changes in - * the handler, but does not clobber them at least in the - * normal case. - */ - err |= __get_user(gs, &sc->gs); - gs |= 3; - savesegment(gs, oldgs); - if (gs != oldgs) - load_gs_index(gs); + get_user_try { + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + get_user_ex(gs, &sc->gs); + gs |= 3; + savesegment(gs, oldgs); + if (gs != oldgs) + load_gs_index(gs); - RELOAD_SEG(fs); - RELOAD_SEG(ds); - RELOAD_SEG(es); + RELOAD_SEG(fs); + RELOAD_SEG(ds); + RELOAD_SEG(es); - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); - /* Don't touch extended registers */ + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); + /* Don't touch extended registers */ - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - /* disable syscall checks */ - regs->orig_ax = -1; + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + /* disable syscall checks */ + regs->orig_ax = -1; - err |= __get_user(tmp, &sc->fpstate); - buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); + get_user_ex(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + err |= restore_i387_xstate_ia32(buf); + + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); - err |= __get_user(*pax, &sc->ax); return err; } @@ -319,36 +339,38 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, { int tmp, err = 0; - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - savesegment(fs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->fs); - savesegment(ds, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->ds); - savesegment(es, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->es); + put_user_try { + savesegment(gs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->gs); + savesegment(fs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->fs); + savesegment(ds, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->ds); + savesegment(es, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->es); - err |= __put_user(regs->di, &sc->di); - err |= __put_user(regs->si, &sc->si); - err |= __put_user(regs->bp, &sc->bp); - err |= __put_user(regs->sp, &sc->sp); - err |= __put_user(regs->bx, &sc->bx); - err |= __put_user(regs->dx, &sc->dx); - err |= __put_user(regs->cx, &sc->cx); - err |= __put_user(regs->ax, &sc->ax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->ip, &sc->ip); - err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->sp, &sc->sp_at_signal); - err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); + put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); - err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate); + put_user_ex(ptr_to_compat(fpstate), &sc->fpstate); - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); return err; } @@ -437,13 +459,17 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, else restorer = &frame->retcode; } - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - /* - * These are actually not used anymore, but left because some - * gdb versions depend on them as a marker. - */ - err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); + put_user_try { + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. + */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); + if (err) return -EFAULT; @@ -496,41 +522,40 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= __put_user(sig, &frame->sig); - err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); - err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); - err |= copy_siginfo_to_user32(&frame->info, info); - if (err) - return -EFAULT; + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); + put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); + err |= copy_siginfo_to_user32(&frame->info, info); - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - return -EFAULT; + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - else - restorer = VDSO32_SYMBOL(current->mm->context.vdso, - rt_sigreturn); - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + else + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + rt_sigreturn); + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); - /* - * Not actually used anymore, but left because some gdb - * versions need it. - */ - err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); if (err) return -EFAULT; From b1882e68d17a93b523dce09c3a181319aace2f0e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 23 Jan 2009 17:18:52 -0800 Subject: [PATCH 158/566] x86: clean up stray space in Impact: Whitespace cleanup only Clean up a stray space character in arch/x86/include/asm/processor.h. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2e..ac8fab3b868f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -73,7 +73,7 @@ struct cpuinfo_x86 { char pad0; #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ - int x86_tlbsize; + int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; #endif From 75a048119e76540d73132cfc8e0fa0c0a8bb6c83 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 22 Jan 2009 16:17:05 -0800 Subject: [PATCH 159/566] x86: handle PAT more like other CPU features Impact: Cleanup When PAT was originally introduced, it was handled specially for a few reasons: - PAT bugs are hard to track down, so we wanted to maintain a whitelist of CPUs. - The i386 and x86-64 CPUID code was not yet unified. Both of these are now obsolete, so handle PAT like any other features, including ordinary feature blacklisting due to known bugs. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pat.h | 4 --- arch/x86/kernel/cpu/addon_cpuid_features.c | 34 ---------------------- arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/cpu/intel.c | 12 ++++++++ arch/x86/mm/pat.c | 31 +++++++++++++------- 5 files changed, 32 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index b8493b3b9890..9709fdff6615 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -5,10 +5,8 @@ #ifdef CONFIG_X86_PAT extern int pat_enabled; -extern void validate_pat_support(struct cpuinfo_x86 *c); #else static const int pat_enabled; -static inline void validate_pat_support(struct cpuinfo_x86 *c) { } #endif extern void pat_init(void); @@ -17,6 +15,4 @@ extern int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *ret_type); extern int free_memtype(u64 start, u64 end); -extern void pat_disable(char *reason); - #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 2cf23634b6d9..4e581fdc0a5a 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -143,37 +143,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) return; #endif } - -#ifdef CONFIG_X86_PAT -void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) -{ - if (!cpu_has_pat) - pat_disable("PAT not supported by CPU."); - - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - /* - * There is a known erratum on Pentium III and Core Solo - * and Core Duo CPUs. - * " Page with PAT set to WC while associated MTRR is UC - * may consolidate to UC " - * Because of this erratum, it is better to stick with - * setting WC in MTRR rather than using PAT on these CPUs. - * - * Enable PAT WC only on P4, Core 2 or later CPUs. - */ - if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) - return; - - pat_disable("PAT WC disabled due to known CPU erratum."); - return; - - case X86_VENDOR_AMD: - case X86_VENDOR_CENTAUR: - case X86_VENDOR_TRANSMETA: - return; - } - - pat_disable("PAT disabled. Not yet verified on this CPU type."); -} -#endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 83492b1f93b1..0f8656361e04 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -570,8 +570,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_early_init) this_cpu->c_early_init(c); - validate_pat_support(c); - #ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8ea6929e974c..20ce03acf04b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -50,6 +50,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); } + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 == 6 && c->x86_model < 15) + clear_cpu_cap(c, X86_FEATURE_PAT); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 8b08fb955274..430cb44dd3f4 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -30,7 +30,7 @@ #ifdef CONFIG_X86_PAT int __read_mostly pat_enabled = 1; -void __cpuinit pat_disable(char *reason) +void __cpuinit pat_disable(const char *reason) { pat_enabled = 0; printk(KERN_INFO "%s\n", reason); @@ -42,6 +42,11 @@ static int __init nopat(char *str) return 0; } early_param("nopat", nopat); +#else +static inline void pat_disable(const char *reason) +{ + (void)reason; +} #endif @@ -78,16 +83,20 @@ void pat_init(void) if (!pat_enabled) return; - /* Paranoia check. */ - if (!cpu_has_pat && boot_pat_state) { - /* - * If this happens we are on a secondary CPU, but - * switched to PAT on the boot CPU. We have no way to - * undo PAT. - */ - printk(KERN_ERR "PAT enabled, " - "but not supported by secondary CPU\n"); - BUG(); + if (!cpu_has_pat) { + if (!boot_pat_state) { + pat_disable("PAT not supported by CPU."); + return; + } else { + /* + * If this happens we are on a secondary CPU, but + * switched to PAT on the boot CPU. We have no way to + * undo PAT. + */ + printk(KERN_ERR "PAT enabled, " + "but not supported by secondary CPU\n"); + BUG(); + } } /* Set PWT to Write-Combining. All other bits stay the same */ From b38b0665905538e76e26f2a4c686179abb1f69f6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 23 Jan 2009 17:20:50 -0800 Subject: [PATCH 160/566] x86: filter CPU features dependent on unavailable CPUID levels Impact: Fixes potential crashes on misconfigured systems. Some CPU features require specific CPUID levels to be available in order to function, as they contain information about the operation of a specific feature. However, some BIOSes and virtualization software provide the ability to mask CPUID levels in order to support legacy operating systems. We try to enable such CPUID levels when we know how to do it, but for the remaining cases, filter out such CPU features when there is no way for us to support them. Do this in one place, in the CPUID code, with a table-driven approach. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f8656361e04..21f086b4c1a8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -212,6 +212,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } #endif +/* + * Some CPU features depend on higher CPUID levels, which may not always + * be available due to CPUID level capping or broken virtualization + * software. Add those features to this table to auto-disable them. + */ +struct cpuid_dependent_feature { + u32 feature; + u32 level; +}; +static const struct cpuid_dependent_feature __cpuinitconst +cpuid_dependent_features[] = { + { X86_FEATURE_MWAIT, 0x00000005 }, + { X86_FEATURE_DCA, 0x00000009 }, + { X86_FEATURE_XSAVE, 0x0000000d }, + { 0, 0 } +}; + +static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) +{ + const struct cpuid_dependent_feature *df; + for (df = cpuid_dependent_features; df->feature; df++) { + /* + * Note: cpuid_level is set to -1 if unavailable, but + * extended_extended_level is set to 0 if unavailable + * and the legitimate extended levels are all negative + * when signed; hence the weird messing around with + * signs here... + */ + if (cpu_has(c, df->feature) && + ((s32)df->feature < 0 ? + (u32)df->feature > (u32)c->extended_cpuid_level : + (s32)df->feature > (s32)c->cpuid_level)) { + clear_cpu_cap(c, df->feature); + if (warn) + printk(KERN_WARNING + "CPU: CPU feature %s disabled " + "due to lack of CPUID level 0x%x\n", + x86_cap_flags[df->feature], + df->level); + } + } +} + /* * Naming convention should be: [()] * This table only is used unless init_() below doesn't set it; @@ -573,6 +616,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; #endif + filter_cpuid_features(c, false); } void __init early_cpu_init(void) @@ -706,6 +750,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) * we do "generic changes." */ + /* Filter out anything that depends on CPUID levels we don't have */ + filter_cpuid_features(c, true); + /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { char *p; From 2d4d57db692ea790e185656516e6ebe8791f1788 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Jan 2009 12:50:13 -0800 Subject: [PATCH 161/566] x86: micro-optimize __raw_read_trylock() The current version of __raw_read_trylock starts with decrementing the lock and read its new value as a separate operation after that. That makes 3 dereferences (read, write (after sub), read) whereas a single atomic_dec_return does only two pointers dereferences (read, write). Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/include/asm/spinlock.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da2..4d3dcc51cacd 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -329,8 +329,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; - atomic_dec(count); - if (atomic_read(count) >= 0) + if (atomic_dec_return(count) >= 0) return 1; atomic_inc(count); return 0; From 34707bcd0452aba644396767bc9fb61585bdab4f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jan 2009 14:18:05 +0100 Subject: [PATCH 162/566] x86, debug: remove early_printk() #ifdefs from head_32.S Impact: cleanup Remove such constructs: #ifdef CONFIG_EARLY_PRINTK call early_printk #else call printk #endif Not only are they ugly, they are also pointless: a call to printk() maps to early_printk during early bootup anyway, if CONFIG_EARLY_PRINTK is enabled. Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70b..9f1410711607 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -548,11 +548,7 @@ early_fault: pushl %eax pushl %edx /* trapno */ pushl $fault_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else call printk -#endif #endif call dump_stack hlt_loop: @@ -580,11 +576,7 @@ ignore_int: pushl 32(%esp) pushl 40(%esp) pushl $int_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else call printk -#endif addl $(5*4),%esp popl %ds popl %es From d5e397cb49b53381e4c99a064ca733c665646de8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jan 2009 06:09:00 +0100 Subject: [PATCH 163/566] x86: improve early fault/irq printout Impact: add a stack dump to early IRQs/faults Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9f1410711607..84d05a4d7fc4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -577,6 +577,9 @@ ignore_int: pushl 40(%esp) pushl $int_msg call printk + + call dump_stack + addl $(5*4),%esp popl %ds popl %es @@ -652,7 +655,7 @@ early_recursion_flag: .long 0 int_msg: - .asciz "Unknown interrupt or fault at EIP %p %p %p\n" + .asciz "Unknown interrupt or fault at: %p %p %p\n" fault_msg: /* fault info: */ From 5a611268b69f05262936dd177205acbce4471358 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 26 Jan 2009 08:44:05 -0500 Subject: [PATCH 164/566] generic, x86: fix __per_cpu_load relocation This patch fixes this linker error: WARNING: Absolute relocations present Offset Info Type Sym.Value Sym.Name c0a4e07d 00e78001 R_386_32 c0ab0000 __per_cpu_load Now, __per_cpu_load is a section-relative symbol: c0aa4000 D __per_cpu_load c0aa4000 A __per_cpu_load_abs Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 53e21f36a802..f3180a85c66a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -451,17 +451,18 @@ * end offset. */ #define PERCPU_VADDR(vaddr, phdr) \ - VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + VMLINUX_SYMBOL(__per_cpu_load_abs) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs) \ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ + VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version From 0d77e7f04d5da160307f4f5c030a171e004f602b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 165/566] x86: merge setup_per_cpu_maps() into setup_per_cpu_areas() Impact: minor optimization Eliminates the need for two loops over possible cpus. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 48 ++++++++++++++-------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 90b8e154bb53..d0b1476490a7 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -97,33 +97,6 @@ static inline void setup_cpu_local_masks(void) #endif /* CONFIG_X86_32 */ #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA -/* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. - */ -static void __init setup_per_cpu_maps(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); - per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#ifdef X86_64_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); -#endif - } - - /* indicate the early static arrays will soon be gone */ - early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; - early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#ifdef X86_64_NUMA - early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; -#endif -} #ifdef CONFIG_X86_64 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { @@ -181,6 +154,19 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + /* + * Copy data used in early init routines from the initial arrays to the + * per cpu data areas. These arrays then become expendable and the + * *_early_ptr's are zeroed indicating that the static arrays are gone. + */ + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); + per_cpu(x86_bios_cpu_apicid, cpu) = + early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#ifdef X86_64_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; @@ -195,8 +181,12 @@ void __init setup_per_cpu_areas(void) DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } - /* Setup percpu data maps */ - setup_per_cpu_maps(); + /* indicate the early static arrays will soon be gone */ + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#ifdef X86_64_NUMA + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; +#endif /* Setup node to cpumask map */ setup_node_to_cpumask_map(); From 6470aff619fbb9dff8dfe8afa5033084cd55ca20 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 166/566] x86: move 64-bit NUMA code Impact: Code movement, no functional change. Move the 64-bit NUMA code from setup_percpu.c to numa_64.c Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/topology.h | 6 + arch/x86/kernel/setup_percpu.c | 237 +------------------------------- arch/x86/mm/numa_64.c | 217 +++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 232 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 10022ed3a4b6..77cfb2cfb386 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -74,6 +74,8 @@ static inline const struct cpumask *cpumask_of_node(int node) return &node_to_cpumask_map[node]; } +static inline void setup_node_to_cpumask_map(void) { } + #else /* CONFIG_X86_64 */ /* Mappings between node number and cpus on that node. */ @@ -120,6 +122,8 @@ static inline cpumask_t node_to_cpumask(int node) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ +extern void setup_node_to_cpumask_map(void); + /* * Replace default node_to_cpumask_ptr with optimized version * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" @@ -218,6 +222,8 @@ static inline int node_to_first_cpu(int node) return first_cpu(cpu_online_map); } +static inline void setup_node_to_cpumask_map(void) { } + /* * Replace default node_to_cpumask_ptr with optimized version * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d0b1476490a7..cb6d622520be 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -51,32 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 /* (used later) */ -DEFINE_PER_CPU(int, node_number) = 0; -EXPORT_PER_CPU_SYMBOL(node_number); - -/* - * Map cpu index to node index - */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); - -/* - * Which logical CPUs are on which nodes - */ -cpumask_t *node_to_cpumask_map; -EXPORT_SYMBOL(node_to_cpumask_map); - -/* - * Setup node_to_cpumask_map - */ -static void __init setup_node_to_cpumask_map(void); - -#else -static inline void setup_node_to_cpumask_map(void) { } -#endif - #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ @@ -163,13 +137,13 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#ifdef X86_64_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); -#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; +#ifdef CONFIG_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif /* * Up to this point, CPU0 has been using .data.init * area. Reload %gs offset for CPU0. @@ -184,7 +158,7 @@ void __init setup_per_cpu_areas(void) /* indicate the early static arrays will soon be gone */ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#ifdef X86_64_NUMA +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif @@ -197,204 +171,3 @@ void __init setup_per_cpu_areas(void) #endif -#ifdef X86_64_NUMA - -/* - * Allocate node_to_cpumask_map based on number of available nodes - * Requires node_possible_map to be valid. - * - * Note: node_to_cpumask() is not valid until after this is done. - * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) - */ -static void __init setup_node_to_cpumask_map(void) -{ - unsigned int node, num = 0; - cpumask_t *map; - - /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } - - /* allocate the map */ - map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); - DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); - - pr_debug("Node to cpumask map at %p for %d nodes\n", - map, nr_node_ids); - - /* node_to_cpumask() will now work */ - node_to_cpumask_map = map; -} - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - - /* early setting, no percpu area yet */ - if (cpu_to_node_map) { - cpu_to_node_map[cpu] = node; - return; - } - -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { - printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); - dump_stack(); - return; - } -#endif - per_cpu(x86_cpu_to_node_map, cpu) = node; - - if (node != NUMA_NO_NODE) - per_cpu(node_number, cpu) = node; -} - -void __cpuinit numa_clear_node(int cpu) -{ - numa_set_node(cpu, NUMA_NO_NODE); -} - -#ifndef CONFIG_DEBUG_PER_CPU_MAPS - -void __cpuinit numa_add_cpu(int cpu) -{ - cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -#else /* CONFIG_DEBUG_PER_CPU_MAPS */ - -/* - * --------- debug versions of the numa functions --------- - */ -static void __cpuinit numa_set_cpumask(int cpu, int enable) -{ - int node = early_cpu_to_node(cpu); - cpumask_t *mask; - char buf[64]; - - if (node_to_cpumask_map == NULL) { - printk(KERN_ERR "node_to_cpumask_map NULL\n"); - dump_stack(); - return; - } - - mask = &node_to_cpumask_map[node]; - if (enable) - cpu_set(cpu, *mask); - else - cpu_clear(cpu, *mask); - - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", - enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); -} - -void __cpuinit numa_add_cpu(int cpu) -{ - numa_set_cpumask(cpu, 1); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - numa_set_cpumask(cpu, 0); -} - -int cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) { - printk(KERN_WARNING - "cpu_to_node(%d): usage too early!\n", cpu); - dump_stack(); - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} -EXPORT_SYMBOL(cpu_to_node); - -/* - * Same function as cpu_to_node() but used if called before the - * per_cpu areas are setup. - */ -int early_cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - if (!per_cpu_offset(cpu)) { - printk(KERN_WARNING - "early_cpu_to_node(%d): no per_cpu area!\n", cpu); - dump_stack(); - return NUMA_NO_NODE; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} - - -/* empty cpumask */ -static const cpumask_t cpu_mask_none; - -/* - * Returns a pointer to the bitmask of CPUs on Node 'node'. - */ -const cpumask_t *cpumask_of_node(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "cpumask_of_node(%d): no node_to_cpumask_map!\n", - node); - dump_stack(); - return (const cpumask_t *)&cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "cpumask_of_node(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return &cpu_mask_none; - } - return &node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(cpumask_of_node); - -/* - * Returns a bitmask of CPUs on Node 'node'. - * - * Side note: this function creates the returned cpumask on the stack - * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. - */ -cpumask_t node_to_cpumask(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); - dump_stack(); - return cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "node_to_cpumask(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return cpu_mask_none; - } - return node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(node_to_cpumask); - -/* - * --------- end of debug versions of the numa functions --------- - */ - -#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ - -#endif /* X86_64_NUMA */ - diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89e..08d140fbc31b 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -20,6 +20,12 @@ #include #include +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) +#else +# define DBG(x...) +#endif + struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -33,6 +39,21 @@ int numa_off __initdata; static unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_size; +DEFINE_PER_CPU(int, node_number) = 0; +EXPORT_PER_CPU_SYMBOL(node_number); + +/* + * Map cpu index to node index + */ +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); + +/* + * Which logical CPUs are on which nodes + */ +cpumask_t *node_to_cpumask_map; +EXPORT_SYMBOL(node_to_cpumask_map); + /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -640,3 +661,199 @@ void __init init_cpu_to_node(void) #endif +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: node_to_cpumask() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) + */ +void __init setup_node_to_cpumask_map(void) +{ + unsigned int node, num = 0; + cpumask_t *map; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) { + for_each_node_mask(node, node_possible_map) + num = node; + nr_node_ids = num + 1; + } + + /* allocate the map */ + map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); + + pr_debug("Node to cpumask map at %p for %d nodes\n", + map, nr_node_ids); + + /* node_to_cpumask() will now work */ + node_to_cpumask_map = map; +} + +void __cpuinit numa_set_node(int cpu, int node) +{ + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); + + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { + cpu_to_node_map[cpu] = node; + return; + } + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; + + if (node != NUMA_NO_NODE) + per_cpu(node_number, cpu) = node; +} + +void __cpuinit numa_clear_node(int cpu) +{ + numa_set_node(cpu, NUMA_NO_NODE); +} + +#ifndef CONFIG_DEBUG_PER_CPU_MAPS + +void __cpuinit numa_add_cpu(int cpu) +{ + cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +#else /* CONFIG_DEBUG_PER_CPU_MAPS */ + +/* + * --------- debug versions of the numa functions --------- + */ +static void __cpuinit numa_set_cpumask(int cpu, int enable) +{ + int node = early_cpu_to_node(cpu); + cpumask_t *mask; + char buf[64]; + + if (node_to_cpumask_map == NULL) { + printk(KERN_ERR "node_to_cpumask_map NULL\n"); + dump_stack(); + return; + } + + mask = &node_to_cpumask_map[node]; + if (enable) + cpu_set(cpu, *mask); + else + cpu_clear(cpu, *mask); + + cpulist_scnprintf(buf, sizeof(buf), mask); + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); +} + +void __cpuinit numa_add_cpu(int cpu) +{ + numa_set_cpumask(cpu, 1); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + numa_set_cpumask(cpu, 0); +} + +int cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { + printk(KERN_WARNING + "cpu_to_node(%d): usage too early!\n", cpu); + dump_stack(); + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +EXPORT_SYMBOL(cpu_to_node); + +/* + * Same function as cpu_to_node() but used if called before the + * per_cpu areas are setup. + */ +int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + if (!per_cpu_offset(cpu)) { + printk(KERN_WARNING + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); + dump_stack(); + return NUMA_NO_NODE; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} + + +/* empty cpumask */ +static const cpumask_t cpu_mask_none; + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +const cpumask_t *cpumask_of_node(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "cpumask_of_node(%d): no node_to_cpumask_map!\n", + node); + dump_stack(); + return (const cpumask_t *)&cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "cpumask_of_node(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return &cpu_mask_none; + } + return &node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(cpumask_of_node); + +/* + * Returns a bitmask of CPUs on Node 'node'. + * + * Side note: this function creates the returned cpumask on the stack + * so with a high NR_CPUS count, excessive stack space is used. The + * node_to_cpumask_ptr function should be used whenever possible. + */ +cpumask_t node_to_cpumask(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); + dump_stack(); + return cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "node_to_cpumask(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return cpu_mask_none; + } + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(node_to_cpumask); + +/* + * --------- end of debug versions of the numa functions --------- + */ + +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ From 2f2f52bad72f5e1ca5d1b9ad00a7b57a8cbd9159 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 167/566] x86: move setup_cpu_local_masks() Impact: Code movement, no functional change. Move setup_cpu_local_masks() to kernel/cpu/common.c, where the masks are defined. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/cpumask.h | 4 ++++ arch/x86/kernel/cpu/common.c | 9 +++++++++ arch/x86/kernel/setup_percpu.c | 19 ------------------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 26c6dad90479..a7f3c75f8ad7 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -10,6 +10,8 @@ extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; +extern void setup_cpu_local_masks(void); + #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; @@ -22,6 +24,8 @@ extern cpumask_t cpu_sibling_setup_map; #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) +static inline void setup_cpu_local_masks(void) { } + #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 99904f288d6a..67e30c8a282c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -52,6 +52,15 @@ cpumask_var_t cpu_initialized_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* correctly size the local cpu masks */ +void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #else /* CONFIG_X86_32 */ cpumask_t cpu_callin_map; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index cb6d622520be..7bebdba8eb89 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -51,25 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA #ifdef CONFIG_X86_64 From 74631a248dc2c2129a96f6b8b706ed54bb5c3d3c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 168/566] x86: always page-align per-cpu area start and size Impact: cleanup The way the code is written, align is always PAGE_SIZE. Simplify the code by removing the align variable. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 7bebdba8eb89..5d4a4964a8b3 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -69,15 +69,12 @@ EXPORT_SYMBOL(__per_cpu_offset); */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size; + ssize_t size; char *ptr; int cpu; - unsigned long align = 1; /* Copy section for each CPU (we discard the original) */ - old_size = PERCPU_ENOUGH_ROOM; - align = max_t(unsigned long, PAGE_SIZE, align); - size = roundup(old_size, align); + size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE); pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -86,20 +83,17 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { - ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); pr_debug("per cpu data for cpu%d on node%d at %016lx\n", cpu, node, __pa(ptr)); } From ec70de8b04bf37213982a5e8f303bc38679f3f8e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 169/566] x86: move apic variables to apic.c Impact: Code movement Move the variable definitions to apic.c. Ifdef the copying of the two early per-cpu variables, since Voyager doesn't use them. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/apic.c | 18 ++++++++++++++++++ arch/x86/kernel/setup_percpu.c | 22 ++-------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 1df341a528a1..c6f15647eba9 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -60,6 +60,24 @@ # error SPURIOUS_APIC_VECTOR definition error #endif +unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL(boot_cpu_physical_apicid); +unsigned int max_physical_apicid; + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; + +/* + * Map cpu index to physical APIC ID + */ +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + #ifdef CONFIG_X86_32 /* * Knob to control our willingness to enable the local APIC. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5d4a4964a8b3..d367996693e6 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -31,26 +31,6 @@ DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); #endif -#ifdef CONFIG_X86_LOCAL_APIC -unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); -unsigned int max_physical_apicid; - -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; -#endif - -/* - * Map cpu index to physical APIC ID - */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA #ifdef CONFIG_X86_64 @@ -108,10 +88,12 @@ void __init setup_per_cpu_areas(void) * per cpu data areas. These arrays then become expendable and the * *_early_ptr's are zeroed indicating that the static arrays are gone. */ +#ifdef CONFIG_X86_LOCAL_APIC per_cpu(x86_cpu_to_apicid, cpu) = early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; From 996db817e3d1529d711e55b938d72ae4060b39fd Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 170/566] x86: only compile setup_percpu.o on SMP Impact: Minor build optimization Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 3 ++- arch/x86/kernel/setup_percpu.c | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a99437c965cc..73de055c29c2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o +obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o @@ -59,6 +59,7 @@ apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d367996693e6..f30ff691c34d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -22,14 +22,8 @@ # define DBG(x...) #endif -/* - * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but - * voyager wants cpu_number too. - */ -#ifdef CONFIG_SMP DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); -#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA From 1688401a0fddba8991aa5c0943b8ae9583998d60 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 171/566] x86: move this_cpu_offset Impact: Small cleanup Define BOOT_PERCPU_OFFSET and use it for this_cpu_offset and __per_cpu_offset initializers. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 15 ++++++++++----- arch/x86/kernel/smpcommon.c | 7 ------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f30ff691c34d..36c2e81dfc3c 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -25,15 +25,20 @@ DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); +#ifdef CONFIG_X86_64 +#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) +#else +#define BOOT_PERCPU_OFFSET 0 +#endif + +DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; +EXPORT_PER_CPU_SYMBOL(this_cpu_off); + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA -#ifdef CONFIG_X86_64 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { - [0] = (unsigned long)__per_cpu_load, + [0] = BOOT_PERCPU_OFFSET, }; -#else -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -#endif EXPORT_SYMBOL(__per_cpu_offset); /* diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index add36b4e37c9..5ec29a1a8465 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -5,13 +5,6 @@ #include #include -#ifdef CONFIG_X86_64 -DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load; -#else -DEFINE_PER_CPU(unsigned long, this_cpu_off); -#endif -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - #ifdef CONFIG_X86_32 /* * Initialize the CPU's GDT. This is either the boot CPU doing itself From 34019be1cd2941128b5de6d7c0fbdb51f967d268 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 172/566] x86: don't assume boot cpu is #0 Impact: minor cleanup Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 36c2e81dfc3c..be77f1a1231d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -37,7 +38,7 @@ EXPORT_PER_CPU_SYMBOL(this_cpu_off); #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { - [0] = BOOT_PERCPU_OFFSET, + [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; EXPORT_SYMBOL(__per_cpu_offset); @@ -101,10 +102,10 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif /* - * Up to this point, CPU0 has been using .data.init - * area. Reload %gs offset for CPU0. + * Up to this point, the boot CPU has been using .data.init + * area. Reload %gs offset for the boot CPU. */ - if (cpu == 0) + if (cpu == boot_cpu_id) load_gs_base(cpu); #endif From 89c9c4c58ee86e6e8802597271f23679e0c46647 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 173/566] x86: make Voyager use x86 per-cpu setup. Impact: standardize all x86 platforms on same setup code With the preceding changes, Voyager can use the same per-cpu setup code as all the other x86 platforms. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 2 +- arch/x86/kernel/setup_percpu.c | 5 ----- arch/x86/mach-voyager/voyager_smp.c | 3 --- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5a29b792cb84..d6218e6c9824 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -133,7 +133,7 @@ config ARCH_HAS_CACHE_LINE_SIZE def_bool y config HAVE_SETUP_PER_CPU_AREA - def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) + def_bool y config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index be77f1a1231d..599dc1cc1da8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -35,8 +35,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; EXPORT_PER_CPU_SYMBOL(this_cpu_off); -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA - unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; @@ -125,6 +123,3 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); } - -#endif - diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 96f15b09a4c5..dd82f2052f34 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -531,7 +531,6 @@ static void __init do_boot_cpu(__u8 cpu) stack_start.sp = (void *)idle->thread.sp; init_gdt(cpu); - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1749,7 +1748,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { init_gdt(smp_processor_id()); - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); @@ -1782,7 +1780,6 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - percpu_write(cpu_number, hard_smp_processor_id()); } static void voyager_send_call_func(cpumask_t callmask) From b2d2f4312b117a6cc647c8521e2643a88771f757 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 174/566] x86: initialize per-cpu GDT segment in per-cpu setup Impact: cleanup Rename init_gdt() to setup_percpu_segment(), and move it to setup_percpu.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/Makefile | 3 +-- arch/x86/kernel/setup_percpu.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 4 ---- arch/x86/kernel/smpcommon.c | 25 ------------------------- arch/x86/mach-voyager/voyager_smp.c | 2 -- arch/x86/xen/smp.c | 1 - 7 files changed, 15 insertions(+), 35 deletions(-) delete mode 100644 arch/x86/kernel/smpcommon.c diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 48676b943b92..32c30b02b51f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -778,7 +778,6 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(void); extern void cpu_init(void); -extern void init_gdt(int cpu); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 73de055c29c2..37fa30bada17 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,8 +60,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o -obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 599dc1cc1da8..bcca3a7b3748 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -40,6 +40,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + struct desc_struct gdt; + + pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, + 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); +#endif +} + /* * Great future plan: * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. @@ -81,6 +94,7 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + setup_percpu_segment(cpu); /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index def770b57b5a..f9dbcff43546 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -793,7 +793,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) do_rest: per_cpu(current_task, cpu) = c_idle.idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else @@ -1186,9 +1185,6 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); -#ifdef CONFIG_X86_32 - init_gdt(me); -#endif switch_to_new_gdt(); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 5ec29a1a8465..000000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include -#include -#include - -#ifdef CONFIG_X86_32 -/* - * Initialize the CPU's GDT. This is either the boot CPU doing itself - * (still using the master per-cpu area), or a CPU doing it for a - * secondary which will soon come up. - */ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct gdt; - - pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - - write_gdt_entry(get_cpu_gdt_table(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); -} -#endif diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index dd82f2052f34..331cd6d56483 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -530,7 +530,6 @@ static void __init do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.sp = (void *)idle->thread.sp; - init_gdt(cpu); per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1747,7 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - init_gdt(smp_processor_id()); switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72c2eb9b64cd..7735e3dd359c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -281,7 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); From 1825b8edc2034c012ae48f797d74efd1bd9d4f72 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 175/566] x86: remove extra barriers from load_gs_base() Impact: optimization mb() generates an mfence instruction, which is not needed here. Only a compiler barrier is needed, and that is handled by the memory clobber in the wrmsrl function. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 32c30b02b51f..794234eba317 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -397,10 +397,7 @@ DECLARE_PER_CPU(char *, irq_stack_ptr); static inline void load_gs_base(int cpu) { - /* Memory clobbers used to order pda/percpu accesses */ - mb(); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); - mb(); } #endif From 2697fbd5faf19c84c17441b1752bdcbdcfd1248c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 176/566] x86: load new GDT after setting up boot cpu per-cpu area Impact: sync 32 and 64-bit code Merge load_gs_base() into switch_to_new_gdt(). Load the GDT and per-cpu state for the boot cpu when its new area is set up. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 5 ----- arch/x86/kernel/cpu/common.c | 15 +++++++++------ arch/x86/kernel/setup_percpu.c | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 794234eba317..befa20b4a68c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -394,11 +394,6 @@ union irq_stack_union { DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); - -static inline void load_gs_base(int cpu) -{ - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -} #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67e30c8a282c..0c766b80d915 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -258,12 +258,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; void switch_to_new_gdt(void) { struct desc_ptr gdt_descr; + int cpu = smp_processor_id(); - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); + /* Reload the per-cpu base */ #ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif } @@ -968,10 +973,6 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - loadsegment(fs, 0); - loadsegment(gs, 0); - load_gs_base(cpu); - #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) @@ -993,6 +994,8 @@ void __cpuinit cpu_init(void) */ switch_to_new_gdt(); + loadsegment(fs, 0); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bcca3a7b3748..4caa78d7cb15 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -112,14 +112,14 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif #endif /* * Up to this point, the boot CPU has been using .data.init - * area. Reload %gs offset for the boot CPU. + * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - load_gs_base(cpu); -#endif + switch_to_new_gdt(); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } From 22f25138c345ec46a13744c93c093ff822cd98d1 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 27 Jan 2009 14:21:37 +0900 Subject: [PATCH 177/566] x86: fix build breakage on voyage Impact: build fix x86_cpu_to_apicid and x86_bios_cpu_apicid aren't defined for voyage. Earlier patch forgot to conditionalize early percpu clearing. Fix it. Signed-off-by: James Bottomley Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4caa78d7cb15..c7458ead22de 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -125,8 +125,10 @@ void __init setup_per_cpu_areas(void) } /* indicate the early static arrays will soon be gone */ +#ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#endif #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif From cf3997f507624757f149fcc42b76fb03c151fb65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Jan 2009 14:25:05 +0900 Subject: [PATCH 178/566] x86: clean up indentation in setup_per_cpu_areas() Impact: cosmetic cleanup Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index c7458ead22de..0d1e7ac439f4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -96,22 +96,25 @@ void __init setup_per_cpu_areas(void) per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. + * Copy data used in early init routines from the + * initial arrays to the per cpu data areas. These + * arrays then become expendable and the *_early_ptr's + * are zeroed indicating that the static arrays are + * gone. */ #ifdef CONFIG_X86_LOCAL_APIC per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); + early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); + early_per_cpu_map(x86_bios_cpu_apicid, cpu); #endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = - per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; + per_cpu(irq_stack_union.irq_stack, cpu) + + IRQ_STACK_SIZE - 64; #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); + early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif #endif /* From 042cbaf88ab48e11afb725541e3c2cbf5b483680 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 27 Jan 2009 21:45:57 +0100 Subject: [PATCH 179/566] x86 setup: fix asm constraints in vesa_store_edid Impact: fix potential miscompile (currently believed non-manifest) As the comment explains, the VBE DDC call can clobber any register. Tell the compiler about that fact. Signed-off-by: Andreas Schwab Signed-off-by: H. Peter Anvin --- arch/x86/boot/video-vesa.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 75115849af33..4a58c8ce3f69 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -269,9 +269,8 @@ void vesa_store_edid(void) we genuinely have to assume all registers are destroyed here. */ asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" - : "+a" (ax), "+b" (bx) - : "c" (cx), "D" (di) - : "esi"); + : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) + : : "esi", "edx"); if (ax != 0x004f) return; /* No EDID */ @@ -285,9 +284,9 @@ void vesa_store_edid(void) dx = 0; /* EDID block number */ di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ asm(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info) - : "c" (cx), "D" (di) - : "esi"); + : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info), + "+c" (cx), "+D" (di) + : : "esi"); #endif /* CONFIG_FIRMWARE_EDID */ } From 8f6d86dc4178957d9814b1784848012a927a3898 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Jan 2009 21:41:34 +0100 Subject: [PATCH 180/566] x86: cpu_init(): remove ugly #ifdef construct around debug register clear Impact: Cleanup While I was looking through the new and improved bootstrap code - great work that, thanks! I found the below a slight improvement. Remove unnecessary ugly #ifdef construct around debug register clear. Signed-off-by: Peter Zijlstra Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f00258462444..3f272d42d09a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1071,22 +1071,19 @@ void __cpuinit cpu_init(void) */ if (kgdb_connected && arch_kgdb_ops.correct_hw_break) arch_kgdb_ops.correct_hw_break(); - else { + else #endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ + { + /* + * Clear all 6 debug registers: + */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); } -#endif fpu_init(); From b8e15992b420d09dae831125a623c474c8637cee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 Jan 2009 14:09:59 +1100 Subject: [PATCH 181/566] crypto: api - Fix algorithm test race that broke aead initialisation When we complete a test we'll notify everyone waiting on it, drop the mutex, and then remove the test larval (after reacquiring the mutex). If one of the notified parties tries to register another algorithm with the same driver name prior to the removal of the test larval, they will fail with EEXIST as only one algorithm of a given name can be tested at any time. This broke the initialisation of aead and givcipher algorithms as they will register two algorithms with the same driver name, in sequence. This patch fixes the problem by marking the larval as dead before we drop the mutex, and also ignoring all dead or dying algorithms on the registration path. Tested-by: Andreas Steffen Signed-off-by: Herbert Xu --- crypto/algapi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 7c41e7405c41..56c62e2858d5 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -149,6 +149,9 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) if (q == alg) goto err; + if (crypto_is_moribund(q)) + continue; + if (crypto_is_larval(q)) { if (!strcmp(alg->cra_driver_name, q->cra_driver_name)) goto err; @@ -197,7 +200,7 @@ void crypto_alg_tested(const char *name, int err) down_write(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (!crypto_is_larval(q)) + if (crypto_is_moribund(q) || !crypto_is_larval(q)) continue; test = (struct crypto_larval *)q; @@ -210,6 +213,7 @@ void crypto_alg_tested(const char *name, int err) goto unlock; found: + q->cra_flags |= CRYPTO_ALG_DEAD; alg = test->adult; if (err || list_empty(&alg->cra_list)) goto complete; From 6e7a59944a2971c4fb400bfbecb2f68570086b05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:07:42 +0100 Subject: [PATCH 182/566] x86, genapic: refactor genapic_64.h Impact: pre unification cleanup Make genapic_64.h similar to genapic_32.h: reorder fields, unify types and bring in new entries. No existing functionality is affected. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic_64.h | 68 +++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 7bb092c59055..0a5f7122d9fe 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -16,13 +16,62 @@ struct genapic { char *name; + + int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_registered)(void); + u32 int_delivery_mode; u32 int_dest_mode; - int (*apic_id_registered)(void); + const struct cpumask *(*target_cpus)(void); + + int ESR_DISABLE; + + int apic_destination_logical; + unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + int no_balance_irq; + int no_ioapic_check; + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); + + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*apicid_to_node)(int logical_apicid); + int (*cpu_to_logical_apicid)(int cpu); + int (*cpu_present_to_apicid)(int mps_cpu); + physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + void (*enable_apic_mode)(void); +#ifdef CONFIG_X86_32 + u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); +#else + unsigned int (*phys_pkg_id)(int index_msb); +#endif + + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, + char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + +#ifdef CONFIG_SMP /* ipi */ void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *mask, @@ -30,16 +79,17 @@ struct genapic { void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); - /* */ - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - unsigned int (*phys_pkg_id)(int index_msb); - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; +#endif /* wakeup_secondary_cpu */ int (*wakeup_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*store_NMI_vector)(unsigned short *high, unsigned short *low); + void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); + void (*inquire_remote_apic)(int apicid); }; extern struct genapic *genapic; From 943d0f74d47724d0e33083674c16a834f080af2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:07:42 +0100 Subject: [PATCH 183/566] x86, genapic: refactor genapic_32.h Impact: pre unification cleanup Make genapic_32.h similar to genapic_64.h: reorder fields, unify types and bring in new entries. No existing functionality is affected. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic_32.h | 37 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 4334502d3664..5808b7daf0a3 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -21,19 +21,28 @@ struct mpc_cpu; struct genapic { char *name; - int (*probe)(void); + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); int (*apic_id_registered)(void); + + u32 int_delivery_mode; + u32 int_dest_mode; + const struct cpumask *(*target_cpus)(void); - int int_delivery_mode; - int int_dest_mode; + int ESR_DISABLE; + int apic_destination_logical; unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); unsigned long (*check_apicid_present)(int apicid); + int no_balance_irq; int no_ioapic_check; + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); void (*setup_apic_routing)(void); @@ -45,22 +54,27 @@ struct genapic { void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); void (*enable_apic_mode)(void); +#ifdef CONFIG_X86_32 u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); +#else + unsigned int (*phys_pkg_id)(int index_msb); +#endif - /* mpparse */ - /* When one of the next two hooks returns 1 the genapic - is switched to this. Essentially they are additional probe - functions. */ + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - unsigned (*get_apic_id)(unsigned long x); + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); #ifdef CONFIG_SMP /* ipi */ @@ -69,8 +83,11 @@ struct genapic { int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); #endif + /* wakeup_secondary_cpu */ int (*wakeup_cpu)(int apicid, unsigned long start_eip); + int trampoline_phys_low; int trampoline_phys_high; void (*wait_for_init_deassert)(atomic_t *deassert); From ef7471b13f3ef81074af1972b97355df9df3cdf3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:12:02 +0100 Subject: [PATCH 184/566] x86, genapic: unify struct genapic Move over the (now identical) struct genapic definitions from genapic_32/64.h to genapic.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 96 +++++++++++++++++++++++++++++++ arch/x86/include/asm/genapic_32.h | 93 ------------------------------ arch/x86/include/asm/genapic_64.h | 91 ----------------------------- 3 files changed, 96 insertions(+), 184 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index d48bee663a6f..3dea66a328e3 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -1,5 +1,101 @@ +#ifndef _ASM_X86_GENAPIC_H +#define _ASM_X86_GENAPIC_H + +#include + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ + +struct genapic { + char *name; + + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_registered)(void); + + u32 int_delivery_mode; + u32 int_dest_mode; + + const struct cpumask *(*target_cpus)(void); + + int ESR_DISABLE; + + int apic_destination_logical; + unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + int no_balance_irq; + int no_ioapic_check; + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*init_apic_ldr)(void); + + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*apicid_to_node)(int logical_apicid); + int (*cpu_to_logical_apicid)(int cpu); + int (*cpu_present_to_apicid)(int mps_cpu); + physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + void (*enable_apic_mode)(void); +#ifdef CONFIG_X86_32 + u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); +#else + unsigned int (*phys_pkg_id)(int index_msb); +#endif + + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, + char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + +#ifdef CONFIG_SMP + /* ipi */ + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); +#endif + /* wakeup_secondary_cpu */ + int (*wakeup_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*store_NMI_vector)(unsigned short *high, unsigned short *low); + void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); + void (*inquire_remote_apic)(int apicid); +}; + #ifdef CONFIG_X86_32 # include "genapic_32.h" #else # include "genapic_64.h" #endif + +#endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 5808b7daf0a3..a56785ed12a9 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -4,99 +4,6 @@ #include #include -/* - * Generic APIC driver interface. - * - * An straight forward mapping of the APIC related parts of the - * x86 subarchitecture interface to a dynamic object. - * - * This is used by the "generic" x86 subarchitecture. - * - * Copyright 2003 Andi Kleen, SuSE Labs. - */ - -struct mpc_bus; -struct mpc_table; -struct mpc_cpu; - -struct genapic { - char *name; - - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_registered)(void); - - u32 int_delivery_mode; - u32 int_dest_mode; - - const struct cpumask *(*target_cpus)(void); - - int ESR_DISABLE; - - int apic_destination_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); - unsigned long (*check_apicid_present)(int apicid); - - int no_balance_irq; - int no_ioapic_check; - - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); - void (*init_apic_ldr)(void); - - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - - void (*setup_apic_routing)(void); - int (*multi_timer_check)(int apic, int irq); - int (*apicid_to_node)(int logical_apicid); - int (*cpu_to_logical_apicid)(int cpu); - int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); - void (*setup_portio_remap)(void); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); - void (*enable_apic_mode)(void); -#ifdef CONFIG_X86_32 - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); -#else - unsigned int (*phys_pkg_id)(int index_msb); -#endif - - /* - * When one of the next two hooks returns 1 the genapic - * is switched to this. Essentially they are additional - * probe functions: - */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, - char *productid); - - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; - - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - -#ifdef CONFIG_SMP - /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); -#endif - /* wakeup_secondary_cpu */ - int (*wakeup_cpu)(int apicid, unsigned long start_eip); - - int trampoline_phys_low; - int trampoline_phys_high; - void (*wait_for_init_deassert)(atomic_t *deassert); - void (*smp_callin_clear_local_apic)(void); - void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); - void (*inquire_remote_apic)(int apicid); -}; - #define APICFUNC(x) .x = x, /* More functions could be probably marked IPIFUNC and save some space diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 0a5f7122d9fe..c70ca0d50eb3 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -1,97 +1,6 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H -#include - -/* - * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 - * - * Generic APIC sub-arch data struct. - * - * Hacked for x86-64 by James Cleverdon from i386 architecture code by - * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and - * James Cleverdon. - */ - -struct genapic { - char *name; - - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_registered)(void); - - u32 int_delivery_mode; - u32 int_dest_mode; - - const struct cpumask *(*target_cpus)(void); - - int ESR_DISABLE; - - int apic_destination_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); - unsigned long (*check_apicid_present)(int apicid); - - int no_balance_irq; - int no_ioapic_check; - - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); - void (*init_apic_ldr)(void); - - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - - void (*setup_apic_routing)(void); - int (*multi_timer_check)(int apic, int irq); - int (*apicid_to_node)(int logical_apicid); - int (*cpu_to_logical_apicid)(int cpu); - int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); - void (*setup_portio_remap)(void); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); - void (*enable_apic_mode)(void); -#ifdef CONFIG_X86_32 - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); -#else - unsigned int (*phys_pkg_id)(int index_msb); -#endif - - /* - * When one of the next two hooks returns 1 the genapic - * is switched to this. Essentially they are additional - * probe functions: - */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, - char *productid); - - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; - - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - -#ifdef CONFIG_SMP - /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); -#endif - /* wakeup_secondary_cpu */ - int (*wakeup_cpu)(int apicid, unsigned long start_eip); - - int trampoline_phys_low; - int trampoline_phys_high; - void (*wait_for_init_deassert)(atomic_t *deassert); - void (*smp_callin_clear_local_apic)(void); - void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); - void (*inquire_remote_apic)(int apicid); -}; - extern struct genapic *genapic; extern struct genapic apic_flat; From ced733ec0bfe9a8a5140a7aefdfe802598e4b8c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:15:06 +0100 Subject: [PATCH 185/566] x86, genapic: finish unification Unify remaining bits of genapic_32/64.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 80 ++++++++++++++++++++++++++++++- arch/x86/include/asm/genapic_32.h | 65 ------------------------- arch/x86/include/asm/genapic_64.h | 19 -------- 3 files changed, 78 insertions(+), 86 deletions(-) delete mode 100644 arch/x86/include/asm/genapic_32.h delete mode 100644 arch/x86/include/asm/genapic_64.h diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 3dea66a328e3..7df1b48fa35a 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -93,9 +93,85 @@ struct genapic { }; #ifdef CONFIG_X86_32 -# include "genapic_32.h" + +#include +#include + +#define APICFUNC(x) .x = x, + +/* More functions could be probably marked IPIFUNC and save some space + in UP GENERICARCH kernels, but I don't have the nerve right now + to untangle this mess. -AK */ +#ifdef CONFIG_SMP +#define IPIFUNC(x) APICFUNC(x) #else -# include "genapic_64.h" +#define IPIFUNC(x) #endif +#define APIC_INIT(aname, aprobe) \ +{ \ + .name = aname, \ + .probe = aprobe, \ + .int_delivery_mode = INT_DELIVERY_MODE, \ + .int_dest_mode = INT_DEST_MODE, \ + .no_balance_irq = NO_BALANCE_IRQ, \ + .ESR_DISABLE = esr_disable, \ + .apic_destination_logical = APIC_DEST_LOGICAL, \ + APICFUNC(apic_id_registered) \ + APICFUNC(target_cpus) \ + APICFUNC(check_apicid_used) \ + APICFUNC(check_apicid_present) \ + APICFUNC(init_apic_ldr) \ + APICFUNC(ioapic_phys_id_map) \ + APICFUNC(setup_apic_routing) \ + APICFUNC(multi_timer_check) \ + APICFUNC(apicid_to_node) \ + APICFUNC(cpu_to_logical_apicid) \ + APICFUNC(cpu_present_to_apicid) \ + APICFUNC(apicid_to_cpu_present) \ + APICFUNC(setup_portio_remap) \ + APICFUNC(check_phys_apicid_present) \ + APICFUNC(mps_oem_check) \ + APICFUNC(get_apic_id) \ + .apic_id_mask = APIC_ID_MASK, \ + APICFUNC(cpu_mask_to_apicid) \ + APICFUNC(cpu_mask_to_apicid_and) \ + APICFUNC(vector_allocation_domain) \ + APICFUNC(acpi_madt_oem_check) \ + IPIFUNC(send_IPI_mask) \ + IPIFUNC(send_IPI_allbutself) \ + IPIFUNC(send_IPI_all) \ + APICFUNC(enable_apic_mode) \ + APICFUNC(phys_pkg_id) \ + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ + APICFUNC(wait_for_init_deassert) \ + APICFUNC(smp_callin_clear_local_apic) \ + APICFUNC(store_NMI_vector) \ + APICFUNC(restore_NMI_vector) \ + APICFUNC(inquire_remote_apic) \ +} + +extern struct genapic *genapic; +extern void es7000_update_genapic_to_cluster(void); + +#else /* CONFIG_X86_64: */ + +extern struct genapic *genapic; + +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; +extern int acpi_madt_oem_check(char *, char *); + +extern void apic_send_IPI_self(int vector); + +extern struct genapic apic_x2apic_uv_x; +DECLARE_PER_CPU(int, x2apic_extra_bits); + +extern void setup_apic_routing(void); + +#endif /* CONFIG_X86_64 */ + #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h deleted file mode 100644 index a56785ed12a9..000000000000 --- a/arch/x86/include/asm/genapic_32.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef _ASM_X86_GENAPIC_32_H -#define _ASM_X86_GENAPIC_32_H - -#include -#include - -#define APICFUNC(x) .x = x, - -/* More functions could be probably marked IPIFUNC and save some space - in UP GENERICARCH kernels, but I don't have the nerve right now - to untangle this mess. -AK */ -#ifdef CONFIG_SMP -#define IPIFUNC(x) APICFUNC(x) -#else -#define IPIFUNC(x) -#endif - -#define APIC_INIT(aname, aprobe) \ -{ \ - .name = aname, \ - .probe = aprobe, \ - .int_delivery_mode = INT_DELIVERY_MODE, \ - .int_dest_mode = INT_DEST_MODE, \ - .no_balance_irq = NO_BALANCE_IRQ, \ - .ESR_DISABLE = esr_disable, \ - .apic_destination_logical = APIC_DEST_LOGICAL, \ - APICFUNC(apic_id_registered) \ - APICFUNC(target_cpus) \ - APICFUNC(check_apicid_used) \ - APICFUNC(check_apicid_present) \ - APICFUNC(init_apic_ldr) \ - APICFUNC(ioapic_phys_id_map) \ - APICFUNC(setup_apic_routing) \ - APICFUNC(multi_timer_check) \ - APICFUNC(apicid_to_node) \ - APICFUNC(cpu_to_logical_apicid) \ - APICFUNC(cpu_present_to_apicid) \ - APICFUNC(apicid_to_cpu_present) \ - APICFUNC(setup_portio_remap) \ - APICFUNC(check_phys_apicid_present) \ - APICFUNC(mps_oem_check) \ - APICFUNC(get_apic_id) \ - .apic_id_mask = APIC_ID_MASK, \ - APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ - APICFUNC(vector_allocation_domain) \ - APICFUNC(acpi_madt_oem_check) \ - IPIFUNC(send_IPI_mask) \ - IPIFUNC(send_IPI_allbutself) \ - IPIFUNC(send_IPI_all) \ - APICFUNC(enable_apic_mode) \ - APICFUNC(phys_pkg_id) \ - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ - APICFUNC(wait_for_init_deassert) \ - APICFUNC(smp_callin_clear_local_apic) \ - APICFUNC(store_NMI_vector) \ - APICFUNC(restore_NMI_vector) \ - APICFUNC(inquire_remote_apic) \ -} - -extern struct genapic *genapic; -extern void es7000_update_genapic_to_cluster(void); - -#endif /* _ASM_X86_GENAPIC_32_H */ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h deleted file mode 100644 index c70ca0d50eb3..000000000000 --- a/arch/x86/include/asm/genapic_64.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _ASM_X86_GENAPIC_64_H -#define _ASM_X86_GENAPIC_64_H - -extern struct genapic *genapic; - -extern struct genapic apic_flat; -extern struct genapic apic_physflat; -extern struct genapic apic_x2apic_cluster; -extern struct genapic apic_x2apic_phys; -extern int acpi_madt_oem_check(char *, char *); - -extern void apic_send_IPI_self(int vector); - -extern struct genapic apic_x2apic_uv_x; -DECLARE_PER_CPU(int, x2apic_extra_bits); - -extern void setup_apic_routing(void); - -#endif /* _ASM_X86_GENAPIC_64_H */ From 505deeb1a228e5b0bf6ac5d0d78f4a4253a9efe9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:23:22 +0100 Subject: [PATCH 186/566] x86, genapic: cleanups Unify genapic.h some more. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 7df1b48fa35a..19a5193e9651 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -3,6 +3,9 @@ #include +#include +#include + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -13,7 +16,6 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ - struct genapic { char *name; @@ -85,6 +87,7 @@ struct genapic { int trampoline_phys_low; int trampoline_phys_high; + void (*wait_for_init_deassert)(atomic_t *deassert); void (*smp_callin_clear_local_apic)(void); void (*store_NMI_vector)(unsigned short *high, unsigned short *low); @@ -92,10 +95,9 @@ struct genapic { void (*inquire_remote_apic)(int apicid); }; -#ifdef CONFIG_X86_32 +extern struct genapic *genapic; -#include -#include +#ifdef CONFIG_X86_32 #define APICFUNC(x) .x = x, @@ -143,8 +145,8 @@ struct genapic { IPIFUNC(send_IPI_all) \ APICFUNC(enable_apic_mode) \ APICFUNC(phys_pkg_id) \ - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ APICFUNC(wait_for_init_deassert) \ APICFUNC(smp_callin_clear_local_apic) \ APICFUNC(store_NMI_vector) \ @@ -152,13 +154,10 @@ struct genapic { APICFUNC(inquire_remote_apic) \ } -extern struct genapic *genapic; extern void es7000_update_genapic_to_cluster(void); #else /* CONFIG_X86_64: */ -extern struct genapic *genapic; - extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; From 6781d948cc05b02df915650f2eb49550a1631df9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:54:23 +0100 Subject: [PATCH 187/566] x86, genapic: provide IPI callbacks unconditionally 64-bit x86 uses the IPI callbacks even on UP - so provide them generally. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 19a5193e9651..c27efde0523d 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -73,7 +73,6 @@ struct genapic { unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask); -#ifdef CONFIG_SMP /* ipi */ void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *mask, @@ -81,7 +80,7 @@ struct genapic { void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); -#endif + /* wakeup_secondary_cpu */ int (*wakeup_cpu)(int apicid, unsigned long start_eip); From c8d46cf06dc2e3a8f57a350eb9f9b19fd7f2ffe5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 00:14:11 +0100 Subject: [PATCH 188/566] x86: rename 'genapic' to 'apic' Rename genapic-> to apic-> references because in a future chagne we'll open-code all the indirect calls (instead of obscuring them via macros), so we want this reference to be as short as possible. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 22 ++--- .../include/asm/mach-default/mach_apicdef.h | 6 +- arch/x86/include/asm/mach-default/mach_ipi.h | 8 +- arch/x86/include/asm/mach-generic/mach_apic.h | 50 ++++++------ .../include/asm/mach-generic/mach_apicdef.h | 4 +- arch/x86/include/asm/mach-generic/mach_ipi.h | 6 +- .../include/asm/mach-generic/mach_wakecpu.h | 14 ++-- arch/x86/kernel/es7000_32.c | 6 +- arch/x86/kernel/genapic_64.c | 16 ++-- arch/x86/kernel/io_apic.c | 80 +++++++++---------- arch/x86/kernel/numaq_32.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/mach-generic/es7000.c | 12 +-- arch/x86/mach-generic/probe.c | 24 +++--- 15 files changed, 127 insertions(+), 127 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index c27efde0523d..3970da3245c8 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -94,7 +94,7 @@ struct genapic { void (*inquire_remote_apic)(int apicid); }; -extern struct genapic *genapic; +extern struct genapic *apic; #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index cc09cbbee27e..2448b927b644 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,18 +22,18 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define INT_DELIVERY_MODE (genapic->int_delivery_mode) -#define INT_DEST_MODE (genapic->int_dest_mode) -#define TARGET_CPUS (genapic->target_cpus()) -#define apic_id_registered (genapic->apic_id_registered) -#define init_apic_ldr (genapic->init_apic_ldr) -#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) -#define phys_pkg_id (genapic->phys_pkg_id) -#define vector_allocation_domain (genapic->vector_allocation_domain) +#define INT_DELIVERY_MODE (apic->int_delivery_mode) +#define INT_DEST_MODE (apic->int_dest_mode) +#define TARGET_CPUS (apic->target_cpus()) +#define apic_id_registered (apic->apic_id_registered) +#define init_apic_ldr (apic->init_apic_ldr) +#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) +#define phys_pkg_id (apic->phys_pkg_id) +#define vector_allocation_domain (apic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) -#define send_IPI_self (genapic->send_IPI_self) -#define wakeup_secondary_cpu (genapic->wakeup_cpu) +#define send_IPI_self (apic->send_IPI_self) +#define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index 53179936d6c6..b4dcc0971c76 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -4,9 +4,9 @@ #include #ifdef CONFIG_X86_64 -#define APIC_ID_MASK (genapic->apic_id_mask) -#define GET_APIC_ID(x) (genapic->get_apic_id(x)) -#define SET_APIC_ID(x) (genapic->set_apic_id(x)) +#define APIC_ID_MASK (apic->apic_id_mask) +#define GET_APIC_ID(x) (apic->get_apic_id(x)) +#define SET_APIC_ID(x) (apic->set_apic_id(x)) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index 191312d155da..089399643dfa 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -12,8 +12,8 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include -#define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) +#define send_IPI_mask (apic->send_IPI_mask) +#define send_IPI_mask_allbutself (apic->send_IPI_mask_allbutself) #else static inline void send_IPI_mask(const struct cpumask *mask, int vector) { @@ -39,8 +39,8 @@ static inline void __local_send_IPI_all(int vector) } #ifdef CONFIG_X86_64 -#define send_IPI_allbutself (genapic->send_IPI_allbutself) -#define send_IPI_all (genapic->send_IPI_all) +#define send_IPI_allbutself (apic->send_IPI_allbutself) +#define send_IPI_all (apic->send_IPI_all) #else static inline void send_IPI_allbutself(int vector) { diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 48553e958ad5..59972d94ff18 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,32 +3,32 @@ #include -#define esr_disable (genapic->ESR_DISABLE) -#define NO_BALANCE_IRQ (genapic->no_balance_irq) -#define INT_DELIVERY_MODE (genapic->int_delivery_mode) -#define INT_DEST_MODE (genapic->int_dest_mode) +#define esr_disable (apic->ESR_DISABLE) +#define NO_BALANCE_IRQ (apic->no_balance_irq) +#define INT_DELIVERY_MODE (apic->int_delivery_mode) +#define INT_DEST_MODE (apic->int_dest_mode) #undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL (genapic->apic_destination_logical) -#define TARGET_CPUS (genapic->target_cpus()) -#define apic_id_registered (genapic->apic_id_registered) -#define init_apic_ldr (genapic->init_apic_ldr) -#define ioapic_phys_id_map (genapic->ioapic_phys_id_map) -#define setup_apic_routing (genapic->setup_apic_routing) -#define multi_timer_check (genapic->multi_timer_check) -#define apicid_to_node (genapic->apicid_to_node) -#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) -#define cpu_present_to_apicid (genapic->cpu_present_to_apicid) -#define apicid_to_cpu_present (genapic->apicid_to_cpu_present) -#define setup_portio_remap (genapic->setup_portio_remap) -#define check_apicid_present (genapic->check_apicid_present) -#define check_phys_apicid_present (genapic->check_phys_apicid_present) -#define check_apicid_used (genapic->check_apicid_used) -#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) -#define vector_allocation_domain (genapic->vector_allocation_domain) -#define enable_apic_mode (genapic->enable_apic_mode) -#define phys_pkg_id (genapic->phys_pkg_id) -#define wakeup_secondary_cpu (genapic->wakeup_cpu) +#define APIC_DEST_LOGICAL (apic->apic_destination_logical) +#define TARGET_CPUS (apic->target_cpus()) +#define apic_id_registered (apic->apic_id_registered) +#define init_apic_ldr (apic->init_apic_ldr) +#define ioapic_phys_id_map (apic->ioapic_phys_id_map) +#define setup_apic_routing (apic->setup_apic_routing) +#define multi_timer_check (apic->multi_timer_check) +#define apicid_to_node (apic->apicid_to_node) +#define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) +#define cpu_present_to_apicid (apic->cpu_present_to_apicid) +#define apicid_to_cpu_present (apic->apicid_to_cpu_present) +#define setup_portio_remap (apic->setup_portio_remap) +#define check_apicid_present (apic->check_apicid_present) +#define check_phys_apicid_present (apic->check_phys_apicid_present) +#define check_apicid_used (apic->check_apicid_used) +#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) +#define vector_allocation_domain (apic->vector_allocation_domain) +#define enable_apic_mode (apic->enable_apic_mode) +#define phys_pkg_id (apic->phys_pkg_id) +#define wakeup_secondary_cpu (apic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h index 68041f3802f4..acc9adddb344 100644 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ b/arch/x86/include/asm/mach-generic/mach_apicdef.h @@ -4,8 +4,8 @@ #ifndef APIC_DEFINITION #include -#define GET_APIC_ID (genapic->get_apic_id) -#define APIC_ID_MASK (genapic->apic_id_mask) +#define GET_APIC_ID (apic->get_apic_id) +#define APIC_ID_MASK (apic->apic_id_mask) #endif #endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h index ffd637e3c3d9..75e54bd6cbdc 100644 --- a/arch/x86/include/asm/mach-generic/mach_ipi.h +++ b/arch/x86/include/asm/mach-generic/mach_ipi.h @@ -3,8 +3,8 @@ #include -#define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_allbutself (genapic->send_IPI_allbutself) -#define send_IPI_all (genapic->send_IPI_all) +#define send_IPI_mask (apic->send_IPI_mask) +#define send_IPI_allbutself (apic->send_IPI_allbutself) +#define send_IPI_all (apic->send_IPI_all) #endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 1ab16b168c8a..22006bbee617 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,12 +1,12 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low) -#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high) -#define wait_for_init_deassert (genapic->wait_for_init_deassert) -#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic) -#define store_NMI_vector (genapic->store_NMI_vector) -#define restore_NMI_vector (genapic->restore_NMI_vector) -#define inquire_remote_apic (genapic->inquire_remote_apic) +#define TRAMPOLINE_PHYS_LOW (apic->trampoline_phys_low) +#define TRAMPOLINE_PHYS_HIGH (apic->trampoline_phys_high) +#define wait_for_init_deassert (apic->wait_for_init_deassert) +#define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) +#define store_NMI_vector (apic->store_NMI_vector) +#define restore_NMI_vector (apic->restore_NMI_vector) +#define inquire_remote_apic (apic->inquire_remote_apic) #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 53699c931ad4..20a2a43c2a9c 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -188,14 +188,14 @@ static void noop_wait_for_deassert(atomic_t *deassert_not_used) static int __init es7000_update_genapic(void) { - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { es7000_update_genapic_to_cluster(); - genapic->wait_for_init_deassert = noop_wait_for_deassert; - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + apic->wait_for_init_deassert = noop_wait_for_deassert; + apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; } return 0; diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index e656c2721154..2b986389a24f 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -29,7 +29,7 @@ extern struct genapic apic_x2xpic_uv_x; extern struct genapic apic_x2apic_phys; extern struct genapic apic_x2apic_cluster; -struct genapic __read_mostly *genapic = &apic_flat; +struct genapic __read_mostly *apic = &apic_flat; static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_UV @@ -46,15 +46,15 @@ static struct genapic *apic_probe[] __initdata = { */ void __init setup_apic_routing(void) { - if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { if (!intr_remapping_enabled) - genapic = &apic_flat; + apic = &apic_flat; } - if (genapic == &apic_flat) { + if (apic == &apic_flat) { if (max_physical_apicid >= 8) - genapic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + apic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } if (x86_quirks->update_genapic) @@ -74,9 +74,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - genapic = apic_probe[i]; + apic = apic_probe[i]; printk(KERN_INFO "Setting APIC routing to %s.\n", - genapic->name); + apic->name); return 1; } } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index bfb7d734062a..7283234229fe 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1486,7 +1486,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); } -static int setup_ioapic_entry(int apic, int irq, +static int setup_ioapic_entry(int apic_id, int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int trigger, int polarity, int vector) @@ -1498,18 +1498,18 @@ static int setup_ioapic_entry(int apic, int irq, #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); struct irte irte; struct IR_IO_APIC_route_entry *ir_entry = (struct IR_IO_APIC_route_entry *) entry; int index; if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); + panic("No mapping iommu for ioapic %d\n", apic_id); index = alloc_irte(iommu, irq, 1); if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); + panic("Failed to allocate IRTE for ioapic %d\n", apic_id); memset(&irte, 0, sizeof(irte)); @@ -1547,7 +1547,7 @@ static int setup_ioapic_entry(int apic, int irq, return 0; } -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, +static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg *cfg; @@ -1567,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].apicid, pin, cfg->vector, + apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1583,12 +1583,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); - ioapic_write_entry(apic, pin, entry); + ioapic_write_entry(apic_id, pin, entry); } static void __init setup_IO_APIC_irqs(void) { - int apic, pin, idx, irq; + int apic_id, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; @@ -1596,19 +1596,19 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { + for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); + idx = find_irq_entry(apic_id, pin, mp_INT); if (idx == -1) { if (!notcon) { notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); continue; } if (notcon) { @@ -1617,9 +1617,9 @@ static void __init setup_IO_APIC_irqs(void) notcon = 0; } - irq = pin_2_irq(idx, apic, pin); + irq = pin_2_irq(idx, apic_id, pin); #ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) + if (multi_timer_check(apic_id, irq)) continue; #endif desc = irq_to_desc_alloc_cpu(irq, cpu); @@ -1628,9 +1628,9 @@ static void __init setup_IO_APIC_irqs(void) continue; } cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic, pin); + add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); - setup_IO_APIC_irq(apic, pin, irq, desc, + setup_IO_APIC_irq(apic_id, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@ -1643,7 +1643,7 @@ static void __init setup_IO_APIC_irqs(void) /* * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, +static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; @@ -1676,7 +1676,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, /* * Add it to the IO-APIC irq-routing table: */ - ioapic_write_entry(apic, pin, entry); + ioapic_write_entry(apic_id, pin, entry); } @@ -2089,7 +2089,7 @@ static void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; - int apic; + int apic_id; int i; unsigned char old_id; unsigned long flags; @@ -2113,21 +2113,21 @@ static void __init setup_ioapic_ids_from_mpc(void) /* * Set the IOAPIC ID to the value stored in the MPC table. */ - for (apic = 0; apic < nr_ioapics; apic++) { + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(apic_id, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].apicid; + old_id = mp_ioapics[apic_id].apicid; - if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].apicid); + apic_id, mp_ioapics[apic_id].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].apicid = reg_00.bits.ID; + mp_ioapics[apic_id].apicid = reg_00.bits.ID; } /* @@ -2136,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].apicid)) { + mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].apicid); + apic_id, mp_ioapics[apic_id].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2147,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].apicid = i; + mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic_id].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].apicid); + mp_ioapics[apic_id].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2162,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].apicid) + if (old_id != mp_ioapics[apic_id].apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].dstapic == old_id) mp_irqs[i].dstapic - = mp_ioapics[apic].apicid; + = mp_ioapics[apic_id].apicid; /* * Read the right value from the MPC table and @@ -2174,20 +2174,20 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].apicid); + mp_ioapics[apic_id].apicid); - reg_00.bits.ID = mp_ioapics[apic].apicid; + reg_00.bits.ID = mp_ioapics[apic_id].apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); + io_apic_write(apic_id, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(apic_id, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].apicid) + if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index f2191d4f2717..3928280278f0 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -236,7 +236,7 @@ static int __init numaq_setup_ioapic_ids(void) static int __init numaq_update_genapic(void) { - genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; + apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; return 0; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f41c4486c270..a58e9f5e6030 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -590,7 +590,7 @@ static int __init default_update_genapic(void) { #ifdef CONFIG_X86_SMP # if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) - genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; + apic->wakeup_cpu = wakeup_secondary_cpu_via_init; # endif #endif diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index c2ded1448024..2f4f4a6e39b3 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -20,14 +20,14 @@ void __init es7000_update_genapic_to_cluster(void) { - genapic->target_cpus = target_cpus_cluster; - genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - genapic->int_dest_mode = INT_DEST_MODE_CLUSTER; - genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; + apic->target_cpus = target_cpus_cluster; + apic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + apic->int_dest_mode = INT_DEST_MODE_CLUSTER; + apic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; - genapic->init_apic_ldr = init_apic_ldr_cluster; + apic->init_apic_ldr = init_apic_ldr_cluster; - genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; + apic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; } static int probe_es7000(void) diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 15a38daef1a8..82bf0f520fb6 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -23,7 +23,7 @@ extern struct genapic apic_bigsmp; extern struct genapic apic_es7000; extern struct genapic apic_default; -struct genapic *genapic = &apic_default; +struct genapic *apic = &apic_default; static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_NUMAQ @@ -52,7 +52,7 @@ static int __init parse_apic(char *arg) for (i = 0; apic_probe[i]; i++) { if (!strcmp(apic_probe[i]->name, arg)) { - genapic = apic_probe[i]; + apic = apic_probe[i]; cmdline_apic = 1; return 0; } @@ -76,13 +76,13 @@ void __init generic_bigsmp_probe(void) * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ - if (!cmdline_apic && genapic == &apic_default) { + if (!cmdline_apic && apic == &apic_default) { if (apic_bigsmp.probe()) { - genapic = &apic_bigsmp; + apic = &apic_bigsmp; if (x86_quirks->update_genapic) x86_quirks->update_genapic(); printk(KERN_INFO "Overriding APIC driver with %s\n", - genapic->name); + apic->name); } } #endif @@ -94,7 +94,7 @@ void __init generic_apic_probe(void) int i; for (i = 0; apic_probe[i]; i++) { if (apic_probe[i]->probe()) { - genapic = apic_probe[i]; + apic = apic_probe[i]; break; } } @@ -105,7 +105,7 @@ void __init generic_apic_probe(void) if (x86_quirks->update_genapic) x86_quirks->update_genapic(); } - printk(KERN_INFO "Using APIC driver %s\n", genapic->name); + printk(KERN_INFO "Using APIC driver %s\n", apic->name); } /* These functions can switch the APIC even after the initial ->probe() */ @@ -116,11 +116,11 @@ int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { if (!cmdline_apic) { - genapic = apic_probe[i]; + apic = apic_probe[i]; if (x86_quirks->update_genapic) x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); + apic->name); } return 1; } @@ -134,11 +134,11 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { if (!cmdline_apic) { - genapic = apic_probe[i]; + apic = apic_probe[i]; if (x86_quirks->update_genapic) x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); + apic->name); } return 1; } @@ -148,5 +148,5 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) int hard_smp_processor_id(void) { - return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); + return apic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); } From f2f05ee8b8d346d4edee766384a5fedafdd4f9f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 189/566] x86: clean up genapic_flat - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 73 +++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 34185488e4fb..f1bfdd3608a8 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -175,25 +175,60 @@ static unsigned int phys_pkg_id(int index_msb) } struct genapic apic_flat = { - .name = "flat", - .acpi_madt_oem_check = flat_acpi_madt_oem_check, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), - .target_cpus = flat_target_cpus, - .vector_allocation_domain = flat_vector_allocation_domain, - .apic_id_registered = flat_apic_id_registered, - .init_apic_ldr = flat_init_apic_ldr, - .send_IPI_all = flat_send_IPI_all, - .send_IPI_allbutself = flat_send_IPI_allbutself, - .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, - .send_IPI_self = apic_send_IPI_self, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), + .name = "flat", + .probe = NULL, + .acpi_madt_oem_check = flat_acpi_madt_oem_check, + .apic_id_registered = flat_apic_id_registered, + + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + + .target_cpus = flat_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = flat_vector_allocation_domain, + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFu << 24, + + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + + .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, + .send_IPI_allbutself = flat_send_IPI_allbutself, + .send_IPI_all = flat_send_IPI_all, + .send_IPI_self = apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; /* From 4c3e51e05a7eefead4033b187394458ff8626497 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 190/566] x86: clean up genapic_phys_flat - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 75 +++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index f1bfdd3608a8..e9233374cef1 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -320,23 +320,60 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, } struct genapic apic_physflat = { - .name = "physical flat", - .acpi_madt_oem_check = physflat_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = physflat_target_cpus, - .vector_allocation_domain = physflat_vector_allocation_domain, - .apic_id_registered = flat_apic_id_registered, - .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ - .send_IPI_all = physflat_send_IPI_all, - .send_IPI_allbutself = physflat_send_IPI_allbutself, - .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, - .send_IPI_self = apic_send_IPI_self, - .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), + + .name = "physical flat", + .probe = NULL, + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, + .apic_id_registered = flat_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = physflat_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = physflat_vector_allocation_domain, + /* not needed, but shouldn't hurt: */ + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFu<<24, + + .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, + + .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, + .send_IPI_allbutself = physflat_send_IPI_allbutself, + .send_IPI_all = physflat_send_IPI_all, + .send_IPI_self = apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; From c7967329911013a05920ef12832935c541bb8c9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 191/566] x86: clean up apic_x2apic_uv_x - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 74 ++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index bfe36249145c..94f606f204a1 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -237,25 +237,61 @@ static void uv_send_IPI_self(int vector) } struct genapic apic_x2apic_uv_x = { - .name = "UV large system", - .acpi_madt_oem_check = uv_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = uv_target_cpus, - .vector_allocation_domain = uv_vector_allocation_domain, - .apic_id_registered = uv_apic_id_registered, - .init_apic_ldr = uv_init_apic_ldr, - .send_IPI_all = uv_send_IPI_all, - .send_IPI_allbutself = uv_send_IPI_allbutself, - .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, - .send_IPI_self = uv_send_IPI_self, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "UV large system", + .probe = NULL, + .acpi_madt_oem_check = uv_acpi_madt_oem_check, + .apic_id_registered = uv_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = uv_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = uv_vector_allocation_domain, + .init_apic_ldr = uv_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, + + .send_IPI_mask = uv_send_IPI_mask, + .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, + .send_IPI_allbutself = uv_send_IPI_allbutself, + .send_IPI_all = uv_send_IPI_all, + .send_IPI_self = uv_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; static __cpuinit void set_x2apic_extra_bits(int pnode) From 05c155c235c757329ec89ad591516538ed8352c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 192/566] x86: clean up apic_x2apic_phys - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_phys.c | 74 ++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 21bcc0e098ba..c98361fb7ee1 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -172,23 +172,59 @@ static void init_x2apic_ldr(void) } struct genapic apic_x2apic_phys = { - .name = "physical x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "physical x2apic", + .probe = NULL, + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_registered = x2apic_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = x2apic_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = x2apic_vector_allocation_domain, + .init_apic_ldr = init_x2apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; From 504a3c3ad45d200a6ac8be5aa019c8fa05e26dc8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 193/566] x86: clean up apic_x2apic_cluster - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_cluster.c | 74 +++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 6ce497cc372d..fc855e503ac4 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -176,23 +176,59 @@ static void init_x2apic_ldr(void) } struct genapic apic_x2apic_cluster = { - .name = "cluster x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "cluster x2apic", + .probe = NULL, + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_registered = x2apic_apic_id_registered, + + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + + .target_cpus = x2apic_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = x2apic_vector_allocation_domain, + .init_apic_ldr = init_x2apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; From 0a7e8c64142b2ae5aacdc509ed112b8e362ac8a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 194/566] x86, genapic: cleanup 32-bit apic_default template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/default.c | 58 ++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e63a4a76d8cd..d5fec764fb40 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -24,4 +24,60 @@ static int probe_default(void) return 1; } -struct genapic apic_default = APIC_INIT("default", probe_default); +struct genapic apic_default = { + + .name = "default", + .probe = probe_default, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From d26b6d6660d704ffa59f22ad57c9103e3fba289f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 195/566] x86, genapic: cleanup 32-bit apic_bigsmp template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/bigsmp.c | 58 +++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index bc4c7840b2a8..13e82bc4dae6 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -57,4 +57,60 @@ static int probe_bigsmp(void) return dmi_bigsmp; } -struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp); +struct genapic apic_bigsmp = { + + .name = "bigsmp", + .probe = probe_bigsmp, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From fea3437adf778cfe69b7f8cff0afb8060d84b647 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 196/566] x86, genapic: cleanup 32-bit apic_numaq template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/numaq.c | 58 ++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 3679e2255645..fa486ca49c0a 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -50,4 +50,60 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } -struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); +struct genapic apic_numaq = { + + .name = "NUMAQ", + .probe = probe_numaq, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From fed53ebf3c4e233e085c453a27ae287ccbf149fb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 197/566] x86, genapic: cleanup 32-bit apic_es7000 template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/es7000.c | 58 +++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 2f4f4a6e39b3..4a404ea5f928 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -100,4 +100,60 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } -struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); +struct genapic apic_es7000 = { + + .name = "es7000", + .probe = probe_es7000, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From 491a50c4fbcf6cc39a702a16a2dfaf42f0eb8058 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 198/566] x86, genapic: cleanup 32-bit apic_summit template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/summit.c | 58 +++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2821ffc188b5..479c1d409779 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -37,4 +37,60 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } -struct genapic apic_summit = APIC_INIT("summit", probe_summit); +struct genapic apic_summit = { + + .name = "summit", + .probe = probe_summit, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From 9a6801da55e4a4492e8f666ac272efe8186682c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:18:13 +0100 Subject: [PATCH 199/566] x86: remove APIC_INIT / APICFUNC / IPIFUNC The APIC_INIT() / APICFUNC / IPIFUNC macros were ugly and obfuscated the true identity of various APIC driver methods. Now that they are not used anymore, remove them. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 63 ++-------------------------------- 1 file changed, 2 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 3970da3245c8..26c5e824a717 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -97,66 +97,8 @@ struct genapic { extern struct genapic *apic; #ifdef CONFIG_X86_32 - -#define APICFUNC(x) .x = x, - -/* More functions could be probably marked IPIFUNC and save some space - in UP GENERICARCH kernels, but I don't have the nerve right now - to untangle this mess. -AK */ -#ifdef CONFIG_SMP -#define IPIFUNC(x) APICFUNC(x) -#else -#define IPIFUNC(x) -#endif - -#define APIC_INIT(aname, aprobe) \ -{ \ - .name = aname, \ - .probe = aprobe, \ - .int_delivery_mode = INT_DELIVERY_MODE, \ - .int_dest_mode = INT_DEST_MODE, \ - .no_balance_irq = NO_BALANCE_IRQ, \ - .ESR_DISABLE = esr_disable, \ - .apic_destination_logical = APIC_DEST_LOGICAL, \ - APICFUNC(apic_id_registered) \ - APICFUNC(target_cpus) \ - APICFUNC(check_apicid_used) \ - APICFUNC(check_apicid_present) \ - APICFUNC(init_apic_ldr) \ - APICFUNC(ioapic_phys_id_map) \ - APICFUNC(setup_apic_routing) \ - APICFUNC(multi_timer_check) \ - APICFUNC(apicid_to_node) \ - APICFUNC(cpu_to_logical_apicid) \ - APICFUNC(cpu_present_to_apicid) \ - APICFUNC(apicid_to_cpu_present) \ - APICFUNC(setup_portio_remap) \ - APICFUNC(check_phys_apicid_present) \ - APICFUNC(mps_oem_check) \ - APICFUNC(get_apic_id) \ - .apic_id_mask = APIC_ID_MASK, \ - APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ - APICFUNC(vector_allocation_domain) \ - APICFUNC(acpi_madt_oem_check) \ - IPIFUNC(send_IPI_mask) \ - IPIFUNC(send_IPI_allbutself) \ - IPIFUNC(send_IPI_all) \ - APICFUNC(enable_apic_mode) \ - APICFUNC(phys_pkg_id) \ - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ - APICFUNC(wait_for_init_deassert) \ - APICFUNC(smp_callin_clear_local_apic) \ - APICFUNC(store_NMI_vector) \ - APICFUNC(restore_NMI_vector) \ - APICFUNC(inquire_remote_apic) \ -} - extern void es7000_update_genapic_to_cluster(void); - -#else /* CONFIG_X86_64: */ - +#else extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; @@ -169,7 +111,6 @@ extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern void setup_apic_routing(void); - -#endif /* CONFIG_X86_64 */ +#endif #endif /* _ASM_X86_GENAPIC_64_H */ From 306db03b0d71bf9c94155c0c4771a79fc70b4b27 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:43:47 +0100 Subject: [PATCH 200/566] x86: clean up apic->acpi_madt_oem_check methods Impact: refactor code x86 subarchitectures each defined a "acpi_madt_oem_check()" method, which could be an inline function, or an extern, or a static function, and which was also the name of a genapic field. Untangle this namespace spaghetti by setting ->acpi_madt_oem_check() to NULL on those subarchitectures that have no detection quirks, and rename the other ones (summit, es7000) that do. Also change default_acpi_madt_oem_check() to handle NULL entries, and clean its control flow up as well. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/mpparse.h | 2 +- arch/x86/include/asm/genapic.h | 2 +- .../include/asm/mach-default/mach_mpparse.h | 2 +- .../include/asm/mach-generic/mach_mpparse.h | 2 +- arch/x86/include/asm/summit/mpparse.h | 2 +- arch/x86/kernel/acpi/boot.c | 3 ++- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 6 ++--- arch/x86/mach-generic/numaq.c | 8 +------ arch/x86/mach-generic/probe.c | 24 +++++++++++-------- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h index c1629b090ec2..30692c4ae859 100644 --- a/arch/x86/include/asm/es7000/mpparse.h +++ b/arch/x86/include/asm/es7000/mpparse.h @@ -9,7 +9,7 @@ extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); extern void setup_unisys(void); #ifndef CONFIG_X86_GENERICARCH -extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); +extern int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id); extern int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid); #endif diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 26c5e824a717..108abdf6953b 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -103,7 +103,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; extern struct genapic apic_x2apic_phys; -extern int acpi_madt_oem_check(char *, char *); +extern int default_acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h index c70a263d68cd..8fa01770ba62 100644 --- a/arch/x86/include/asm/mach-default/mach_mpparse.h +++ b/arch/x86/include/asm/mach-default/mach_mpparse.h @@ -8,7 +8,7 @@ mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) } /* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static inline int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h index 9444ab8dca94..f497d96c76bb 100644 --- a/arch/x86/include/asm/mach-generic/mach_mpparse.h +++ b/arch/x86/include/asm/mach-generic/mach_mpparse.h @@ -4,6 +4,6 @@ extern int mps_oem_check(struct mpc_table *, char *, char *); -extern int acpi_madt_oem_check(char *, char *); +extern int default_acpi_madt_oem_check(char *, char *); #endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h index 380e86c02363..555ed8238e94 100644 --- a/arch/x86/include/asm/summit/mpparse.h +++ b/arch/x86/include/asm/summit/mpparse.h @@ -27,7 +27,7 @@ static inline int mps_oem_check(struct mpc_table *mpc, char *oem, } /* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4cb5964f1499..314fe0dddef4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,7 +239,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) madt->address); } - acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); + default_acpi_madt_oem_check(madt->header.oem_id, + madt->header.oem_table_id); return 0; } diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 2b986389a24f..060945b8eec4 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -68,7 +68,7 @@ void apic_send_IPI_self(int vector) __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int i; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 13e82bc4dae6..22c3608b80dd 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -61,7 +61,7 @@ struct genapic apic_bigsmp = { .name = "bigsmp", .probe = probe_bigsmp, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = NULL, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d5fec764fb40..cfec3494a967 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -28,7 +28,7 @@ struct genapic apic_default = { .name = "default", .probe = probe_default, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = NULL, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 4a404ea5f928..23fe6f1c9691 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -57,7 +57,7 @@ mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) #ifdef CONFIG_ACPI /* Hook from generic ACPI tables.c */ -static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { unsigned long oem_addr = 0; int check_dsdt; @@ -81,7 +81,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return ret; } #else -static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } @@ -104,7 +104,7 @@ struct genapic apic_es7000 = { .name = "es7000", .probe = probe_es7000, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index fa486ca49c0a..9691b4e1654d 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -31,12 +31,6 @@ static int probe_numaq(void) return found_numaq; } -/* Hook from generic ACPI tables.c */ -static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} - static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus @@ -54,7 +48,7 @@ struct genapic apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = NULL, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 82bf0f520fb6..a21e2b1a7011 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -128,20 +128,24 @@ int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) return 0; } -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int i; + for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - if (!cmdline_apic) { - apic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; + if (!apic_probe[i]->acpi_madt_oem_check) + continue; + if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) + continue; + + if (!cmdline_apic) { + apic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + apic->name); } + return 1; } return 0; } diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 479c1d409779..0eea9fbb2a50 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -41,7 +41,7 @@ struct genapic apic_summit = { .name = "summit", .probe = probe_summit, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, From 7ed248daa56156f2fd7175f90b62fc6397b0c7b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:43:47 +0100 Subject: [PATCH 201/566] x86: clean up apic->apic_id_registered() methods Impact: cleanup x86 subarchitectures each defined a "apic_id_registered()" method, which could be an inline function depending on which subarch we build for, and which was also the name of a genapic field. Untangle this namespace spaghetti by giving each of the instances a separate name. Also remove wrapper macro obfuscation. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 ++-- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/apic.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index d8dd9f537911..42c56df3ff32 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -4,9 +4,9 @@ #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) #define esr_disable (1) -static inline int apic_id_registered(void) +static inline int bigsmp_apic_id_registered(void) { - return (1); + return 1; } static inline const cpumask_t *target_cpus(void) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index c58b9cc74465..a1819b510de3 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -6,9 +6,9 @@ #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) #define esr_disable (1) -static inline int apic_id_registered(void) +static inline int es7000_apic_id_registered(void) { - return (1); + return 1; } static inline const cpumask_t *target_cpus_cluster(void) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 2448b927b644..6a454fa0b433 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -25,7 +25,6 @@ static inline const struct cpumask *target_cpus(void) #define INT_DELIVERY_MODE (apic->int_delivery_mode) #define INT_DEST_MODE (apic->int_dest_mode) #define TARGET_CPUS (apic->target_cpus()) -#define apic_id_registered (apic->apic_id_registered) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) @@ -57,7 +56,7 @@ static inline void init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline int apic_id_registered(void) +static inline int default_apic_id_registered(void) { return physid_isset(read_apic_id(), phys_cpu_present_map); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 59972d94ff18..cc6e9d70f06e 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -10,7 +10,6 @@ #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define TARGET_CPUS (apic->target_cpus()) -#define apic_id_registered (apic->apic_id_registered) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index bf37bc49bd8e..59b62b19d02c 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -28,7 +28,7 @@ static inline unsigned long check_apicid_present(int bit) } #define apicid_cluster(apicid) (apicid & 0xF0) -static inline int apic_id_registered(void) +static inline int numaq_apic_id_registered(void) { return 1; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 93d2c8667cfe..a36ef6e4b1ff 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -74,7 +74,7 @@ static inline int multi_timer_check(int apic, int irq) return 0; } -static inline int apic_id_registered(void) +static inline int summit_apic_id_registered(void) { return 1; } diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c6f15647eba9..b6740de18fbb 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1171,7 +1171,7 @@ void __cpuinit setup_local_APIC(void) * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. */ - if (!apic_id_registered()) + if (!apic->apic_id_registered()) BUG(); /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 22c3608b80dd..17abf5c62429 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -62,7 +62,7 @@ struct genapic apic_bigsmp = { .name = "bigsmp", .probe = probe_bigsmp, .acpi_madt_oem_check = NULL, - .apic_id_registered = apic_id_registered, + .apic_id_registered = bigsmp_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index cfec3494a967..1f30559e9d8d 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -29,7 +29,7 @@ struct genapic apic_default = { .name = "default", .probe = probe_default, .acpi_madt_oem_check = NULL, - .apic_id_registered = apic_id_registered, + .apic_id_registered = default_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 23fe6f1c9691..d68ca0bce675 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -105,7 +105,7 @@ struct genapic apic_es7000 = { .name = "es7000", .probe = probe_es7000, .acpi_madt_oem_check = es7000_acpi_madt_oem_check, - .apic_id_registered = apic_id_registered, + .apic_id_registered = es7000_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 9691b4e1654d..b22a79b15b19 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -49,7 +49,7 @@ struct genapic apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, .acpi_madt_oem_check = NULL, - .apic_id_registered = apic_id_registered, + .apic_id_registered = numaq_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 0eea9fbb2a50..744fa1b86ef4 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -42,7 +42,7 @@ struct genapic apic_summit = { .name = "summit", .probe = probe_summit, .acpi_madt_oem_check = summit_acpi_madt_oem_check, - .apic_id_registered = apic_id_registered, + .apic_id_registered = summit_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, From f8987a1093cc7a896137e264c24e04d4048e9f95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:02:31 +0100 Subject: [PATCH 202/566] x86, genapic: rename int_delivery_mode, et. al. int_delivery_mode is supposed to mean 'interrupt delivery mode', but it's quite a misnomer as 'int' we usually think of as an integer type ... The standard naming for such attributes is 'irq' - so rename the following fields and macros: int_delivery_mode => irq_delivery_mode INT_DELIVERY_MODE => IRQ_DELIVERY_MODE int_dest_mode => irq_dest_mode INT_DEST_MODE => IRQ_DEST_MODE Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 +-- arch/x86/include/asm/es7000/apic.h | 4 +-- arch/x86/include/asm/genapic.h | 4 +-- arch/x86/include/asm/mach-default/mach_apic.h | 8 ++--- arch/x86/include/asm/mach-generic/mach_apic.h | 4 +-- arch/x86/include/asm/numaq/apic.h | 4 +-- arch/x86/include/asm/summit/apic.h | 4 +-- arch/x86/kernel/genapic_flat_64.c | 8 ++--- arch/x86/kernel/genx2apic_cluster.c | 4 +-- arch/x86/kernel/genx2apic_phys.c | 4 +-- arch/x86/kernel/genx2apic_uv_x.c | 4 +-- arch/x86/kernel/io_apic.c | 30 +++++++++---------- arch/x86/mach-generic/bigsmp.c | 4 +-- arch/x86/mach-generic/default.c | 4 +-- arch/x86/mach-generic/es7000.c | 8 ++--- arch/x86/mach-generic/numaq.c | 4 +-- arch/x86/mach-generic/summit.c | 4 +-- 17 files changed, 53 insertions(+), 53 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 42c56df3ff32..8ff8bba88338 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -21,8 +21,8 @@ static inline const cpumask_t *target_cpus(void) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target proc */ +#define IRQ_DELIVERY_MODE (dest_Fixed) +#define IRQ_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index a1819b510de3..830e8731cc05 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target procs */ +#define IRQ_DELIVERY_MODE (dest_Fixed) +#define IRQ_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 108abdf6953b..e998e3df5d23 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -23,8 +23,8 @@ struct genapic { int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); int (*apic_id_registered)(void); - u32 int_delivery_mode; - u32 int_dest_mode; + u32 irq_delivery_mode; + u32 irq_dest_mode; const struct cpumask *(*target_cpus)(void); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 6a454fa0b433..b5364793262a 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,8 +22,8 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define INT_DELIVERY_MODE (apic->int_delivery_mode) -#define INT_DEST_MODE (apic->int_dest_mode) +#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) +#define IRQ_DEST_MODE (apic->irq_dest_mode) #define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) @@ -35,8 +35,8 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define IRQ_DELIVERY_MODE dest_LowestPrio +#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index cc6e9d70f06e..03492f2219ed 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,8 +5,8 @@ #define esr_disable (apic->ESR_DISABLE) #define NO_BALANCE_IRQ (apic->no_balance_irq) -#define INT_DELIVERY_MODE (apic->int_delivery_mode) -#define INT_DEST_MODE (apic->int_dest_mode) +#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) +#define IRQ_DEST_MODE (apic->irq_dest_mode) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define TARGET_CPUS (apic->target_cpus()) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 59b62b19d02c..d885e35df18e 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -15,8 +15,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ +#define IRQ_DELIVERY_MODE dest_LowestPrio +#define IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index a36ef6e4b1ff..0b7d0d14e568 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -24,8 +24,8 @@ static inline const cpumask_t *target_cpus(void) return &cpumask_of_cpu(0); } -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define IRQ_DELIVERY_MODE (dest_LowestPrio) +#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index e9233374cef1..0a263d6bb5e2 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -180,8 +180,8 @@ struct genapic apic_flat = { .acpi_madt_oem_check = flat_acpi_madt_oem_check, .apic_id_registered = flat_apic_id_registered, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_delivery_mode = dest_LowestPrio, + .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, .ESR_DISABLE = 0, @@ -326,8 +326,8 @@ struct genapic apic_physflat = { .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .apic_id_registered = flat_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index fc855e503ac4..e9ff7dc9a0f6 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -182,8 +182,8 @@ struct genapic apic_x2apic_cluster = { .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .apic_id_registered = x2apic_apic_id_registered, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_delivery_mode = dest_LowestPrio, + .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index c98361fb7ee1..8141b5a88f61 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -178,8 +178,8 @@ struct genapic apic_x2apic_phys = { .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .apic_id_registered = x2apic_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 94f606f204a1..6a73cad0d3e9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -243,8 +243,8 @@ struct genapic apic_x2apic_uv_x = { .acpi_madt_oem_check = uv_acpi_madt_oem_check, .apic_id_registered = uv_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7283234229fe..5f967b9c9afd 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1514,9 +1514,9 @@ static int setup_ioapic_entry(int apic_id, int irq, memset(&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = INT_DEST_MODE; + irte.dst_mode = IRQ_DEST_MODE; irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; + irte.dlvry_mode = IRQ_DELIVERY_MODE; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1529,8 +1529,8 @@ static int setup_ioapic_entry(int apic_id, int irq, } else #endif { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; + entry->delivery_mode = IRQ_DELIVERY_MODE; + entry->dest_mode = IRQ_DEST_MODE; entry->dest = destination; } @@ -1659,10 +1659,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = INT_DEST_MODE; + entry.dest_mode = IRQ_DEST_MODE; entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; + entry.delivery_mode = IRQ_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -3279,9 +3279,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms memset (&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = INT_DEST_MODE; + irte.dst_mode = IRQ_DEST_MODE; irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; + irte.dlvry_mode = IRQ_DELIVERY_MODE; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3299,10 +3299,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? + ((IRQ_DEST_MODE == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: MSI_ADDR_REDIRECTION_LOWPRI) | MSI_ADDR_DEST_ID(dest); @@ -3310,7 +3310,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); @@ -3711,11 +3711,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(cfg->vector) | - ((INT_DEST_MODE == 0) ? + ((IRQ_DEST_MODE == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; @@ -3763,8 +3763,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); entry->vector = cfg->vector; - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; + entry->delivery_mode = IRQ_DELIVERY_MODE; + entry->dest_mode = IRQ_DEST_MODE; entry->polarity = 0; entry->trigger = 0; entry->mask = 0; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 17abf5c62429..c15c1aa2dc7f 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -64,8 +64,8 @@ struct genapic apic_bigsmp = { .acpi_madt_oem_check = NULL, .apic_id_registered = bigsmp_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 1f30559e9d8d..d32b175eff88 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -31,8 +31,8 @@ struct genapic apic_default = { .acpi_madt_oem_check = NULL, .apic_id_registered = default_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index d68ca0bce675..06653892953e 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -21,8 +21,8 @@ void __init es7000_update_genapic_to_cluster(void) { apic->target_cpus = target_cpus_cluster; - apic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - apic->int_dest_mode = INT_DEST_MODE_CLUSTER; + apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; apic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; apic->init_apic_ldr = init_apic_ldr_cluster; @@ -107,8 +107,8 @@ struct genapic apic_es7000 = { .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = es7000_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index b22a79b15b19..401957142fda 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -51,8 +51,8 @@ struct genapic apic_numaq = { .acpi_madt_oem_check = NULL, .apic_id_registered = numaq_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 744fa1b86ef4..946da7aa7622 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -44,8 +44,8 @@ struct genapic apic_summit = { .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = summit_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 9b5bc8dc12421a4b17047061f473d85c1797d543 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:09:58 +0100 Subject: [PATCH 203/566] x86, apic: remove IRQ_DEST_MODE / IRQ_DELIVERY_MODE Remove the wrapper macros IRQ_DEST_MODE and IRQ_DELIVERY_MODE. The typical 32-bit and the 64-bit build all dereference via the genapic, so it's pointless to hide that indirection via these ugly macros. Furthermore, it also obscures subarchitecture details. So replace it with apic->irq_dest_mode / etc. accesses. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 +-- arch/x86/include/asm/es7000/apic.h | 4 +-- arch/x86/include/asm/mach-default/mach_apic.h | 5 ++-- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/include/asm/numaq/apic.h | 4 +-- arch/x86/include/asm/summit/apic.h | 4 +-- arch/x86/kernel/io_apic.c | 30 +++++++++---------- arch/x86/mach-generic/bigsmp.c | 4 +-- arch/x86/mach-generic/default.c | 4 +-- arch/x86/mach-generic/es7000.c | 4 +-- arch/x86/mach-generic/numaq.c | 4 +-- arch/x86/mach-generic/summit.c | 4 +-- 12 files changed, 35 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 8ff8bba88338..293551b0e610 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -21,8 +21,8 @@ static inline const cpumask_t *target_cpus(void) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define IRQ_DELIVERY_MODE (dest_Fixed) -#define IRQ_DEST_MODE (0) /* phys delivery to target proc */ +#define BIGSMP_IRQ_DELIVERY_MODE (dest_Fixed) +#define BIGSMP_IRQ_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 830e8731cc05..690016683f21 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define IRQ_DELIVERY_MODE (dest_Fixed) -#define IRQ_DEST_MODE (0) /* phys delivery to target procs */ +#define ES7000_IRQ_DELIVERY_MODE (dest_Fixed) +#define ES7000_IRQ_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index b5364793262a..eafbf4f20387 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,7 +22,6 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) #define IRQ_DEST_MODE (apic->irq_dest_mode) #define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) @@ -35,8 +34,8 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define IRQ_DELIVERY_MODE dest_LowestPrio -#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define DEFAULT_IRQ_DELIVERY_MODE dest_LowestPrio +#define DEFAULT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 03492f2219ed..387a5d00c43d 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,8 +5,6 @@ #define esr_disable (apic->ESR_DISABLE) #define NO_BALANCE_IRQ (apic->no_balance_irq) -#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) -#define IRQ_DEST_MODE (apic->irq_dest_mode) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define TARGET_CPUS (apic->target_cpus()) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index d885e35df18e..7746035c5911 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -15,8 +15,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define IRQ_DELIVERY_MODE dest_LowestPrio -#define IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ +#define NUMAQ_IRQ_DELIVERY_MODE dest_LowestPrio +#define NUMAQ_IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 0b7d0d14e568..ea2abe9b5979 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -24,8 +24,8 @@ static inline const cpumask_t *target_cpus(void) return &cpumask_of_cpu(0); } -#define IRQ_DELIVERY_MODE (dest_LowestPrio) -#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define SUMMIT_IRQ_DELIVERY_MODE (dest_LowestPrio) +#define SUMMIT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 5f967b9c9afd..301b6571d700 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1514,9 +1514,9 @@ static int setup_ioapic_entry(int apic_id, int irq, memset(&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = IRQ_DEST_MODE; + irte.dst_mode = apic->irq_dest_mode; irte.trigger_mode = trigger; - irte.dlvry_mode = IRQ_DELIVERY_MODE; + irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1529,8 +1529,8 @@ static int setup_ioapic_entry(int apic_id, int irq, } else #endif { - entry->delivery_mode = IRQ_DELIVERY_MODE; - entry->dest_mode = IRQ_DEST_MODE; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; entry->dest = destination; } @@ -1659,10 +1659,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = IRQ_DEST_MODE; + entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = IRQ_DELIVERY_MODE; + entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -3279,9 +3279,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms memset (&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = IRQ_DEST_MODE; + irte.dst_mode = apic->irq_dest_mode; irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = IRQ_DELIVERY_MODE; + irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3299,10 +3299,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | - ((IRQ_DEST_MODE == 0) ? + ((apic->irq_dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: MSI_ADDR_REDIRECTION_LOWPRI) | MSI_ADDR_DEST_ID(dest); @@ -3310,7 +3310,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); @@ -3711,11 +3711,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(cfg->vector) | - ((IRQ_DEST_MODE == 0) ? + ((apic->irq_dest_mode == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | HT_IRQ_LOW_RQEOI_EDGE | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; @@ -3763,8 +3763,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); entry->vector = cfg->vector; - entry->delivery_mode = IRQ_DELIVERY_MODE; - entry->dest_mode = IRQ_DEST_MODE; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; entry->polarity = 0; entry->trigger = 0; entry->mask = 0; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index c15c1aa2dc7f..e8c1ceca7c94 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -64,8 +64,8 @@ struct genapic apic_bigsmp = { .acpi_madt_oem_check = NULL, .apic_id_registered = bigsmp_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = BIGSMP_IRQ_DELIVERY_MODE, + .irq_dest_mode = BIGSMP_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d32b175eff88..0482106f0e19 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -31,8 +31,8 @@ struct genapic apic_default = { .acpi_madt_oem_check = NULL, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = DEFAULT_IRQ_DELIVERY_MODE, + .irq_dest_mode = DEFAULT_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 06653892953e..5d97408919bb 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -107,8 +107,8 @@ struct genapic apic_es7000 = { .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = es7000_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = ES7000_IRQ_DELIVERY_MODE, + .irq_dest_mode = ES7000_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 401957142fda..77ac66935fdd 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -51,8 +51,8 @@ struct genapic apic_numaq = { .acpi_madt_oem_check = NULL, .apic_id_registered = numaq_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = NUMAQ_IRQ_DELIVERY_MODE, + .irq_dest_mode = NUMAQ_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 946da7aa7622..7b3f43caf2ae 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -44,8 +44,8 @@ struct genapic apic_summit = { .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = summit_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = SUMMIT_IRQ_DELIVERY_MODE, + .irq_dest_mode = SUMMIT_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From dcafa4a8c95ce063cbae0a5e61632bc3c4924e66 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 204/566] x86, apic: remove DEFAULT_IRQ_DELIVERY_MODE and DEFAULT_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_default template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/mach-generic/default.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index eafbf4f20387..f3b2cd423882 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -34,8 +34,6 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define DEFAULT_IRQ_DELIVERY_MODE dest_LowestPrio -#define DEFAULT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 0482106f0e19..fe97b0114a06 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -31,8 +31,9 @@ struct genapic apic_default = { .acpi_madt_oem_check = NULL, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = DEFAULT_IRQ_DELIVERY_MODE, - .irq_dest_mode = DEFAULT_IRQ_DEST_MODE, + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 82daea6b0890f739be1ad4ab1c1b922b1555582e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 205/566] x86, apic: remove SUMMIT_IRQ_DELIVERY_MODE and SUMMIT_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_summit template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/summit/apic.h | 3 --- arch/x86/mach-generic/summit.c | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index ea2abe9b5979..427d0889f6f2 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -24,9 +24,6 @@ static inline const cpumask_t *target_cpus(void) return &cpumask_of_cpu(0); } -#define SUMMIT_IRQ_DELIVERY_MODE (dest_LowestPrio) -#define SUMMIT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ - static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 7b3f43caf2ae..1b9164b92b0a 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -44,8 +44,9 @@ struct genapic apic_summit = { .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = summit_apic_id_registered, - .irq_delivery_mode = SUMMIT_IRQ_DELIVERY_MODE, - .irq_dest_mode = SUMMIT_IRQ_DEST_MODE, + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 1b1bcb3ff4e4934d949574cec90679219ace5412 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 206/566] x86, apic: remove NUMAQ_IRQ_DELIVERY_MODE and NUMAQ_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_numaq template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numaq/apic.h | 3 --- arch/x86/mach-generic/numaq.c | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 7746035c5911..a9d846769a02 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -15,9 +15,6 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define NUMAQ_IRQ_DELIVERY_MODE dest_LowestPrio -#define NUMAQ_IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ - static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { return physid_isset(apicid, bitmap); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 77ac66935fdd..6daddb6949d2 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -51,8 +51,9 @@ struct genapic apic_numaq = { .acpi_madt_oem_check = NULL, .apic_id_registered = numaq_apic_id_registered, - .irq_delivery_mode = NUMAQ_IRQ_DELIVERY_MODE, - .irq_dest_mode = NUMAQ_IRQ_DEST_MODE, + .irq_delivery_mode = dest_LowestPrio, + /* physical delivery on LOCAL quad: */ + .irq_dest_mode = 0, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From d8a3539e64f8e27b0ab5bb7e7ba3b8f34b739224 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 207/566] x86, apic: remove BIGSMP_IRQ_DELIVERY_MODE and BIGSMP_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_bigsmp driver template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 -- arch/x86/mach-generic/bigsmp.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 293551b0e610..dca2d5b01daa 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -21,8 +21,6 @@ static inline const cpumask_t *target_cpus(void) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define BIGSMP_IRQ_DELIVERY_MODE (dest_Fixed) -#define BIGSMP_IRQ_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index e8c1ceca7c94..06be776067ad 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -64,8 +64,9 @@ struct genapic apic_bigsmp = { .acpi_madt_oem_check = NULL, .apic_id_registered = bigsmp_apic_id_registered, - .irq_delivery_mode = BIGSMP_IRQ_DELIVERY_MODE, - .irq_dest_mode = BIGSMP_IRQ_DEST_MODE, + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPU: */ + .irq_dest_mode = 0, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 38bd77a6c35168b03b65f7438cdcc1257d550924 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 208/566] x86, apic: remove ES7000_IRQ_DELIVERY_MODE and ES7000_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_es7000 driver template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 2 -- arch/x86/mach-generic/es7000.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 690016683f21..342416b3fadd 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,6 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define ES7000_IRQ_DELIVERY_MODE (dest_Fixed) -#define ES7000_IRQ_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 5d97408919bb..269a97aef431 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -107,8 +107,9 @@ struct genapic apic_es7000 = { .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = es7000_apic_id_registered, - .irq_delivery_mode = ES7000_IRQ_DELIVERY_MODE, - .irq_dest_mode = ES7000_IRQ_DEST_MODE, + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPUs: */ + .irq_dest_mode = 0, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 7fe732862d9697cc1863286fbcace9a67f231b4c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:24:16 +0100 Subject: [PATCH 209/566] x86, apic: remove IRQ_DEST_MODE Remove leftover definition. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index f3b2cd423882..ce3bc4845b98 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,7 +22,6 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define IRQ_DEST_MODE (apic->irq_dest_mode) #define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) From 0a9cc20b9c18372ba5a9fea990f5812f3ee01e32 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:30:40 +0100 Subject: [PATCH 210/566] x86, apic: clean up target_cpus methods Impact: cleanup Clean up all the target_cpus() namespace overlap that exists between bigsmp, es7000, mach-default, numaq and summit - by separating the different functions into different names. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 4 ++-- arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index dca2d5b01daa..d6aeca3c5a8d 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -9,7 +9,7 @@ static inline int bigsmp_apic_id_registered(void) return 1; } -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *bigsmp_target_cpus(void) { #ifdef CONFIG_SMP return &cpu_online_map; diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 342416b3fadd..7e5c31a4f8da 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -16,7 +16,7 @@ static inline const cpumask_t *target_cpus_cluster(void) return &CPU_MASK_ALL; } -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *es7000_target_cpus(void) { return &cpumask_of_cpu(smp_processor_id()); } @@ -83,7 +83,7 @@ static inline void setup_apic_routing(void) printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*target_cpus())[0]); + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index ce3bc4845b98..af1607ddd2a2 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,7 +8,7 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const struct cpumask *target_cpus(void) +static inline const struct cpumask *default_target_cpus(void) { #ifdef CONFIG_SMP return cpu_online_mask; @@ -33,7 +33,7 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define TARGET_CPUS (target_cpus()) +#define TARGET_CPUS (default_target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index a9d846769a02..1111ff9e41de 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -7,7 +7,7 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *numaq_target_cpus(void) { return &CPU_MASK_ALL; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 427d0889f6f2..7c1f9151429c 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -15,7 +15,7 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *summit_target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 06be776067ad..d3cead2d2fc8 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -68,7 +68,7 @@ struct genapic apic_bigsmp = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = target_cpus, + .target_cpus = bigsmp_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index fe97b0114a06..a483e22273e5 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -35,7 +35,7 @@ struct genapic apic_default = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = target_cpus, + .target_cpus = default_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 269a97aef431..e31f0c35470d 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -111,7 +111,7 @@ struct genapic apic_es7000 = { /* phys delivery to target CPUs: */ .irq_dest_mode = 0, - .target_cpus = target_cpus, + .target_cpus = es7000_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 6daddb6949d2..4b84b5970fbe 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -55,7 +55,7 @@ struct genapic apic_numaq = { /* physical delivery on LOCAL quad: */ .irq_dest_mode = 0, - .target_cpus = target_cpus, + .target_cpus = numaq_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 1b9164b92b0a..e6b956a08484 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -48,7 +48,7 @@ struct genapic apic_summit = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = target_cpus, + .target_cpus = summit_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, From fe402e1f2b67a63f1e53ab2a316fc20f7ca4ec91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:32:51 +0100 Subject: [PATCH 211/566] x86, apic: clean up / remove TARGET_CPUS Impact: cleanup use apic->target_cpus() directly instead of the TARGET_CPUS wrapper. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/io_apic.c | 22 +++++++++---------- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 7e5c31a4f8da..53adda099c96 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -161,7 +161,7 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) return 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + * on the same apicid cluster return default value of target_cpus(): */ cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); @@ -194,7 +194,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return cpu_to_logical_apicid(0); /* * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index af1607ddd2a2..77a972475878 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,7 +22,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include -#define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) @@ -33,7 +32,6 @@ static inline const struct cpumask *default_target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define TARGET_CPUS (default_target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 387a5d00c43d..da2d7780cb52 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -7,7 +7,6 @@ #define NO_BALANCE_IRQ (apic->no_balance_irq) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) -#define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 7c1f9151429c..cf5036f1ce6d 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -148,7 +148,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return (int) 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 301b6571d700..7503285e180d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1559,10 +1559,10 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1661,7 +1661,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, */ entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.dest = cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -2877,7 +2877,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, cfg, TARGET_CPUS); + assign_irq_vector(0, cfg, apic->target_cpus()); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -3195,7 +3195,7 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; - if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; } @@ -3261,11 +3261,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms return -ENXIO; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3698,12 +3698,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) return -ENXIO; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); if (!err) { struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3987,7 +3987,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) /* * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS + * so mask in all cases should simply be apic->target_cpus() */ #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) @@ -4028,7 +4028,7 @@ void __init setup_ioapic_dest(void) (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) mask = desc->affinity; else - mask = TARGET_CPUS; + mask = apic->target_cpus(); #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) From f6f52baf2613dd319e9ba3f3319bf1f1c442e4b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:01:41 +0100 Subject: [PATCH 212/566] x86: clean up esr_disable() methods Impact: cleanup Most subarchitectures want to disable the APIC ESR (Error Status Register), because they generally have hardware hacks that wrap standard CPUs into a bigger system and hence the APIC bus is quite non-standard and weirdnesses (lockups) have been seen with ESR reporting. Remove the esr_disable macros and put the desired flag into each subarchitecture's genapic template directly. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 1 - arch/x86/include/asm/es7000/apic.h | 1 - arch/x86/include/asm/mach-default/mach_apic.h | 1 - arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 1 - arch/x86/include/asm/summit/apic.h | 1 - arch/x86/kernel/apic.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index d6aeca3c5a8d..b550cb111028 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -2,7 +2,6 @@ #define __ASM_MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) -#define esr_disable (1) static inline int bigsmp_apic_id_registered(void) { diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 53adda099c96..aa11c768bed7 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -4,7 +4,6 @@ #include #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) -#define esr_disable (1) static inline int es7000_apic_id_registered(void) { diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 77a972475878..5f8d17fdc965 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -18,7 +18,6 @@ static inline const struct cpumask *default_target_cpus(void) } #define NO_BALANCE_IRQ (0) -#define esr_disable (0) #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index da2d7780cb52..63fe985219f9 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define esr_disable (apic->ESR_DISABLE) #define NO_BALANCE_IRQ (apic->no_balance_irq) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 1111ff9e41de..8ecb3b45c6c4 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -13,7 +13,6 @@ static inline const cpumask_t *numaq_target_cpus(void) } #define NO_BALANCE_IRQ (1) -#define esr_disable (1) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index cf5036f1ce6d..84679e687add 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -4,7 +4,6 @@ #include #include -#define esr_disable (1) #define NO_BALANCE_IRQ (0) /* In clustered mode, the high nibble of APIC ID is a cluster number. diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b6740de18fbb..69d8c30d5711 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1107,7 +1107,7 @@ static void __cpuinit lapic_setup_esr(void) return; } - if (esr_disable) { + if (apic->ESR_DISABLE) { /* * Something untraceable is creating bad interrupts on * secondary quads ... for the moment, just leave the @@ -1157,7 +1157,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && esr_disable) { + if (lapic_is_integrated() && apic->ESR_DISABLE) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d3cead2d2fc8..f0bb72674f73 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -69,7 +69,7 @@ struct genapic apic_bigsmp = { .irq_dest_mode = 0, .target_cpus = bigsmp_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index a483e22273e5..c30141a9aca0 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -36,7 +36,7 @@ struct genapic apic_default = { .irq_dest_mode = 1, .target_cpus = default_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 0, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index e31f0c35470d..e8aa8fd4f49f 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -112,7 +112,7 @@ struct genapic apic_es7000 = { .irq_dest_mode = 0, .target_cpus = es7000_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 4b84b5970fbe..860edc8bd903 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -56,7 +56,7 @@ struct genapic apic_numaq = { .irq_dest_mode = 0, .target_cpus = numaq_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index e6b956a08484..cd5ef115a4ee 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -49,7 +49,7 @@ struct genapic apic_summit = { .irq_dest_mode = 1, .target_cpus = summit_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From 08125d3edab90644724652eedec3e219e3e0f2e7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:08:44 +0100 Subject: [PATCH 213/566] x86: rename ->ESR_DISABLE to ->disable_esr the ->ESR_DISABLE shouting variant was used to enable the esr_disable macro wrappers. Those ugly macros are removed now so we can rename ->ESR_DISABLE to ->disable_esr Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- arch/x86/kernel/apic.c | 4 ++-- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index e998e3df5d23..7aee4a4c5244 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -28,7 +28,7 @@ struct genapic { const struct cpumask *(*target_cpus)(void); - int ESR_DISABLE; + int disable_esr; int apic_destination_logical; unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 69d8c30d5711..3853ed76c888 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1107,7 +1107,7 @@ static void __cpuinit lapic_setup_esr(void) return; } - if (apic->ESR_DISABLE) { + if (apic->disable_esr) { /* * Something untraceable is creating bad interrupts on * secondary quads ... for the moment, just leave the @@ -1157,7 +1157,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && apic->ESR_DISABLE) { + if (lapic_is_integrated() && apic->disable_esr) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 0a263d6bb5e2..d437a60cc589 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -184,7 +184,7 @@ struct genapic apic_flat = { .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -330,7 +330,7 @@ struct genapic apic_physflat = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index e9ff7dc9a0f6..c1cffae4a4c2 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -186,7 +186,7 @@ struct genapic apic_x2apic_cluster = { .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 8141b5a88f61..c59602be0353 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -182,7 +182,7 @@ struct genapic apic_x2apic_phys = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 6a73cad0d3e9..525b4e480a73 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -247,7 +247,7 @@ struct genapic apic_x2apic_uv_x = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index f0bb72674f73..fe9bf252c069 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -69,7 +69,7 @@ struct genapic apic_bigsmp = { .irq_dest_mode = 0, .target_cpus = bigsmp_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c30141a9aca0..d3fe8017e94a 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -36,7 +36,7 @@ struct genapic apic_default = { .irq_dest_mode = 1, .target_cpus = default_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index e8aa8fd4f49f..b4f8abfb714f 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -112,7 +112,7 @@ struct genapic apic_es7000 = { .irq_dest_mode = 0, .target_cpus = es7000_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 860edc8bd903..f3b7840d227d 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -56,7 +56,7 @@ struct genapic apic_numaq = { .irq_dest_mode = 0, .target_cpus = numaq_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index cd5ef115a4ee..95e075b61bdd 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -49,7 +49,7 @@ struct genapic apic_summit = { .irq_dest_mode = 1, .target_cpus = summit_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From 0b06e734bff7554c31eac4aad2fc9be4adb7c1c1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:13:04 +0100 Subject: [PATCH 214/566] x86: clean up the APIC_DEST_LOGICAL logic Impact: cleanup The bigsmp and es7000 subarchitectures un-defined APIC_DEST_LOGICAL in a rather nasty way by re-defining it to zero. That is infinitely fragile and makes it very hard to see what to code really does in a given context. The very same constant has different meanings and values - depending on which subarch is enabled. Untangle this mess by never undefining the constant, but instead propagating the right values into the genapic driver templates. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 -- arch/x86/include/asm/es7000/apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/kernel/genapic_flat_64.c | 12 ++++++------ arch/x86/kernel/genx2apic_cluster.c | 10 +++++----- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- 12 files changed, 19 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index b550cb111028..7e6e33a6db02 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -17,8 +17,6 @@ static inline const cpumask_t *bigsmp_target_cpus(void) #endif } -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index aa11c768bed7..0d770fce4b28 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,6 @@ static inline const cpumask_t *es7000_target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0x0 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 63fe985219f9..00d5fe6e6769 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -4,8 +4,6 @@ #include #define NO_BALANCE_IRQ (apic->no_balance_irq) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index d437a60cc589..fd242c6b3ba1 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); + __send_IPI_dest_field(mask, vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -114,7 +114,7 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->apic_destination_logical); } } @@ -123,7 +123,7 @@ static void flat_send_IPI_all(int vector) if (vector == NMI_VECTOR) flat_send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->apic_destination_logical); } static unsigned int get_apic_id(unsigned long x) @@ -181,11 +181,11 @@ struct genapic apic_flat = { .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = flat_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -327,7 +327,7 @@ struct genapic apic_physflat = { .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 0, /* physical */ .target_cpus = physflat_target_cpus, .disable_esr = 0, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index c1cffae4a4c2..a76e75ecc206 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -64,7 +64,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) for_each_cpu(query_cpu, mask) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -80,7 +80,7 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -95,7 +95,7 @@ static void x2apic_send_IPI_allbutself(int vector) if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -183,11 +183,11 @@ struct genapic apic_x2apic_cluster = { .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index c59602be0353..9b6d68deb147 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -179,7 +179,7 @@ struct genapic apic_x2apic_phys = { .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 0, /* physical */ .target_cpus = x2apic_target_cpus, .disable_esr = 0, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 525b4e480a73..0a756800c11a 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -244,11 +244,11 @@ struct genapic apic_x2apic_uv_x = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = uv_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7503285e180d..17526d7a8ab3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -806,7 +806,7 @@ void send_IPI_self(int vector) * Wait for idle. */ apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->apic_destination_logical; /* * Send the IPI. The write to APIC_ICR fires this off. */ diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 285bbf8831fa..400b7bd48f67 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -30,7 +30,7 @@ static inline int __prepare_ICR(unsigned int shortcut, int vector) { - unsigned int icr = shortcut | APIC_DEST_LOGICAL; + unsigned int icr = shortcut | apic->apic_destination_logical; switch (vector) { default: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9dbcff43546..f0a173718d9f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) /* Target chip */ /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); + apic_icr_write(APIC_DM_NMI | apic->apic_destination_logical, logical_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index fe9bf252c069..13ee7dc9b952 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -70,7 +70,7 @@ struct genapic apic_bigsmp = { .target_cpus = bigsmp_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .apic_destination_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index b4f8abfb714f..61b5da213ce6 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -113,7 +113,7 @@ struct genapic apic_es7000 = { .target_cpus = es7000_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .apic_destination_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From bdb1a9b62fc182d4da3143e346f7a0925d243352 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:29:25 +0100 Subject: [PATCH 215/566] x86, apic: rename genapic::apic_destination_logical to genapic::dest_logical This field name was unreasonably long - shorten it. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- arch/x86/kernel/genapic_flat_64.c | 10 +++++----- arch/x86/kernel/genx2apic_cluster.c | 8 ++++---- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 7aee4a4c5244..f9d1ec018fd3 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -30,7 +30,7 @@ struct genapic { int disable_esr; - int apic_destination_logical; + int dest_logical; unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); unsigned long (*check_apicid_present)(int apicid); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index fd242c6b3ba1..d22cbdaee208 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, apic->apic_destination_logical); + __send_IPI_dest_field(mask, vector, apic->dest_logical); local_irq_restore(flags); } @@ -114,7 +114,7 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->apic_destination_logical); + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); } } @@ -123,7 +123,7 @@ static void flat_send_IPI_all(int vector) if (vector == NMI_VECTOR) flat_send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->apic_destination_logical); + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); } static unsigned int get_apic_id(unsigned long x) @@ -185,7 +185,7 @@ struct genapic apic_flat = { .target_cpus = flat_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -331,7 +331,7 @@ struct genapic apic_physflat = { .target_cpus = physflat_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index a76e75ecc206..b91a48eae52e 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -64,7 +64,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) for_each_cpu(query_cpu, mask) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -80,7 +80,7 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -95,7 +95,7 @@ static void x2apic_send_IPI_allbutself(int vector) if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -187,7 +187,7 @@ struct genapic apic_x2apic_cluster = { .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 9b6d68deb147..f070e86af0f4 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -183,7 +183,7 @@ struct genapic apic_x2apic_phys = { .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 0a756800c11a..c8a891586799 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -248,7 +248,7 @@ struct genapic apic_x2apic_uv_x = { .target_cpus = uv_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 17526d7a8ab3..7f8b32b20897 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -806,7 +806,7 @@ void send_IPI_self(int vector) * Wait for idle. */ apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->apic_destination_logical; + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->dest_logical; /* * Send the IPI. The write to APIC_ICR fires this off. */ diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 400b7bd48f67..e2e4895ca69f 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -30,7 +30,7 @@ static inline int __prepare_ICR(unsigned int shortcut, int vector) { - unsigned int icr = shortcut | apic->apic_destination_logical; + unsigned int icr = shortcut | apic->dest_logical; switch (vector) { default: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f0a173718d9f..45c096f605fe 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) /* Target chip */ /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | apic->apic_destination_logical, logical_apicid); + apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 13ee7dc9b952..7c52840f2050 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -70,7 +70,7 @@ struct genapic apic_bigsmp = { .target_cpus = bigsmp_target_cpus, .disable_esr = 1, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d3fe8017e94a..53fa1ad83184 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -37,7 +37,7 @@ struct genapic apic_default = { .target_cpus = default_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 61b5da213ce6..50fed0225cda 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -113,7 +113,7 @@ struct genapic apic_es7000 = { .target_cpus = es7000_target_cpus, .disable_esr = 1, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index f3b7840d227d..1fb1b1a4aa00 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -57,7 +57,7 @@ struct genapic apic_numaq = { .target_cpus = numaq_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 95e075b61bdd..5c27d4d824e5 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -50,7 +50,7 @@ struct genapic apic_summit = { .target_cpus = summit_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From d1d7cae8fd54a301a0de531b48451649933ffdcf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:41:42 +0100 Subject: [PATCH 216/566] x86, apic: clean up check_apicid*() callbacks Clean up these methods - to make it clearer which function is used in which case. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 9 +++++---- arch/x86/include/asm/es7000/apic.h | 5 +++-- arch/x86/include/asm/mach-default/mach_apic.h | 4 ++-- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/include/asm/numaq/apic.h | 5 +++-- arch/x86/include/asm/summit/apic.h | 5 +++-- arch/x86/kernel/io_apic.c | 6 +++--- arch/x86/mach-generic/bigsmp.c | 4 ++-- arch/x86/mach-generic/default.c | 4 ++-- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 4 ++-- 12 files changed, 29 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 7e6e33a6db02..bd52d4d86f0e 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -20,14 +20,15 @@ static inline const cpumask_t *bigsmp_target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) { - return (0); + return 0; } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long bigsmp_check_apicid_present(int bit) { - return (1); + return 1; } static inline unsigned long calculate_ldr(int cpu) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 0d770fce4b28..cd888daa1930 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -28,11 +28,12 @@ static inline const cpumask_t *es7000_target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +es7000_check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long es7000_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 5f8d17fdc965..064bc11a991c 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -105,12 +105,12 @@ static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) } #endif -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) { return physid_isset(apicid, bitmap); } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long default_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 00d5fe6e6769..e035f88dfcde 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -13,9 +13,7 @@ #define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) -#define check_apicid_present (apic->check_apicid_present) #define check_phys_apicid_present (apic->check_phys_apicid_present) -#define check_apicid_used (apic->check_apicid_used) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define vector_allocation_domain (apic->vector_allocation_domain) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 8ecb3b45c6c4..571fdaeafaa8 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -14,11 +14,12 @@ static inline const cpumask_t *numaq_target_cpus(void) #define NO_BALANCE_IRQ (1) -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +numaq_check_apicid_used(physid_mask_t bitmap, int apicid) { return physid_isset(apicid, bitmap); } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long numaq_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 84679e687add..482038b244b0 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -23,13 +23,14 @@ static inline const cpumask_t *summit_target_cpus(void) return &cpumask_of_cpu(0); } -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +summit_check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; } /* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long summit_check_apicid_present(int bit) { return 1; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7f8b32b20897..733ecf172724 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2135,7 +2135,7 @@ static void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -3878,10 +3878,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(apic_id_map, i)) break; } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 7c52840f2050..aa8443f6c0f7 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -71,8 +71,8 @@ struct genapic apic_bigsmp = { .target_cpus = bigsmp_target_cpus, .disable_esr = 1, .dest_logical = 0, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = bigsmp_check_apicid_used, + .check_apicid_present = bigsmp_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 53fa1ad83184..47f6b5b06ba1 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -38,8 +38,8 @@ struct genapic apic_default = { .target_cpus = default_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = default_check_apicid_used, + .check_apicid_present = default_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 50fed0225cda..5633f3296e1c 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -114,8 +114,8 @@ struct genapic apic_es7000 = { .target_cpus = es7000_target_cpus, .disable_esr = 1, .dest_logical = 0, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = es7000_check_apicid_used, + .check_apicid_present = es7000_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 1fb1b1a4aa00..d85206d8e4ae 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -58,8 +58,8 @@ struct genapic apic_numaq = { .target_cpus = numaq_target_cpus, .disable_esr = 1, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = numaq_check_apicid_used, + .check_apicid_present = numaq_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 5c27d4d824e5..f54cf73d3edb 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -51,8 +51,8 @@ struct genapic apic_summit = { .target_cpus = summit_target_cpus, .disable_esr = 1, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = summit_check_apicid_used, + .check_apicid_present = summit_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, From 2e867b17cc02e1799f18126af0ddd7b63dd8f6f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:57:56 +0100 Subject: [PATCH 217/566] x86, apic: remove no_balance_irq and no_ioapic_check flags These flags are completely unused. (the in-kernel IRQ balancer has been removed from the upstream kernel.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 1 - arch/x86/include/asm/es7000/apic.h | 2 -- arch/x86/include/asm/genapic.h | 3 --- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 -- arch/x86/include/asm/summit/apic.h | 2 -- arch/x86/kernel/genapic_flat_64.c | 6 ------ arch/x86/kernel/genx2apic_cluster.c | 3 --- arch/x86/kernel/genx2apic_phys.c | 3 --- arch/x86/kernel/genx2apic_uv_x.c | 3 --- arch/x86/mach-generic/bigsmp.c | 3 --- arch/x86/mach-generic/default.c | 3 --- arch/x86/mach-generic/es7000.c | 4 ---- arch/x86/mach-generic/numaq.c | 3 --- arch/x86/mach-generic/summit.c | 3 --- 16 files changed, 44 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index bd52d4d86f0e..916451252b3a 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -18,7 +18,6 @@ static inline const cpumask_t *bigsmp_target_cpus(void) } #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define NO_BALANCE_IRQ (0) static inline unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index cd888daa1930..847008a77029 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -23,10 +23,8 @@ static inline const cpumask_t *es7000_target_cpus(void) #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) #define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) #define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define NO_BALANCE_IRQ (0) static inline unsigned long es7000_check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index f9d1ec018fd3..661898c2229c 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -34,9 +34,6 @@ struct genapic { unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); unsigned long (*check_apicid_present)(int apicid); - int no_balance_irq; - int no_ioapic_check; - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 064bc11a991c..8adccf8ee473 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -17,8 +17,6 @@ static inline const struct cpumask *default_target_cpus(void) #endif } -#define NO_BALANCE_IRQ (0) - #ifdef CONFIG_X86_64 #include #define init_apic_ldr (apic->init_apic_ldr) diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e035f88dfcde..4cb9e2b99e37 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define NO_BALANCE_IRQ (apic->no_balance_irq) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 571fdaeafaa8..defee3496ad6 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -12,8 +12,6 @@ static inline const cpumask_t *numaq_target_cpus(void) return &CPU_MASK_ALL; } -#define NO_BALANCE_IRQ (1) - static inline unsigned long numaq_check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 482038b244b0..51df002ecf4c 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -4,8 +4,6 @@ #include #include -#define NO_BALANCE_IRQ (0) - /* In clustered mode, the high nibble of APIC ID is a cluster number. * The low nibble is a 4-bit bitmap. */ #define XAPIC_DEST_CPUS_SHIFT 4 diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index d22cbdaee208..9446f372a16b 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -189,9 +189,6 @@ struct genapic apic_flat = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, @@ -335,9 +332,6 @@ struct genapic apic_physflat = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = physflat_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index b91a48eae52e..2eeca6e744af 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -191,9 +191,6 @@ struct genapic apic_x2apic_cluster = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = x2apic_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index f070e86af0f4..be0ee3e56ef1 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -187,9 +187,6 @@ struct genapic apic_x2apic_phys = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = x2apic_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c8a891586799..68b423f3da99 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -252,9 +252,6 @@ struct genapic apic_x2apic_uv_x = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = uv_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index aa8443f6c0f7..6da251aa9f4e 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -74,9 +74,6 @@ struct genapic apic_bigsmp = { .check_apicid_used = bigsmp_check_apicid_used, .check_apicid_present = bigsmp_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 47f6b5b06ba1..e89e8c9dd68d 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -41,9 +41,6 @@ struct genapic apic_default = { .check_apicid_used = default_check_apicid_used, .check_apicid_present = default_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 5633f3296e1c..8e9eeecf7e24 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -23,7 +23,6 @@ void __init es7000_update_genapic_to_cluster(void) apic->target_cpus = target_cpus_cluster; apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; - apic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; apic->init_apic_ldr = init_apic_ldr_cluster; @@ -117,9 +116,6 @@ struct genapic apic_es7000 = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index d85206d8e4ae..f909189fee3e 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -61,9 +61,6 @@ struct genapic apic_numaq = { .check_apicid_used = numaq_check_apicid_used, .check_apicid_present = numaq_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index f54cf73d3edb..99a9bea8d141 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -54,9 +54,6 @@ struct genapic apic_summit = { .check_apicid_used = summit_check_apicid_used, .check_apicid_present = summit_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, From e2d40b1878bd13ca1028ddd299c48e4821ac3535 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 218/566] x86, apic: clean up ->vector_allocation_domain() - separate the namespace - remove macros - move the default vector-allocation-domain to mach-generic - fix whitespace damage Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 13 ------------- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/kernel/io_apic.c | 2 +- arch/x86/mach-generic/bigsmp.c | 4 ++-- arch/x86/mach-generic/default.c | 16 +++++++++++++++- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 4 ++-- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 8adccf8ee473..9c56542644ca 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -23,7 +23,6 @@ static inline const struct cpumask *default_target_cpus(void) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define phys_pkg_id (apic->phys_pkg_id) -#define vector_allocation_domain (apic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) @@ -89,18 +88,6 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; -} #endif static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 4cb9e2b99e37..e94881af9625 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -15,7 +15,6 @@ #define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define vector_allocation_domain (apic->vector_allocation_domain) #define enable_apic_mode (apic->enable_apic_mode) #define phys_pkg_id (apic->phys_pkg_id) #define wakeup_secondary_cpu (apic->wakeup_cpu) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 733ecf172724..49899e066247 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1316,7 +1316,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int new_cpu; int vector, offset; - vector_allocation_domain(cpu, tmp_mask); + apic->vector_allocation_domain(cpu, tmp_mask); vector = current_vector; offset = current_offset; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 6da251aa9f4e..391cc99cd21c 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -42,7 +42,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask) { cpus_clear(*retmask); cpu_set(cpu, *retmask); @@ -74,7 +74,7 @@ struct genapic apic_bigsmp = { .check_apicid_used = bigsmp_check_apicid_used, .check_apicid_present = bigsmp_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = bigsmp_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e89e8c9dd68d..6adc3c69a3c9 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -18,6 +18,20 @@ #include #include +static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + /* + * Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; +} + /* should be called last. */ static int probe_default(void) { @@ -41,7 +55,7 @@ struct genapic apic_default = { .check_apicid_used = default_check_apicid_used, .check_apicid_present = default_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 8e9eeecf7e24..bc1f21cd6a4d 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -86,7 +86,7 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -116,7 +116,7 @@ struct genapic apic_es7000 = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = es7000_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index f909189fee3e..712882f48c4e 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -31,7 +31,7 @@ static int probe_numaq(void) return found_numaq; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -61,7 +61,7 @@ struct genapic apic_numaq = { .check_apicid_used = numaq_check_apicid_used, .check_apicid_present = numaq_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = numaq_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 99a9bea8d141..1834887b94a3 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -54,7 +54,7 @@ struct genapic apic_summit = { .check_apicid_used = summit_check_apicid_used, .check_apicid_present = summit_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = summit_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, From a5c4329622a3437adef4b2a4288d127957743c97 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 219/566] x86, apic: clean up ->init_apic_ldr() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/apic.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 916451252b3a..819413082999 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -46,7 +46,7 @@ static inline unsigned long calculate_ldr(int cpu) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void init_apic_ldr(void) +static inline void bigsmp_init_apic_ldr(void) { unsigned long val; int cpu = smp_processor_id(); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 847008a77029..06f5757bf7af 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -52,7 +52,7 @@ static inline unsigned long calculate_ldr(int cpu) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void init_apic_ldr_cluster(void) +static inline void es7000_init_apic_ldr_cluster(void) { unsigned long val; int cpu = smp_processor_id(); @@ -62,7 +62,7 @@ static inline void init_apic_ldr_cluster(void) apic_write(APIC_LDR, val); } -static inline void init_apic_ldr(void) +static inline void es7000_init_apic_ldr(void) { unsigned long val; int cpu = smp_processor_id(); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 9c56542644ca..23e0a2da3a96 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -19,7 +19,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include -#define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define phys_pkg_id (apic->phys_pkg_id) @@ -36,7 +35,7 @@ extern void setup_apic_routing(void); * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void init_apic_ldr(void) +static inline void default_init_apic_ldr(void) { unsigned long val; diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e94881af9625..8e51f4163944 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) #define multi_timer_check (apic->multi_timer_check) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index defee3496ad6..802297489a34 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -28,7 +28,7 @@ static inline int numaq_apic_id_registered(void) return 1; } -static inline void init_apic_ldr(void) +static inline void numaq_init_apic_ldr(void) { /* Already done in NUMA-Q firmware */ } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 51df002ecf4c..9108c89fe881 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -37,7 +37,7 @@ static inline unsigned long summit_check_apicid_present(int bit) extern u8 cpu_2_logical_apicid[]; -static inline void init_apic_ldr(void) +static inline void summit_init_apic_ldr(void) { unsigned long val, id; int count = 0; diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 3853ed76c888..b7077936ac09 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1179,7 +1179,7 @@ void __cpuinit setup_local_APIC(void) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ - init_apic_ldr(); + apic->init_apic_ldr(); /* * Set Task Priority to 'accept all'. We never change this diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 391cc99cd21c..7b7fc471a3f7 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -75,7 +75,7 @@ struct genapic apic_bigsmp = { .check_apicid_present = bigsmp_check_apicid_present, .vector_allocation_domain = bigsmp_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 6adc3c69a3c9..633e8482af25 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -56,7 +56,7 @@ struct genapic apic_default = { .check_apicid_present = default_check_apicid_present, .vector_allocation_domain = default_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index bc1f21cd6a4d..b70833e35976 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -24,7 +24,7 @@ void __init es7000_update_genapic_to_cluster(void) apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; - apic->init_apic_ldr = init_apic_ldr_cluster; + apic->init_apic_ldr = es7000_init_apic_ldr_cluster; apic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; } @@ -117,7 +117,7 @@ struct genapic apic_es7000 = { .check_apicid_present = es7000_check_apicid_present, .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = es7000_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 712882f48c4e..a06fda579281 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -62,7 +62,7 @@ struct genapic apic_numaq = { .check_apicid_present = numaq_check_apicid_present, .vector_allocation_domain = numaq_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = numaq_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 1834887b94a3..36c552fa4275 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -55,7 +55,7 @@ struct genapic apic_summit = { .check_apicid_present = summit_check_apicid_present, .vector_allocation_domain = summit_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = summit_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, From d190cb87c4503014353f2310c4bfa2268fa7111d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 220/566] x86, apic: clean up ->ioapic_phys_id_map() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 3 ++- arch/x86/kernel/io_apic.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 819413082999..05116d5487d2 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -94,7 +94,7 @@ static inline int cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0xFFL); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 06f5757bf7af..db3e652f0f7d 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -125,7 +125,7 @@ static inline int cpu_to_logical_apicid(int cpu) #endif } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0xff); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 23e0a2da3a96..7abdaae06f24 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -99,7 +99,7 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) { return phys_map; } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 8e51f4163944..c1c96e6bb185 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) #define multi_timer_check (apic->multi_timer_check) #define apicid_to_node (apic->apicid_to_node) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 802297489a34..dc7499b92629 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -48,7 +48,7 @@ static inline int multi_timer_check(int apic, int irq) return apic != 0 && irq == 0; } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) { /* We don't have a good way to do this yet - hack */ return physids_promote(0xFUL); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 9108c89fe881..4dafb58f9307 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -109,7 +109,8 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) +static inline physid_mask_t + summit_ioapic_phys_id_map(physid_mask_t phys_id_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0x0F); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 49899e066247..db79ad9a7646 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2108,7 +2108,7 @@ static void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -3862,7 +3862,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 7b7fc471a3f7..f2a3418d0cc9 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -77,7 +77,7 @@ struct genapic apic_bigsmp = { .vector_allocation_domain = bigsmp_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 633e8482af25..c403f3d9300c 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -58,7 +58,7 @@ struct genapic apic_default = { .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index b70833e35976..ce09baf08724 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -119,7 +119,7 @@ struct genapic apic_es7000 = { .vector_allocation_domain = es7000_vector_allocation_domain, .init_apic_ldr = es7000_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = es7000_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index a06fda579281..5d98f18a0bde 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -64,7 +64,7 @@ struct genapic apic_numaq = { .vector_allocation_domain = numaq_vector_allocation_domain, .init_apic_ldr = numaq_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = numaq_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 36c552fa4275..6abdd53a01c5 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -57,7 +57,7 @@ struct genapic apic_summit = { .vector_allocation_domain = summit_vector_allocation_domain, .init_apic_ldr = summit_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = summit_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, From 72ce016583916fb7ffcbaa6a3e1f8f731b79a865 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 221/566] x86, apic: clean up ->setup_apic_routing() - separate the namespace - remove macros - remove namespace clash on 64-bit Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/genapic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 4 ++-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/acpi/boot.c | 5 ++--- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/mpparse.c | 6 +++--- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 05116d5487d2..321ea47b5dd1 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -56,7 +56,7 @@ static inline void bigsmp_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void setup_apic_routing(void) +static inline void bigsmp_setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "Physflat", nr_ioapics); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index db3e652f0f7d..f1183000a940 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -73,7 +73,7 @@ static inline void es7000_init_apic_ldr(void) } extern int apic_version [MAX_APICS]; -static inline void setup_apic_routing(void) +static inline void es7000_setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 661898c2229c..38b1202316f5 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -107,7 +107,7 @@ extern void apic_send_IPI_self(int vector); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); -extern void setup_apic_routing(void); +extern void default_setup_apic_routing(void); #endif #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 7abdaae06f24..d44677463046 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -25,7 +25,7 @@ static inline const struct cpumask *default_target_cpus(void) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) -extern void setup_apic_routing(void); +extern void default_setup_apic_routing(void); #else #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* @@ -70,7 +70,7 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -static inline void setup_apic_routing(void) +static inline void default_setup_apic_routing(void) { #ifdef CONFIG_X86_IO_APIC printk("Enabling APIC mode: %s. Using %d I/O APICs\n", diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index c1c96e6bb185..ddf369248ab7 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define setup_apic_routing (apic->setup_apic_routing) #define multi_timer_check (apic->multi_timer_check) #define apicid_to_node (apic->apicid_to_node) #define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index dc7499b92629..2feb7e72e9ea 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -33,7 +33,7 @@ static inline void numaq_init_apic_ldr(void) /* Already done in NUMA-Q firmware */ } -static inline void setup_apic_routing(void) +static inline void numaq_setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "NUMA-Q", nr_ioapics); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 4dafb58f9307..7ec2696bc9a0 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -74,7 +74,7 @@ static inline int summit_apic_id_registered(void) return 1; } -static inline void setup_apic_routing(void) +static inline void summit_setup_apic_routing(void) { printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", nr_ioapics); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 314fe0dddef4..539163161a4c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1360,9 +1360,8 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; -#ifdef CONFIG_X86_32 - setup_apic_routing(); -#endif + if (apic->setup_apic_routing) + apic->setup_apic_routing(); } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b7077936ac09..fcbcc03cd4bd 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1625,7 +1625,7 @@ int __init APIC_init_uniprocessor(void) enable_IR_x2apic(); #endif #ifdef CONFIG_X86_64 - setup_apic_routing(); + default_setup_apic_routing(); #endif verify_local_APIC(); diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 060945b8eec4..d57d2138f078 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -44,7 +44,7 @@ static struct genapic *apic_probe[] __initdata = { /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ -void __init setup_apic_routing(void) +void __init default_setup_apic_routing(void) { if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { if (!intr_remapping_enabled) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index fa6bb263892e..c8a534a16d98 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -390,9 +390,9 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) generic_bigsmp_probe(); #endif -#ifdef CONFIG_X86_32 - setup_apic_routing(); -#endif + if (apic->setup_apic_routing) + apic->setup_apic_routing(); + if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 45c096f605fe..3791b4ae567f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1128,7 +1128,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) #ifdef CONFIG_X86_64 enable_IR_x2apic(); - setup_apic_routing(); + default_setup_apic_routing(); #endif if (smp_sanity_check(max_cpus) < 0) { diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index f2a3418d0cc9..ad3837a59bd4 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -78,7 +78,7 @@ struct genapic apic_bigsmp = { .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = bigsmp_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c403f3d9300c..67f287fc12df 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -59,7 +59,7 @@ struct genapic apic_default = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = default_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index ce09baf08724..f61172939461 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -120,7 +120,7 @@ struct genapic apic_es7000 = { .init_apic_ldr = es7000_init_apic_ldr, .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = es7000_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 5d98f18a0bde..8c137f413485 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -65,7 +65,7 @@ struct genapic apic_numaq = { .init_apic_ldr = numaq_init_apic_ldr, .ioapic_phys_id_map = numaq_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = numaq_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6abdd53a01c5..0698566dc7b4 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -58,7 +58,7 @@ struct genapic apic_summit = { .init_apic_ldr = summit_init_apic_ldr, .ioapic_phys_id_map = summit_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = summit_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, From 33a201fac698a93d9d1ffa77030ba2ff38d1a3d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 07:17:26 +0100 Subject: [PATCH 222/566] x86, apic: streamline the ->multi_timer_check() quirk only NUMAQ uses this quirk: to prevent the timer IRQ from being added on secondary nodes. All other genapic templates can have a NULL ->multi_timer_check() callback. Also, extend the generic code to treat a NULL pointer accordingly. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 5 ----- arch/x86/include/asm/es7000/apic.h | 5 ----- arch/x86/include/asm/mach-default/mach_apic.h | 5 ----- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 5 ----- arch/x86/kernel/io_apic.c | 11 ++++++++--- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 14 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 321ea47b5dd1..df59298086c6 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -62,11 +62,6 @@ static inline void bigsmp_setup_apic_routing(void) "Physflat", nr_ioapics); } -static inline int multi_timer_check(int apic, int irq) -{ - return (0); -} - static inline int apicid_to_node(int logical_apicid) { return apicid_2_node[hard_smp_processor_id()]; diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index f1183000a940..632e4cd3f4fd 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -82,11 +82,6 @@ static inline void es7000_setup_apic_routing(void) nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); } -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - static inline int apicid_to_node(int logical_apicid) { return 0; diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index d44677463046..f418d470cf45 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -104,11 +104,6 @@ static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) return phys_map; } -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index ddf369248ab7..bdea0a759e8a 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define multi_timer_check (apic->multi_timer_check) #define apicid_to_node (apic->apicid_to_node) #define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) #define cpu_present_to_apicid (apic->cpu_present_to_apicid) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 2feb7e72e9ea..22bdf3d4c0e3 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -43,7 +43,7 @@ static inline void numaq_setup_apic_routing(void) * Skip adding the timer int on secondary nodes, which causes * a small but painful rift in the time-space continuum. */ -static inline int multi_timer_check(int apic, int irq) +static inline int numaq_multi_timer_check(int apic, int irq) { return apic != 0 && irq == 0; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 7ec2696bc9a0..acb7bd1de848 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -64,11 +64,6 @@ static inline void summit_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - static inline int summit_apic_id_registered(void) { return 1; diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index db79ad9a7646..282ea112f3cf 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1618,10 +1618,15 @@ static void __init setup_IO_APIC_irqs(void) } irq = pin_2_irq(idx, apic_id, pin); -#ifdef CONFIG_X86_32 - if (multi_timer_check(apic_id, irq)) + + /* + * Skip the timer IRQ if there's a quirk handler + * installed and if it returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(apic_id, irq)) continue; -#endif + desc = irq_to_desc_alloc_cpu(irq, cpu); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index ad3837a59bd4..d0749569cdf7 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -79,7 +79,7 @@ struct genapic apic_bigsmp = { .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 67f287fc12df..6a21aa7c0c6d 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -60,7 +60,7 @@ struct genapic apic_default = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = default_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index f61172939461..0be59a51df2f 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -121,7 +121,7 @@ struct genapic apic_es7000 = { .ioapic_phys_id_map = es7000_ioapic_phys_id_map, .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8c137f413485..da4ed653506a 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -66,7 +66,7 @@ struct genapic apic_numaq = { .ioapic_phys_id_map = numaq_ioapic_phys_id_map, .setup_apic_routing = numaq_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = numaq_multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 0698566dc7b4..b618a186f1e2 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -59,7 +59,7 @@ struct genapic apic_summit = { .ioapic_phys_id_map = summit_ioapic_phys_id_map, .setup_apic_routing = summit_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, From 3f57a318c36e1f24070a18df8c4971ca08d33142 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 223/566] x86, apic: clean up ->apicid_to_node() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 4 ++-- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index df59298086c6..77f0b7348755 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -62,7 +62,7 @@ static inline void bigsmp_setup_apic_routing(void) "Physflat", nr_ioapics); } -static inline int apicid_to_node(int logical_apicid) +static inline int bigsmp_apicid_to_node(int logical_apicid) { return apicid_2_node[hard_smp_processor_id()]; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 632e4cd3f4fd..bcdf31400dfa 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -82,7 +82,7 @@ static inline void es7000_setup_apic_routing(void) nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); } -static inline int apicid_to_node(int logical_apicid) +static inline int es7000_apicid_to_node(int logical_apicid) { return 0; } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index f418d470cf45..2f78209d972c 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -78,7 +78,7 @@ static inline void default_setup_apic_routing(void) #endif } -static inline int apicid_to_node(int logical_apicid) +static inline int default_apicid_to_node(int logical_apicid) { #ifdef CONFIG_SMP return apicid_2_node[hard_smp_processor_id()]; diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index bdea0a759e8a..b585a8e5f816 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define apicid_to_node (apic->apicid_to_node) #define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) #define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 22bdf3d4c0e3..a0e3b437118c 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -76,14 +76,14 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline int apicid_to_node(int logical_apicid) +static inline int numaq_apicid_to_node(int logical_apicid) { return logical_apicid >> 4; } static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) { - int node = apicid_to_node(logical_apicid); + int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); return physid_mask_of_physid(cpu + 4*node); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index acb7bd1de848..cfff2760e60d 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -75,7 +75,7 @@ static inline void summit_setup_apic_routing(void) nr_ioapics); } -static inline int apicid_to_node(int logical_apicid) +static inline int summit_apicid_to_node(int logical_apicid) { #ifdef CONFIG_SMP return apicid_2_node[hard_smp_processor_id()]; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3791b4ae567f..1dd4cecd4bc0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -163,7 +163,7 @@ static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); - int node = apicid_to_node(apicid); + int node = apic->apicid_to_node(apicid); if (!node_online(node)) node = first_online_node; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d0749569cdf7..2f4121499e5f 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -80,7 +80,7 @@ struct genapic apic_bigsmp = { .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 6a21aa7c0c6d..d391c2dc819d 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -61,7 +61,7 @@ struct genapic apic_default = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = default_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 0be59a51df2f..933f2a385990 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -122,7 +122,7 @@ struct genapic apic_es7000 = { .ioapic_phys_id_map = es7000_ioapic_phys_id_map, .setup_apic_routing = es7000_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = es7000_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index da4ed653506a..38344fb99793 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -67,7 +67,7 @@ struct genapic apic_numaq = { .ioapic_phys_id_map = numaq_ioapic_phys_id_map, .setup_apic_routing = numaq_setup_apic_routing, .multi_timer_check = numaq_multi_timer_check, - .apicid_to_node = apicid_to_node, + .apicid_to_node = numaq_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index b618a186f1e2..6150604deaa3 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -60,7 +60,7 @@ struct genapic apic_summit = { .ioapic_phys_id_map = summit_ioapic_phys_id_map, .setup_apic_routing = summit_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = summit_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, From 5257c5111ca21c8e857b65a79ab986b313e1c362 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 224/566] x86, apic: clean up ->cpu_to_logical_apicid() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 6 +++--- arch/x86/include/asm/es7000/apic.h | 16 ++++++++-------- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 3 ++- arch/x86/include/asm/summit/apic.h | 8 ++++---- arch/x86/kernel/ipi.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 77f0b7348755..d0d894ff7d3e 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -82,7 +82,7 @@ static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int bigsmp_cpu_to_logical_apicid(int cpu) { if (cpu >= nr_cpu_ids) return BAD_APICID; @@ -115,7 +115,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) int apicid; cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = bigsmp_cpu_to_logical_apicid(cpu); return apicid; } @@ -132,7 +132,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; if (cpu < nr_cpu_ids) - return cpu_to_logical_apicid(cpu); + return bigsmp_cpu_to_logical_apicid(cpu); return BAD_APICID; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index bcdf31400dfa..e0cd07e74f98 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -109,7 +109,7 @@ static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int es7000_cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP if (cpu >= nr_cpu_ids) @@ -155,10 +155,10 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) * on the same apicid cluster return default value of target_cpus(): */ cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = es7000_cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); + int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); @@ -182,20 +182,20 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set == nr_cpu_ids) - return cpu_to_logical_apicid(0); + return es7000_cpu_to_logical_apicid(0); /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = es7000_cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); + int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); - return cpu_to_logical_apicid(0); + return es7000_cpu_to_logical_apicid(0); } apicid = new_apicid; cpus_found++; @@ -209,7 +209,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { - int apicid = cpu_to_logical_apicid(0); + int apicid = es7000_cpu_to_logical_apicid(0); cpumask_var_t cpumask; if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 2f78209d972c..eae3e4b6ed04 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -105,7 +105,7 @@ static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) } /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int default_cpu_to_logical_apicid(int cpu) { return 1 << cpu; } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index b585a8e5f816..2ea913e8e0d0 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) #define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index a0e3b437118c..6989abd34853 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -56,7 +56,8 @@ static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) /* Mapping from cpu number to logical apicid */ extern u8 cpu_2_logical_apicid[]; -static inline int cpu_to_logical_apicid(int cpu) + +static inline int numaq_cpu_to_logical_apicid(int cpu) { if (cpu >= nr_cpu_ids) return BAD_APICID; diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index cfff2760e60d..d564d7ee3f6c 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -85,7 +85,7 @@ static inline int summit_apicid_to_node(int logical_apicid) } /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int summit_cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP if (cpu >= nr_cpu_ids) @@ -145,10 +145,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = summit_cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); + int new_apicid = summit_cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); @@ -165,7 +165,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { - int apicid = cpu_to_logical_apicid(0); + int apicid = summit_cpu_to_logical_apicid(0); cpumask_var_t cpumask; if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index e2e4895ca69f..367c5e684fa1 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -140,7 +140,7 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } @@ -155,7 +155,7 @@ void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) if (query_cpu != this_cpu) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), + __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 2f4121499e5f..cd6f02ba88ea 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -81,7 +81,7 @@ struct genapic apic_bigsmp = { .setup_apic_routing = bigsmp_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = bigsmp_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d391c2dc819d..ef9b936c41ab 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -62,7 +62,7 @@ struct genapic apic_default = { .setup_apic_routing = default_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 933f2a385990..74bf2b6b7519 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -123,7 +123,7 @@ struct genapic apic_es7000 = { .setup_apic_routing = es7000_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = es7000_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 38344fb99793..461f5beedb28 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -68,7 +68,7 @@ struct genapic apic_numaq = { .setup_apic_routing = numaq_setup_apic_routing, .multi_timer_check = numaq_multi_timer_check, .apicid_to_node = numaq_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6150604deaa3..d99be2d4efc7 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -61,7 +61,7 @@ struct genapic apic_summit = { .setup_apic_routing = summit_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = summit_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, From a21769a4461801454930a06bc18bd8249cd9e993 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 225/566] x86, apic: clean up ->cpu_present_to_apicid() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/genapic.h | 2 ++ arch/x86/include/asm/mach-default/mach_apic.h | 11 ++++++++++- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/smpboot.c | 9 ++++++++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 34 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index d0d894ff7d3e..eea5e9788ddd 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -67,7 +67,7 @@ static inline int bigsmp_apicid_to_node(int logical_apicid) return apicid_2_node[hard_smp_processor_id()]; } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index e0cd07e74f98..7cdde3d9c5f6 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -88,7 +88,7 @@ static inline int es7000_apicid_to_node(int logical_apicid) } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int es7000_cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 38b1202316f5..2cb14d51e459 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -108,6 +108,8 @@ extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern void default_setup_apic_routing(void); + +extern int default_cpu_present_to_apicid(int mps_cpu); #endif #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index eae3e4b6ed04..15d5627a9d6f 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -110,7 +110,7 @@ static inline int default_cpu_to_logical_apicid(int cpu) return 1 << cpu; } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int __default_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); @@ -118,6 +118,15 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +#endif + static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) { return physid_mask_of_physid(phys_apicid); diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 2ea913e8e0d0..332fe93ab41a 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) #define check_phys_apicid_present (apic->check_phys_apicid_present) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 6989abd34853..f482b0634476 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -69,7 +69,7 @@ static inline int numaq_cpu_to_logical_apicid(int cpu) * cpu to APIC ID relation to properly interact with the intelligent * mode of the cluster controller. */ -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int numaq_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < 60) return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index d564d7ee3f6c..fc1273691880 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -96,7 +96,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu) #endif } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int summit_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 9446f372a16b..f4a2c1c0a1a4 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -197,7 +197,7 @@ struct genapic apic_flat = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, @@ -341,7 +341,7 @@ struct genapic apic_physflat = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 2eeca6e744af..710d612a9641 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -199,7 +199,7 @@ struct genapic apic_x2apic_cluster = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index be0ee3e56ef1..49a449178c3b 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -195,7 +195,7 @@ struct genapic apic_x2apic_phys = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 68b423f3da99..a08a63591864 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -260,7 +260,7 @@ struct genapic apic_x2apic_uv_x = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1dd4cecd4bc0..812bf39de355 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -903,9 +903,16 @@ do_rest: return boot_error; } +#ifdef CONFIG_X86_64 +int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} +#endif + int __cpuinit native_cpu_up(unsigned int cpu) { - int apicid = cpu_present_to_apicid(cpu); + int apicid = apic->cpu_present_to_apicid(cpu); unsigned long flags; int err; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index cd6f02ba88ea..1eaf18c801d8 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -82,7 +82,7 @@ struct genapic apic_bigsmp = { .multi_timer_check = NULL, .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index ef9b936c41ab..2903657f4209 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -63,7 +63,7 @@ struct genapic apic_default = { .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 74bf2b6b7519..5a3a8ab4f8a0 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -124,7 +124,7 @@ struct genapic apic_es7000 = { .multi_timer_check = NULL, .apicid_to_node = es7000_apicid_to_node, .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = es7000_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 461f5beedb28..d928cae211cc 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -69,7 +69,7 @@ struct genapic apic_numaq = { .multi_timer_check = numaq_multi_timer_check, .apicid_to_node = numaq_apicid_to_node, .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = numaq_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index d99be2d4efc7..e6bb34ee580b 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -62,7 +62,7 @@ struct genapic apic_summit = { .multi_timer_check = NULL, .apicid_to_node = summit_apicid_to_node, .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = summit_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, From 8058714a41afc4c983acb274b1adf7bd3cfe7f6e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 226/566] x86, apic: clean up ->apicid_to_cpu_present() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 4 ++-- arch/x86/kernel/io_apic.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index eea5e9788ddd..080457450b22 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -75,7 +75,7 @@ static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) { return physid_mask_of_physid(phys_apicid); } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 7cdde3d9c5f6..a09e1133ced9 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -98,7 +98,7 @@ static inline int es7000_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +static inline physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) { static int id = 0; physid_mask_t mask; diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 15d5627a9d6f..22683e5b82be 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -127,7 +127,7 @@ static inline int default_cpu_present_to_apicid(int mps_cpu) extern int default_cpu_present_to_apicid(int mps_cpu); #endif -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) { return physid_mask_of_physid(phys_apicid); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 332fe93ab41a..997618f2eb5c 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) #define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index f482b0634476..8ac000f99285 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -82,7 +82,7 @@ static inline int numaq_apicid_to_node(int logical_apicid) return logical_apicid >> 4; } -static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) +static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) { int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index fc1273691880..79c1a45f886b 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -105,13 +105,13 @@ static inline int summit_cpu_present_to_apicid(int mps_cpu) } static inline physid_mask_t - summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +summit_ioapic_phys_id_map(physid_mask_t phys_id_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0x0F); } -static inline physid_mask_t apicid_to_cpu_present(int apicid) +static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) { return physid_mask_of_physid(0); } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 282ea112f3cf..3d85d3d810b2 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2155,7 +2155,7 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -3899,7 +3899,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apicid_to_cpu_present(apic_id); + tmp = apic->apicid_to_cpu_present(apic_id); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index d801d06af068..2ed5bdf15c9f 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -200,7 +200,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apicid_to_cpu_present(m->apicid); + apic_cpus = apic->apicid_to_cpu_present(m->apicid); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 1eaf18c801d8..613965230744 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -83,7 +83,7 @@ struct genapic apic_bigsmp = { .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 2903657f4209..8fc704a3db7c 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -64,7 +64,7 @@ struct genapic apic_default = { .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = default_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 5a3a8ab4f8a0..1e0e16274557 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -125,7 +125,7 @@ struct genapic apic_es7000 = { .apicid_to_node = es7000_apicid_to_node, .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = es7000_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index d928cae211cc..839b86b765a1 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -70,7 +70,7 @@ struct genapic apic_numaq = { .apicid_to_node = numaq_apicid_to_node, .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, .cpu_present_to_apicid = numaq_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = numaq_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index e6bb34ee580b..b6e37607a523 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -63,7 +63,7 @@ struct genapic apic_summit = { .apicid_to_node = summit_apicid_to_node, .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, .cpu_present_to_apicid = summit_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = summit_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, From d83093b50416f4ca59d3a84b2ddc217748214d64 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: [PATCH 227/566] x86: refactor ->setup_portio_remap() subarch methods Only NUMAQ has a real ->setup_portio_remap() method, the other subarchitectures define it but keep it empty. So mark the vector as NULL, extend the generic code to handle NULL -setup_portio_remap() entries and remove all the empty handlers. Also move the NUMAQ method from the header file into the apic driver .c file. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 4 ---- arch/x86/include/asm/mach-default/mach_apic.h | 4 ---- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 13 ------------- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 15 ++++++++++++++- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 22 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 080457450b22..2fa70032e3b4 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -95,7 +95,7 @@ static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) return physids_promote(0xFFL); } -static inline void setup_portio_remap(void) +static inline void bigsmp_setup_portio_remap(void) { } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index a09e1133ced9..c5b0eb51e53a 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -127,10 +127,6 @@ static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) } -static inline void setup_portio_remap(void) -{ -} - extern unsigned int boot_cpu_physical_apicid; static inline int check_phys_apicid_present(int cpu_physical_apicid) { diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 22683e5b82be..54c20e19e280 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -132,10 +132,6 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -static inline void setup_portio_remap(void) -{ -} - static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) { return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 997618f2eb5c..393a97c5685f 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define setup_portio_remap (apic->setup_portio_remap) #define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 8ac000f99285..6b626519cd75 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -92,19 +92,6 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) extern void *xquad_portio; -static inline void setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk("Remapping cross-quad port I/O for %d quads\n", num_quads); - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - printk("xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) { return (1); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 79c1a45f886b..131343b0b757 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -116,7 +116,7 @@ static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) return physid_mask_of_physid(0); } -static inline void setup_portio_remap(void) +static inline void summit_setup_portio_remap(void) { } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 812bf39de355..0e7d26c01f9f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1170,7 +1170,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) map_cpu_to_logical_apicid(); - setup_portio_remap(); + if (apic->setup_portio_remap) + apic->setup_portio_remap(); smpboot_setup_io_apic(); /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 613965230744..424740554a32 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -84,7 +84,7 @@ struct genapic apic_bigsmp = { .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 8fc704a3db7c..b48a58daf719 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -65,7 +65,7 @@ struct genapic apic_default = { .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = default_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 1e0e16274557..449eca5b6048 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -126,7 +126,7 @@ struct genapic apic_es7000 = { .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, .cpu_present_to_apicid = es7000_cpu_present_to_apicid, .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 839b86b765a1..e60361b0bf1e 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -44,6 +44,19 @@ static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } +static void numaq_setup_portio_remap(void) +{ + int num_quads = num_online_nodes(); + + if (num_quads <= 1) + return; + + printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); + printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +} + struct genapic apic_numaq = { .name = "NUMAQ", @@ -71,7 +84,7 @@ struct genapic apic_numaq = { .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, .cpu_present_to_apicid = numaq_cpu_present_to_apicid, .apicid_to_cpu_present = numaq_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = numaq_setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index b6e37607a523..ffcf7ca2e8ce 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -64,7 +64,7 @@ struct genapic apic_summit = { .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, .cpu_present_to_apicid = summit_cpu_present_to_apicid, .apicid_to_cpu_present = summit_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, From a27a621001f4c3e57caf47feff4b014577fd01c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: [PATCH 228/566] x86: refactor ->check_phys_apicid_present() subarch methods - spread out the namespace to per driver methods - extend it to 64-bit as well so that we can use apic->check_phys_apicid_present() unconditionally Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 6 +++--- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/genapic.h | 1 + arch/x86/include/asm/mach-default/mach_apic.h | 18 +++++++++++++----- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 4 ++-- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/smpboot.c | 7 ++++++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 38 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 2fa70032e3b4..5ba4118fcdf2 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -99,13 +99,13 @@ static inline void bigsmp_setup_portio_remap(void) { } -static inline void enable_apic_mode(void) +static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) { + return 1; } -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +static inline void enable_apic_mode(void) { - return (1); } /* As we are using single CPU as destination, pick only one CPU here */ diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index c5b0eb51e53a..717c27f8da61 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -126,9 +126,9 @@ static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) return physids_promote(0xff); } - extern unsigned int boot_cpu_physical_apicid; -static inline int check_phys_apicid_present(int cpu_physical_apicid) + +static inline int es7000_check_phys_apicid_present(int cpu_physical_apicid) { boot_cpu_physical_apicid = read_apic_id(); return (1); diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 2cb14d51e459..f292fd02ebab 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -110,6 +110,7 @@ DECLARE_PER_CPU(int, x2apic_extra_bits); extern void default_setup_apic_routing(void); extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 54c20e19e280..0a824d3a2238 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -118,13 +118,26 @@ static inline int __default_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } +static inline int +__default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + #ifdef CONFIG_X86_32 static inline int default_cpu_present_to_apicid(int mps_cpu) { return __default_cpu_present_to_apicid(mps_cpu); } + +static inline int +default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} #else extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) @@ -132,11 +145,6 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); -} - static inline void enable_apic_mode(void) { } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 393a97c5685f..efd762d951ac 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define enable_apic_mode (apic->enable_apic_mode) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 6b626519cd75..3be735e2ab90 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -92,9 +92,9 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) extern void *xquad_portio; -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) { - return (1); + return 1; } static inline void enable_apic_mode(void) diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 131343b0b757..fe578f6df782 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -120,7 +120,7 @@ static inline void summit_setup_portio_remap(void) { } -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) { return 1; } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index f4a2c1c0a1a4..78adf71f7e55 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -200,7 +200,7 @@ struct genapic apic_flat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, @@ -344,7 +344,7 @@ struct genapic apic_physflat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 710d612a9641..7062e24b18f6 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -202,7 +202,7 @@ struct genapic apic_x2apic_cluster = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 49a449178c3b..7177a1110f0d 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -198,7 +198,7 @@ struct genapic apic_x2apic_phys = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a08a63591864..debd721f0b4e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -263,7 +263,7 @@ struct genapic apic_x2apic_uv_x = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0e7d26c01f9f..ab83be2f8e0f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -908,6 +908,11 @@ int default_cpu_present_to_apicid(int mps_cpu) { return __default_cpu_present_to_apicid(mps_cpu); } + +int default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} #endif int __cpuinit native_cpu_up(unsigned int cpu) @@ -1058,7 +1063,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", boot_cpu_physical_apicid); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 424740554a32..82743d16c23d 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -85,7 +85,7 @@ struct genapic apic_bigsmp = { .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index b48a58daf719..d0374c69ad01 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -66,7 +66,7 @@ struct genapic apic_default = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = default_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 449eca5b6048..52b3eb5e645f 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -127,7 +127,7 @@ struct genapic apic_es7000 = { .cpu_present_to_apicid = es7000_cpu_present_to_apicid, .apicid_to_cpu_present = es7000_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = es7000_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index e60361b0bf1e..7ec2ca43ca20 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -85,7 +85,7 @@ struct genapic apic_numaq = { .cpu_present_to_apicid = numaq_cpu_present_to_apicid, .apicid_to_cpu_present = numaq_apicid_to_cpu_present, .setup_portio_remap = numaq_setup_portio_remap, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = numaq_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index ffcf7ca2e8ce..acf12de8916f 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -65,7 +65,7 @@ struct genapic apic_summit = { .cpu_present_to_apicid = summit_cpu_present_to_apicid, .apicid_to_cpu_present = summit_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = summit_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, From 4904033302c745342e3b3a611881cdee57fbe06a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: [PATCH 229/566] x86: refactor ->enable_apic_mode() subarch methods Only ES7000 has a real ->enable_apic_mode() method, the other subarchitectures define it but keep it empty. So mark the vector as NULL, extend the generic code to handle NULL -setup_portio_remap() entries and remove all the empty handlers. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 ---- arch/x86/include/asm/mach-default/mach_apic.h | 3 --- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 4 ---- arch/x86/include/asm/summit/apic.h | 4 ---- arch/x86/kernel/apic.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 6 +++--- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 11 files changed, 9 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 5ba4118fcdf2..f49d440862f1 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -104,10 +104,6 @@ static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline void enable_apic_mode(void) -{ -} - /* As we are using single CPU as destination, pick only one CPU here */ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 0a824d3a2238..3647c92d45e5 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -145,8 +145,5 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -static inline void enable_apic_mode(void) -{ -} #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index efd762d951ac..6fed521585c4 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,7 +5,6 @@ #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define enable_apic_mode (apic->enable_apic_mode) #define phys_pkg_id (apic->phys_pkg_id) #define wakeup_secondary_cpu (apic->wakeup_cpu) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 3be735e2ab90..dc93c30972e7 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -97,10 +97,6 @@ static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline void enable_apic_mode(void) -{ -} - /* * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index fe578f6df782..526d19e79447 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -125,10 +125,6 @@ static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline void enable_apic_mode(void) -{ -} - static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index fcbcc03cd4bd..9d6374da4784 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1763,7 +1763,8 @@ void __init connect_bsp_APIC(void) outb(0x01, 0x23); } #endif - enable_apic_mode(); + if (apic->enable_apic_mode) + apic->enable_apic_mode(); } /** diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 82743d16c23d..e151b472456f 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -86,7 +86,7 @@ struct genapic apic_bigsmp = { .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d0374c69ad01..ac6be195b977 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -67,7 +67,7 @@ struct genapic apic_default = { .apicid_to_cpu_present = default_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 52b3eb5e645f..9acb71120ef6 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -36,10 +36,10 @@ static int probe_es7000(void) } extern void es7000_sw_apic(void); -static void __init enable_apic_mode(void) + +static void __init es7000_enable_apic_mode(void) { es7000_sw_apic(); - return; } static __init int @@ -128,7 +128,7 @@ struct genapic apic_es7000 = { .apicid_to_cpu_present = es7000_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = es7000_enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 7ec2ca43ca20..8d3358de3fe7 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -86,7 +86,7 @@ struct genapic apic_numaq = { .apicid_to_cpu_present = numaq_apicid_to_cpu_present, .setup_portio_remap = numaq_setup_portio_remap, .check_phys_apicid_present = numaq_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index acf12de8916f..cb83bcbb2dec 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -66,7 +66,7 @@ struct genapic apic_summit = { .apicid_to_cpu_present = summit_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = summit_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, From b0b20e5a3a6615ae750804523aeedd32911bb9d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:15:06 +0100 Subject: [PATCH 230/566] x86, es7000: clean up es7000_enable_apic_mode() - eliminate the needless es7000_enable_apic_mode() complication which was not apparent prior the namespace cleanups - clean up the control flow in es7000_enable_apic_mode() - other cleanups Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 2 ++ arch/x86/kernel/es7000_32.c | 25 +++++++++++++------------ arch/x86/mach-generic/es7000.c | 7 ------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 717c27f8da61..038c4f0e480b 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -36,6 +36,8 @@ static inline unsigned long es7000_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } +extern void es7000_enable_apic_mode(void); + #define apicid_cluster(apicid) (apicid & 0xF0) static inline unsigned long calculate_ldr(int cpu) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 20a2a43c2a9c..e73fe18488ac 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -359,20 +359,21 @@ es7000_mip_write(struct mip_reg *mip_reg) return status; } -void __init -es7000_sw_apic(void) +void __init es7000_enable_apic_mode(void) { - if (es7000_plat) { - int mip_status; - struct mip_reg es7000_mip_reg; + struct mip_reg es7000_mip_reg; + int mip_status; - printk("ES7000: Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0 = MIP_SW_APIC; - es7000_mip_reg.off_38 = (MIP_VALID); - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) - printk("es7000_sw_apic: command failed, status = %x\n", - mip_status); + if (!es7000_plat) return; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = MIP_VALID; + + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { + printk("es7000_enable_apic_mode: command failed, status = %x\n", + mip_status); } } diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 9acb71120ef6..1185964b7a33 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -35,13 +35,6 @@ static int probe_es7000(void) return 0; } -extern void es7000_sw_apic(void); - -static void __init es7000_enable_apic_mode(void) -{ - es7000_sw_apic(); -} - static __init int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { From d4c9a9f3d416cfa1f5ffbe09d864d069467fe693 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:31:22 +0100 Subject: [PATCH 231/566] x86, apic: unify phys_pkg_id() - unify the call signature of 64-bit to that of 32-bit - clean up the types all around - clean up namespace contamination Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/genapic.h | 6 +----- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 5 +++-- arch/x86/kernel/cpu/addon_cpuid_features.c | 10 +--------- arch/x86/kernel/cpu/common.c | 11 +---------- arch/x86/kernel/genapic_flat_64.c | 6 +++--- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- 12 files changed, 19 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index f49d440862f1..b7cba5b5635b 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -133,7 +133,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 038c4f0e480b..d2c6c202e8bd 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -221,7 +221,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, return apicid; } -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index f292fd02ebab..14b19de8cd09 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -48,11 +48,7 @@ struct genapic { void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); void (*enable_apic_mode)(void); -#ifdef CONFIG_X86_32 - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); -#else - unsigned int (*phys_pkg_id)(int index_msb); -#endif + int (*phys_pkg_id)(int cpuid_apic, int index_msb); /* * When one of the next two hooks returns 1 the genapic diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 3647c92d45e5..55797a35150f 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -65,7 +65,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index dc93c30972e7..bc2c8a425c03 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -113,7 +113,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 526d19e79447..64cd441ae006 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -175,13 +175,14 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, return apicid; } -/* cpuid returns the value latched in the HW at reset, not the APIC ID +/* + * cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. * * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return hard_smp_processor_id() >> index_msb; } diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 4e581fdc0a5a..84f8e4a5aef7 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -116,7 +116,6 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; -#ifdef CONFIG_X86_32 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); @@ -124,14 +123,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) * Reinit the apicid, now that we have extended initial_apicid. */ c->apicid = phys_pkg_id(c->initial_apicid, 0); -#else - c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; - c->phys_proc_id = phys_pkg_id(core_plus_mask_width); - /* - * Reinit the apicid, now that we have extended initial_apicid. - */ - c->apicid = phys_pkg_id(0); -#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 275e2cb43b91..93c491c4fe7f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -442,11 +442,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -454,13 +450,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); -#endif } out: @@ -742,7 +733,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); #ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); + c->apicid = phys_pkg_id(c->initial_apicid, 0); #endif /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 78adf71f7e55..cc9e07b96094 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -169,7 +169,7 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return mask1 & mask2; } -static unsigned int phys_pkg_id(int index_msb) +static int flat_phys_pkg_id(int initial_apic_id, int index_msb) { return hard_smp_processor_id() >> index_msb; } @@ -202,7 +202,7 @@ struct genapic apic_flat = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, @@ -346,7 +346,7 @@ struct genapic apic_physflat = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 7062e24b18f6..18b6f14376eb 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -157,7 +157,7 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int phys_pkg_id(int index_msb) +static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) { return current_cpu_data.initial_apicid >> index_msb; } @@ -204,7 +204,7 @@ struct genapic apic_x2apic_cluster = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = x2apic_cluster_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 7177a1110f0d..2cb6f49e4c50 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -156,7 +156,7 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int phys_pkg_id(int index_msb) +static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) { return current_cpu_data.initial_apicid >> index_msb; } @@ -200,7 +200,7 @@ struct genapic apic_x2apic_phys = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index debd721f0b4e..67e7658775e7 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -226,7 +226,7 @@ static unsigned int uv_read_apic_id(void) return get_apic_id(apic_read(APIC_ID)); } -static unsigned int phys_pkg_id(int index_msb) +static int uv_phys_pkg_id(int initial_apicid, int index_msb) { return uv_read_apic_id() >> index_msb; } @@ -265,7 +265,7 @@ struct genapic apic_x2apic_uv_x = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = uv_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, From cb8cc442dc7e07cb5438b357843ab4095ad73933 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:24:54 +0100 Subject: [PATCH 232/566] x86, apic: refactor ->phys_pkg_id() Refactor the ->phys_pkg_id() methods: - namespace separation - macro wrapper removal - open-coded calls to the methods in the generic code Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/cpu/addon_cpuid_features.c | 6 +++--- arch/x86/kernel/cpu/common.c | 8 ++++---- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 17 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index b7cba5b5635b..1230f5d7a38e 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -133,7 +133,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index d2c6c202e8bd..f183dfb4de4a 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -221,7 +221,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, return apicid; } -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int es7000_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 55797a35150f..d0605281a6b7 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -21,7 +21,6 @@ static inline const struct cpumask *default_target_cpus(void) #include #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define phys_pkg_id (apic->phys_pkg_id) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) @@ -65,7 +64,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 6fed521585c4..1eeb5b61e488 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,7 +5,6 @@ #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define phys_pkg_id (apic->phys_pkg_id) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index bc2c8a425c03..765c4d5124cb 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -113,7 +113,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 64cd441ae006..fa6b3b45290d 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -182,7 +182,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, * * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. */ -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) { return hard_smp_processor_id() >> index_msb; } diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84f8e4a5aef7..e8bb892c09fd 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -116,13 +116,13 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; - c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. */ - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 93c491c4fe7f..055b9c3a6600 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -442,7 +442,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -450,7 +450,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); } @@ -686,7 +686,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); # else c->apicid = c->initial_apicid; # endif @@ -733,7 +733,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); #ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); #endif /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index e151b472456f..d04b38954df3 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -87,7 +87,7 @@ struct genapic apic_bigsmp = { .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = bigsmp_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index ac6be195b977..5c9266f756e0 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -68,7 +68,7 @@ struct genapic apic_default = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = default_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 1185964b7a33..52787e34c9cc 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -122,7 +122,7 @@ struct genapic apic_es7000 = { .setup_portio_remap = NULL, .check_phys_apicid_present = es7000_check_phys_apicid_present, .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = es7000_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8d3358de3fe7..6a1134e6d72d 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -87,7 +87,7 @@ struct genapic apic_numaq = { .setup_portio_remap = numaq_setup_portio_remap, .check_phys_apicid_present = numaq_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = numaq_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index cb83bcbb2dec..2d6843a61d97 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -67,7 +67,7 @@ struct genapic apic_summit = { .setup_portio_remap = NULL, .check_phys_apicid_present = summit_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = summit_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, From 5f836405ef632ba82f4a5261ff2be4198e53b51b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:47:42 +0100 Subject: [PATCH 233/566] x86, smp: clean up mps_oem_check() Impact: cleanup - allow NULL ->mps_oem_check() entries - clean up the code flow Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 3 +-- arch/x86/mach-generic/probe.c | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 14b19de8cd09..8bb1c73c55b7 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -55,8 +55,7 @@ struct genapic { * is switched to this. Essentially they are additional * probe functions: */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, - char *productid); + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index a21e2b1a7011..799a70f4d90e 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -113,17 +113,21 @@ void __init generic_apic_probe(void) int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { int i; + for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { - if (!cmdline_apic) { - apic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; + if (!apic_probe[i]->mps_oem_check) + continue; + if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) + continue; + + if (!cmdline_apic) { + apic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + apic->name); } + return 1; } return 0; } From 1322a2e2db87c938d8381f8501af9a4d0eab8bc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:54:56 +0100 Subject: [PATCH 234/566] x86, mpparse: call the generic quirk handlers early Call all the registered MPS quirk handlers early. These methods scan low RAM typically for specific signatures so are safe to be called early. Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c8a534a16d98..f6fb1928439d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -292,16 +292,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) return 0; #ifdef CONFIG_X86_32 - /* - * need to make sure summit and es7000's mps_oem_check is safe to be - * called early via genericarch 's mps_oem_check - */ - if (early) { -#ifdef CONFIG_X86_NUMAQ - numaq_mps_oem_check(mpc, oem, str); -#endif - } else - mps_oem_check(mpc, oem, str); + mps_oem_check(mpc, oem, str); #endif /* save the local APIC address, it might be non-default */ if (!acpi_lapic) From 9c7642470ecf03d8b4946a2addc8fe631b8426dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:44:32 +0100 Subject: [PATCH 235/566] x86: consolidate the ->mps_oem_check() code - spread out the mps_oem_check() namespace on a per APIC driver basis Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/mpparse.h | 6 ------ arch/x86/include/asm/mach-default/mach_mpparse.h | 2 +- arch/x86/include/asm/mach-generic/mach_mpparse.h | 3 +-- arch/x86/include/asm/summit/mpparse.h | 4 ++-- arch/x86/kernel/mpparse.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 5 +++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/probe.c | 3 ++- arch/x86/mach-generic/summit.c | 2 +- 11 files changed, 15 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h index 30692c4ae859..662eb1e574de 100644 --- a/arch/x86/include/asm/es7000/mpparse.h +++ b/arch/x86/include/asm/es7000/mpparse.h @@ -8,13 +8,7 @@ extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); extern void setup_unisys(void); -#ifndef CONFIG_X86_GENERICARCH -extern int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id); -extern int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid); -#endif - #ifdef CONFIG_ACPI - static inline int es7000_check_dsdt(void) { struct acpi_table_header header; diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h index 8fa01770ba62..af0da140df95 100644 --- a/arch/x86/include/asm/mach-default/mach_mpparse.h +++ b/arch/x86/include/asm/mach-default/mach_mpparse.h @@ -2,7 +2,7 @@ #define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H static inline int -mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { return 0; } diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h index f497d96c76bb..22bfb56f8fbd 100644 --- a/arch/x86/include/asm/mach-generic/mach_mpparse.h +++ b/arch/x86/include/asm/mach-generic/mach_mpparse.h @@ -1,8 +1,7 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H #define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H - -extern int mps_oem_check(struct mpc_table *, char *, char *); +extern int generic_mps_oem_check(struct mpc_table *, char *, char *); extern int default_acpi_madt_oem_check(char *, char *); diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h index 555ed8238e94..4bbcce39acb8 100644 --- a/arch/x86/include/asm/summit/mpparse.h +++ b/arch/x86/include/asm/summit/mpparse.h @@ -11,8 +11,8 @@ extern void setup_summit(void); #define setup_summit() {} #endif -static inline int mps_oem_check(struct mpc_table *mpc, char *oem, - char *productid) +static inline int +summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { if (!strncmp(oem, "IBM ENSW", 8) && (!strncmp(productid, "VIGIL SMP", 9) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f6fb1928439d..b12fa5ce6f58 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -292,7 +292,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) return 0; #ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); + generic_mps_oem_check(mpc, oem, str); #endif /* save the local APIC address, it might be non-default */ if (!acpi_lapic) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d04b38954df3..6bf6aafeb2c6 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -88,7 +88,7 @@ struct genapic apic_bigsmp = { .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = bigsmp_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = NULL, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 5c9266f756e0..e5f85cd75b43 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -69,7 +69,7 @@ struct genapic apic_default = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = default_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = NULL, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 52787e34c9cc..f861163cd396 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -36,11 +36,12 @@ static int probe_es7000(void) } static __init int -mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { if (mpc->oemptr) { struct mpc_oemtable *oem_table = (struct mpc_oemtable *)mpc->oemptr; + if (!strncmp(oem, "UNISYS", 6)) return parse_unisys_oem((char *)oem_table); } @@ -123,7 +124,7 @@ struct genapic apic_es7000 = { .check_phys_apicid_present = es7000_check_phys_apicid_present, .enable_apic_mode = es7000_enable_apic_mode, .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = es7000_mps_oem_check, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 6a1134e6d72d..517882c9c15a 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -19,7 +19,7 @@ #include #include -static int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { numaq_mps_oem_check(mpc, oem, productid); return found_numaq; @@ -88,7 +88,7 @@ struct genapic apic_numaq = { .check_phys_apicid_present = numaq_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = __numaq_mps_oem_check, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 799a70f4d90e..ab68c6e5c48a 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -110,7 +110,8 @@ void __init generic_apic_probe(void) /* These functions can switch the APIC even after the initial ->probe() */ -int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +int __init +generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { int i; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2d6843a61d97..719e944ff308 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -68,7 +68,7 @@ struct genapic apic_summit = { .check_phys_apicid_present = summit_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = summit_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = summit_mps_oem_check, .get_apic_id = get_apic_id, .set_apic_id = NULL, From ca6c8ed4646f8ccaa4f7db618bf69b8b8fb49767 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 14:08:38 +0100 Subject: [PATCH 236/566] x86, apic: refactor ->get_apic_id() & GET_APIC_ID() - spread out the namespace on a per driver basis - get rid of macro wrappers - small cleanups Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apicdef.h | 6 ++---- arch/x86/include/asm/es7000/apicdef.h | 6 ++---- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apicdef.h | 10 +++++----- arch/x86/include/asm/mach-generic/mach_apicdef.h | 1 - arch/x86/include/asm/numaq/apicdef.h | 7 ++----- arch/x86/include/asm/smp.h | 2 +- arch/x86/include/asm/summit/apicdef.h | 6 ++---- arch/x86/kernel/genapic_flat_64.c | 9 +++++---- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 6 +++--- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 32 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h index 392c3f5ef2fe..ed25dd6503b2 100644 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ b/arch/x86/include/asm/bigsmp/apicdef.h @@ -3,11 +3,9 @@ #define APIC_ID_MASK (0xFF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned bigsmp_get_apic_id(unsigned long x) { - return (((x)>>24)&0xFF); + return (x >> 24) & 0xFF; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h index 8b234a3cb851..e23791762a19 100644 --- a/arch/x86/include/asm/es7000/apicdef.h +++ b/arch/x86/include/asm/es7000/apicdef.h @@ -3,11 +3,9 @@ #define APIC_ID_MASK (0xFF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned int es7000_get_apic_id(unsigned long x) { - return (((x)>>24)&0xFF); + return (x >> 24) & 0xFF; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index d0605281a6b7..8719208f2735 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -21,7 +21,7 @@ static inline const struct cpumask *default_target_cpus(void) #include #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) +#define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void default_setup_apic_routing(void); diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index b4dcc0971c76..e84d437ba2b2 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -5,20 +5,20 @@ #ifdef CONFIG_X86_64 #define APIC_ID_MASK (apic->apic_id_mask) -#define GET_APIC_ID(x) (apic->get_apic_id(x)) #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else #define APIC_ID_MASK (0xF<<24) -static inline unsigned get_apic_id(unsigned long x) + +static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + if (APIC_XAPIC(ver)) - return (((x)>>24)&0xFF); + return (x >> 24) & 0xFF; else - return (((x)>>24)&0xF); + return (x >> 24) & 0x0F; } -#define GET_APIC_ID(x) get_apic_id(x) #endif #endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h index acc9adddb344..645520bcd2c2 100644 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ b/arch/x86/include/asm/mach-generic/mach_apicdef.h @@ -4,7 +4,6 @@ #ifndef APIC_DEFINITION #include -#define GET_APIC_ID (apic->get_apic_id) #define APIC_ID_MASK (apic->apic_id_mask) #endif diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h index e012a46cc22a..29f5e3d34e5b 100644 --- a/arch/x86/include/asm/numaq/apicdef.h +++ b/arch/x86/include/asm/numaq/apicdef.h @@ -1,14 +1,11 @@ #ifndef __ASM_NUMAQ_APICDEF_H #define __ASM_NUMAQ_APICDEF_H - #define APIC_ID_MASK (0xF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned int numaq_get_apic_id(unsigned long x) { - return (((x)>>24)&0x0F); + return (x >> 24) & 0x0F; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 45ef8a1b9d7c..c63d480802af 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -189,7 +189,7 @@ static inline unsigned int read_apic_id(void) reg = *(u32 *)(APIC_BASE + APIC_ID); - return GET_APIC_ID(reg); + return apic->get_apic_id(reg); } #endif diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h index f3fbca1f61c1..4286528af7c6 100644 --- a/arch/x86/include/asm/summit/apicdef.h +++ b/arch/x86/include/asm/summit/apicdef.h @@ -3,11 +3,9 @@ #define APIC_ID_MASK (0xFF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned summit_get_apic_id(unsigned long x) { - return (x>>24)&0xFF; + return (x >> 24) & 0xFF; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index cc9e07b96094..ab47091dac2b 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -126,11 +126,12 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); } -static unsigned int get_apic_id(unsigned long x) +static unsigned int flat_get_apic_id(unsigned long x) { unsigned int id; id = (((x)>>24) & 0xFFu); + return id; } @@ -146,7 +147,7 @@ static unsigned int read_xapic_id(void) { unsigned int id; - id = get_apic_id(apic_read(APIC_ID)); + id = flat_get_apic_id(apic_read(APIC_ID)); return id; } @@ -205,7 +206,7 @@ struct genapic apic_flat = { .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, @@ -349,7 +350,7 @@ struct genapic apic_physflat = { .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu<<24, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 18b6f14376eb..c7557e051848 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -141,7 +141,7 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) { unsigned int id; @@ -207,7 +207,7 @@ struct genapic apic_x2apic_cluster = { .phys_pkg_id = x2apic_cluster_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_cluster_phys_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 2cb6f49e4c50..80cba49cfd89 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -140,7 +140,7 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_phys_get_apic_id(unsigned long x) { unsigned int id; @@ -203,7 +203,7 @@ struct genapic apic_x2apic_phys = { .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_phys_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 67e7658775e7..50310b96adc3 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -201,7 +201,7 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_get_apic_id(unsigned long x) { unsigned int id; @@ -223,7 +223,7 @@ static unsigned long set_apic_id(unsigned int id) static unsigned int uv_read_apic_id(void) { - return get_apic_id(apic_read(APIC_ID)); + return x2apic_get_apic_id(apic_read(APIC_ID)); } static int uv_phys_pkg_id(int initial_apicid, int index_msb) @@ -268,7 +268,7 @@ struct genapic apic_x2apic_uv_x = { .phys_pkg_id = uv_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 6bf6aafeb2c6..9eca977227c4 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -90,7 +90,7 @@ struct genapic apic_bigsmp = { .phys_pkg_id = bigsmp_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e5f85cd75b43..d51a3f0335ae 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -71,7 +71,7 @@ struct genapic apic_default = { .phys_pkg_id = default_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = default_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index f861163cd396..1944675db629 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -126,7 +126,7 @@ struct genapic apic_es7000 = { .phys_pkg_id = es7000_phys_pkg_id, .mps_oem_check = es7000_mps_oem_check, - .get_apic_id = get_apic_id, + .get_apic_id = es7000_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 517882c9c15a..fcbba84c090f 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -90,7 +90,7 @@ struct genapic apic_numaq = { .phys_pkg_id = numaq_phys_pkg_id, .mps_oem_check = __numaq_mps_oem_check, - .get_apic_id = get_apic_id, + .get_apic_id = numaq_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 719e944ff308..5650eaf9061f 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -70,7 +70,7 @@ struct genapic apic_summit = { .phys_pkg_id = summit_phys_pkg_id, .mps_oem_check = summit_mps_oem_check, - .get_apic_id = get_apic_id, + .get_apic_id = summit_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, From 5b8127277bc4cdca78eda5ee900a314642822ace Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 14:59:17 +0100 Subject: [PATCH 237/566] x86, apic: refactor ->apic_id_mask & APIC_ID_MASK - spread out the namespace on a per driver basis - get rid of wrapper macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apicdef.h | 2 +- arch/x86/include/asm/es7000/apicdef.h | 2 +- arch/x86/include/asm/mach-default/mach_apicdef.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apicdef.h | 2 -- arch/x86/include/asm/numaq/apicdef.h | 2 +- arch/x86/include/asm/summit/apicdef.h | 2 +- arch/x86/kernel/apic.c | 4 ++-- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h index ed25dd6503b2..6e587818c7ed 100644 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ b/arch/x86/include/asm/bigsmp/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_MACH_APICDEF_H #define __ASM_MACH_APICDEF_H -#define APIC_ID_MASK (0xFF<<24) +#define BIGSMP_APIC_ID_MASK (0xFF<<24) static inline unsigned bigsmp_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h index e23791762a19..476da0c7f5cc 100644 --- a/arch/x86/include/asm/es7000/apicdef.h +++ b/arch/x86/include/asm/es7000/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_ES7000_APICDEF_H #define __ASM_ES7000_APICDEF_H -#define APIC_ID_MASK (0xFF<<24) +#define ES7000_APIC_ID_MASK (0xFF<<24) static inline unsigned int es7000_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index e84d437ba2b2..8318d121ea66 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -4,10 +4,9 @@ #include #ifdef CONFIG_X86_64 -#define APIC_ID_MASK (apic->apic_id_mask) #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -#define APIC_ID_MASK (0xF<<24) +#define DEFAULT_APIC_ID_MASK (0x0F<<24) static inline unsigned default_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h index 645520bcd2c2..61caa65b13fb 100644 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ b/arch/x86/include/asm/mach-generic/mach_apicdef.h @@ -3,8 +3,6 @@ #ifndef APIC_DEFINITION #include - -#define APIC_ID_MASK (apic->apic_id_mask) #endif #endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h index 29f5e3d34e5b..6f2cc5df0b15 100644 --- a/arch/x86/include/asm/numaq/apicdef.h +++ b/arch/x86/include/asm/numaq/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_NUMAQ_APICDEF_H #define __ASM_NUMAQ_APICDEF_H -#define APIC_ID_MASK (0xF<<24) +#define NUMAQ_APIC_ID_MASK (0xF<<24) static inline unsigned int numaq_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h index 4286528af7c6..0373f0c7b5db 100644 --- a/arch/x86/include/asm/summit/apicdef.h +++ b/arch/x86/include/asm/summit/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_SUMMIT_APICDEF_H #define __ASM_SUMMIT_APICDEF_H -#define APIC_ID_MASK (0xFF<<24) +#define SUMMIT_APIC_ID_MASK (0xFF<<24) static inline unsigned summit_get_apic_id(unsigned long x) { diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 9d6374da4784..5f7f3a9a47ad 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1009,11 +1009,11 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) + if (reg1 != (reg0 ^ apic->apic_id_mask)) return 0; /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index ab47091dac2b..78baa55cd0e9 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -352,7 +352,7 @@ struct genapic apic_physflat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu<<24, + .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 9eca977227c4..1f4ad4f77022 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -92,7 +92,7 @@ struct genapic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = BIGSMP_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d51a3f0335ae..239af25615b1 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -73,7 +73,7 @@ struct genapic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = DEFAULT_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 1944675db629..21fb33eea91b 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -128,7 +128,7 @@ struct genapic apic_es7000 = { .get_apic_id = es7000_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = ES7000_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index fcbba84c090f..27d2d1f2d6f0 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -92,7 +92,7 @@ struct genapic apic_numaq = { .get_apic_id = numaq_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = NUMAQ_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 5650eaf9061f..f24cba1b29da 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -72,7 +72,7 @@ struct genapic apic_summit = { .get_apic_id = summit_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = SUMMIT_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, From 94af18755266edf46803564414d74f9621aaded8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:08:53 +0100 Subject: [PATCH 238/566] x86, apic: get rid of *_APIC_ID_MASK definitions Impact: cleanup Remove the *_APIC_ID_MASK subarch definitions and move them straight to the genapic driver initialization code. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apicdef.h | 2 -- arch/x86/include/asm/es7000/apicdef.h | 2 -- arch/x86/include/asm/mach-default/mach_apicdef.h | 1 - arch/x86/include/asm/numaq/apicdef.h | 2 -- arch/x86/include/asm/summit/apicdef.h | 2 -- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 10 files changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h index 6e587818c7ed..e58dee847573 100644 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ b/arch/x86/include/asm/bigsmp/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_MACH_APICDEF_H #define __ASM_MACH_APICDEF_H -#define BIGSMP_APIC_ID_MASK (0xFF<<24) - static inline unsigned bigsmp_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h index 476da0c7f5cc..c74881a7b3d8 100644 --- a/arch/x86/include/asm/es7000/apicdef.h +++ b/arch/x86/include/asm/es7000/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_ES7000_APICDEF_H #define __ASM_ES7000_APICDEF_H -#define ES7000_APIC_ID_MASK (0xFF<<24) - static inline unsigned int es7000_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index 8318d121ea66..5141085962d3 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -6,7 +6,6 @@ #ifdef CONFIG_X86_64 #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -#define DEFAULT_APIC_ID_MASK (0x0F<<24) static inline unsigned default_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h index 6f2cc5df0b15..cd927d5bd505 100644 --- a/arch/x86/include/asm/numaq/apicdef.h +++ b/arch/x86/include/asm/numaq/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_NUMAQ_APICDEF_H #define __ASM_NUMAQ_APICDEF_H -#define NUMAQ_APIC_ID_MASK (0xF<<24) - static inline unsigned int numaq_get_apic_id(unsigned long x) { return (x >> 24) & 0x0F; diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h index 0373f0c7b5db..c24b0df2dec6 100644 --- a/arch/x86/include/asm/summit/apicdef.h +++ b/arch/x86/include/asm/summit/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_SUMMIT_APICDEF_H #define __ASM_SUMMIT_APICDEF_H -#define SUMMIT_APIC_ID_MASK (0xFF<<24) - static inline unsigned summit_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 1f4ad4f77022..ee52c59aa3a4 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -92,7 +92,7 @@ struct genapic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = BIGSMP_APIC_ID_MASK, + .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 239af25615b1..e4ed7e6d6263 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -73,7 +73,7 @@ struct genapic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = DEFAULT_APIC_ID_MASK, + .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 21fb33eea91b..3d046dec0c9a 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -128,7 +128,7 @@ struct genapic apic_es7000 = { .get_apic_id = es7000_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = ES7000_APIC_ID_MASK, + .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 27d2d1f2d6f0..a7bf1aa02e1a 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -92,7 +92,7 @@ struct genapic apic_numaq = { .get_apic_id = numaq_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = NUMAQ_APIC_ID_MASK, + .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index f24cba1b29da..a0ae6b910488 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -72,7 +72,7 @@ struct genapic apic_summit = { .get_apic_id = summit_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = SUMMIT_APIC_ID_MASK, + .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, From debccb3e77be52cfc26c5a99e123c114c5c72aeb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:20:18 +0100 Subject: [PATCH 239/566] x86, apic: refactor ->cpu_mask_to_apicid*() - spread out the namespace on a per driver basis - clean up the functions - get rid of macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 17 +++++------ arch/x86/include/asm/es7000/apic.h | 29 ++++++++++++------- arch/x86/include/asm/mach-default/mach_apic.h | 10 +++---- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/include/asm/numaq/apic.h | 11 +++---- arch/x86/include/asm/summit/apic.h | 21 +++++++++----- arch/x86/kernel/genapic_flat_64.c | 4 ++- arch/x86/kernel/genx2apic_cluster.c | 15 ++++++---- arch/x86/kernel/genx2apic_phys.c | 15 ++++++---- arch/x86/kernel/genx2apic_uv_x.c | 14 +++++---- arch/x86/kernel/io_apic.c | 21 ++++++++------ arch/x86/mach-generic/bigsmp.c | 4 +-- arch/x86/mach-generic/default.c | 4 +-- arch/x86/mach-generic/es7000.c | 6 ++-- arch/x86/mach-generic/numaq.c | 4 +-- arch/x86/mach-generic/summit.c | 4 +-- 16 files changed, 101 insertions(+), 80 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 1230f5d7a38e..ee29d66cd302 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -105,18 +105,14 @@ static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int cpu; - int apicid; - - cpu = first_cpu(*cpumask); - apicid = bigsmp_cpu_to_logical_apicid(cpu); - return apicid; + return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int +bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -124,9 +120,10 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return bigsmp_cpu_to_logical_apicid(cpu); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index f183dfb4de4a..b89b45db735d 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -137,12 +137,12 @@ static inline int es7000_check_phys_apicid_present(int cpu_physical_apicid) } static inline unsigned int -cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) { - int num_bits_set; int cpus_found = 0; - int cpu; + int num_bits_set; int apicid; + int cpu; num_bits_set = cpumask_weight(cpumask); /* Return id to all */ @@ -154,12 +154,15 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) */ cpu = cpumask_first(cpumask); apicid = es7000_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = es7000_cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ + apicid_cluster(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); + return 0xFF; } apicid = new_apicid; @@ -170,12 +173,12 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int num_bits_set; int cpus_found = 0; - int cpu; + int num_bits_set; int apicid; + int cpu; num_bits_set = cpus_weight(*cpumask); /* Return id to all */ @@ -190,9 +193,11 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { int new_apicid = es7000_cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ + apicid_cluster(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); + return es7000_cpu_to_logical_apicid(0); } apicid = new_apicid; @@ -204,8 +209,9 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) +static inline unsigned int +es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) { int apicid = es7000_cpu_to_logical_apicid(0); cpumask_var_t cpumask; @@ -215,9 +221,10 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = cpu_mask_to_apicid(cpumask); + apicid = es7000_cpu_mask_to_apicid(cpumask); free_cpumask_var(cpumask); + return apicid; } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 8719208f2735..8972f8434145 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -19,8 +19,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include -#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) @@ -49,13 +47,15 @@ static inline int default_apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) { return cpumask_bits(cpumask)[0]; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { unsigned long mask1 = cpumask_bits(cpumask)[0]; unsigned long mask2 = cpumask_bits(andmask)[0]; diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 1eeb5b61e488..ca460e459913 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,8 +3,6 @@ #include -#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 765c4d5124cb..ce95e79f7233 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -101,15 +101,16 @@ static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) { - return (int) 0xF; + return 0x0F; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int +numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { - return (int) 0xF; + return 0x0F; } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index fa6b3b45290d..15b8dbd19e1a 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -125,29 +125,32 @@ static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int num_bits_set; int cpus_found = 0; - int cpu; + int num_bits_set; int apicid; + int cpu; num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set >= nr_cpu_ids) - return (int) 0xFF; + return 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); apicid = summit_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { int new_apicid = summit_cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ + apicid_cluster(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); + return 0xFF; } apicid = apicid | new_apicid; @@ -158,8 +161,9 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) +static inline unsigned int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) { int apicid = summit_cpu_to_logical_apicid(0); cpumask_var_t cpumask; @@ -169,9 +173,10 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = cpu_mask_to_apicid(cpumask); + apicid = summit_cpu_mask_to_apicid(cpumask); free_cpumask_var(cpumask); + return apicid; } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 78baa55cd0e9..b941b112cafb 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -309,11 +309,13 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index c7557e051848..62f9fccf01dd 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -111,21 +111,21 @@ static int x2apic_apic_id_registered(void) static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -133,11 +133,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } + if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 80cba49cfd89..3da1675b2604 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -110,21 +110,21 @@ static int x2apic_apic_id_registered(void) static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -132,11 +132,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } + if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 50310b96adc3..f957878c21e9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -171,21 +171,21 @@ static void uv_init_apic_ldr(void) static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -193,11 +193,13 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3d85d3d810b2..01a2505d7275 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -563,8 +563,9 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); /* - * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid - * of that, or returns BAD_APICID and leaves desc->affinity untouched. + * Either sets desc->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * leaves desc->affinity untouched. */ static unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) @@ -582,7 +583,8 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -1562,7 +1564,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1666,7 +1668,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, */ entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(apic->target_cpus()); + entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -2367,7 +2369,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) set_extra_move_desc(desc, mask); - dest = cpu_mask_to_apicid_and(cfg->domain, mask); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -3270,7 +3272,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3708,7 +3710,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus()); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3773,7 +3776,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(eligible_cpu); + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index ee52c59aa3a4..22c2c7b8e4ab 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -94,8 +94,8 @@ struct genapic apic_bigsmp = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e4ed7e6d6263..477ebec16749 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -75,8 +75,8 @@ struct genapic apic_default = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 3d046dec0c9a..d758cf65d736 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -26,7 +26,7 @@ void __init es7000_update_genapic_to_cluster(void) apic->init_apic_ldr = es7000_init_apic_ldr_cluster; - apic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; + apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; } static int probe_es7000(void) @@ -130,8 +130,8 @@ struct genapic apic_es7000 = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index a7bf1aa02e1a..eb7d56a521d4 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -94,8 +94,8 @@ struct genapic apic_numaq = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index a0ae6b910488..8c293055bdf0 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -74,8 +74,8 @@ struct genapic apic_summit = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, From dac5f4121df3c39fdb2ea57acd669a0ae19e46f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:42:24 +0100 Subject: [PATCH 240/566] x86, apic: untangle the send_IPI_*() jungle Our send_IPI_*() methods and definitions are a twisted mess: the same symbol is defined to different things depending on .config details, in a non-transparent way. - spread out the quirks into separately named per apic driver methods - prefix the standard PC methods with default_ - get rid of wrapper macro obfuscation - clean up various details Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/ipi.h | 16 ++++---- arch/x86/include/asm/es7000/ipi.h | 16 ++++---- arch/x86/include/asm/hw_irq.h | 4 +- arch/x86/include/asm/ipi.h | 38 +++++++++--------- arch/x86/include/asm/mach-default/mach_apic.h | 1 - arch/x86/include/asm/mach-default/mach_ipi.h | 40 ++++++++----------- arch/x86/include/asm/mach-generic/mach_ipi.h | 4 -- arch/x86/include/asm/numaq/ipi.h | 16 ++++---- arch/x86/include/asm/summit/ipi.h | 16 ++++---- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 24 ++++++----- arch/x86/kernel/genx2apic_cluster.c | 38 ++++++++++-------- arch/x86/kernel/genx2apic_phys.c | 36 +++++++---------- arch/x86/kernel/genx2apic_uv_x.c | 21 +++++----- arch/x86/kernel/io_apic.c | 10 ++--- arch/x86/kernel/ipi.c | 28 +++++++------ arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/smp.c | 10 ++--- arch/x86/mach-generic/bigsmp.c | 6 +-- arch/x86/mach-generic/default.c | 6 +-- arch/x86/mach-generic/es7000.c | 6 +-- arch/x86/mach-generic/numaq.c | 6 +-- arch/x86/mach-generic/summit.c | 6 +-- arch/x86/mm/tlb.c | 2 +- 26 files changed, 178 insertions(+), 180 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 27fcd01b3ae6..a91db69cda6b 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h @@ -1,22 +1,22 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void bigsmp_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } -static inline void send_IPI_all(int vector) +static inline void bigsmp_send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + default_send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 7e8ed24d4b8a..81e77c812baa 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h @@ -1,22 +1,22 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void es7000_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void es7000_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } -static inline void send_IPI_all(int vector) +static inline void es7000_send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + es7000_send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 8de644b6b959..bfa921fad133 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -73,9 +73,9 @@ extern void enable_IO_APIC(void); /* IPI functions */ #ifdef CONFIG_X86_32 -extern void send_IPI_self(int vector); +extern void default_send_IPI_self(int vector); #endif -extern void send_IPI(int dest, int vector); +extern void default_send_IPI(int dest, int vector); /* Statistics */ extern atomic_t irq_err_count; diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index c745a306f7d3..a8d717f2c7e7 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -55,8 +55,9 @@ static inline void __xapic_wait_icr_idle(void) cpu_relax(); } -static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, - unsigned int dest) +static inline void +__default_send_IPI_shortcut(unsigned int shortcut, + int vector, unsigned int dest) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -87,8 +88,8 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void __send_IPI_dest_field(unsigned int mask, int vector, - unsigned int dest) +static inline void + __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) { unsigned long cfg; @@ -117,11 +118,11 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(const struct cpumask *mask, - int vector) +static inline void +default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; /* * Hack. The clustered APIC addressing mode doesn't allow us to send @@ -130,27 +131,28 @@ static inline void send_IPI_mask_sequence(const struct cpumask *mask, */ local_irq_save(flags); for_each_cpu(query_cpu, mask) { - __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static inline void send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static inline void +default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned int query_cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int query_cpu; + unsigned long flags; /* See Hack comment above */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } local_irq_restore(flags); } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 8972f8434145..2e4104cf3481 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -20,7 +20,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include #define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) -#define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void default_setup_apic_routing(void); #else diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index 089399643dfa..85dec630c69c 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,45 +4,40 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -void __send_IPI_shortcut(unsigned int shortcut, int vector); +void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void __default_send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; #ifdef CONFIG_X86_64 #include -#define send_IPI_mask (apic->send_IPI_mask) -#define send_IPI_mask_allbutself (apic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_bitmask(mask, vector); + default_send_IPI_mask_bitmask(mask, vector); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif -static inline void __local_send_IPI_allbutself(int vector) +static inline void __default_local_send_IPI_allbutself(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask_allbutself(cpu_online_mask, vector); + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } -static inline void __local_send_IPI_all(int vector) +static inline void __default_local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_mask, vector); + apic->send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector); + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); } -#ifdef CONFIG_X86_64 -#define send_IPI_allbutself (apic->send_IPI_allbutself) -#define send_IPI_all (apic->send_IPI_all) -#else -static inline void send_IPI_allbutself(int vector) +#ifdef CONFIG_X86_32 +static inline void default_send_IPI_allbutself(int vector) { /* * if there are no other CPUs in the system then we get an APIC send @@ -51,13 +46,12 @@ static inline void send_IPI_allbutself(int vector) if (!(num_online_cpus() > 1)) return; - __local_send_IPI_allbutself(vector); - return; + __default_local_send_IPI_allbutself(vector); } -static inline void send_IPI_all(int vector) +static inline void default_send_IPI_all(int vector) { - __local_send_IPI_all(vector); + __default_local_send_IPI_all(vector); } #endif diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h index 75e54bd6cbdc..5691c09645c5 100644 --- a/arch/x86/include/asm/mach-generic/mach_ipi.h +++ b/arch/x86/include/asm/mach-generic/mach_ipi.h @@ -3,8 +3,4 @@ #include -#define send_IPI_mask (apic->send_IPI_mask) -#define send_IPI_allbutself (apic->send_IPI_allbutself) -#define send_IPI_all (apic->send_IPI_all) - #endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index a8374c652778..5dbc4b4cd5e5 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h @@ -1,22 +1,22 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void numaq_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } -static inline void send_IPI_all(int vector) +static inline void numaq_send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + numaq_send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h index a8a2c24f50cc..f87a43fe0aed 100644 --- a/arch/x86/include/asm/summit/ipi.h +++ b/arch/x86/include/asm/summit/ipi.h @@ -1,26 +1,26 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void summit_send_IPI_allbutself(int vector) { cpumask_t mask = cpu_online_map; cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + summit_send_IPI_mask(&mask, vector); } -static inline void send_IPI_all(int vector) +static inline void summit_send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + summit_send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 5f7f3a9a47ad..84b8e7c57abc 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -475,7 +475,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(const struct cpumask *mask) { #ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); #endif } diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index d57d2138f078..820dea5d0ebe 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -65,7 +65,7 @@ void __init default_setup_apic_routing(void) void apic_send_IPI_self(int vector) { - __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index b941b112cafb..7c648ccea514 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, apic->dest_logical); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); local_irq_restore(flags); } @@ -85,14 +85,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) _flat_send_IPI_mask(mask, vector); } -static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) +static void + flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; int cpu = smp_processor_id(); if (cpu < BITS_PER_LONG) clear_bit(cpu, &mask); + _flat_send_IPI_mask(mask, vector); } @@ -114,16 +115,19 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, + vector, apic->dest_logical); } } static void flat_send_IPI_all(int vector) { - if (vector == NMI_VECTOR) + if (vector == NMI_VECTOR) { flat_send_IPI_mask(cpu_online_mask, vector); - else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); + } else { + __default_send_IPI_shortcut(APIC_DEST_ALLINC, + vector, apic->dest_logical); + } } static unsigned int flat_get_apic_id(unsigned long x) @@ -265,18 +269,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { - send_IPI_mask_sequence(cpumask, vector); + default_send_IPI_mask_sequence(cpumask, vector); } static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { - send_IPI_mask_allbutself(cpumask, vector); + default_send_IPI_mask_allbutself(cpumask, vector); } static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 62f9fccf01dd..2d97726a973e 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -36,8 +36,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) cpumask_set_cpu(cpu, retmask); } -static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, - unsigned int dest) +static void + __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) { unsigned long cfg; @@ -57,45 +57,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, */ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_cpu(query_cpu, mask) + for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static void + x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } @@ -175,7 +180,6 @@ static void init_x2apic_ldr(void) int cpu = smp_processor_id(); per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); - return; } struct genapic apic_x2apic_cluster = { diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3da1675b2604..74777c276693 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -55,8 +55,8 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); for_each_cpu(query_cpu, mask) { @@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static void + x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); for_each_cpu(query_cpu, mask) { @@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } local_irq_restore(flags); } @@ -145,18 +146,12 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, static unsigned int x2apic_phys_get_apic_id(unsigned long x) { - unsigned int id; - - id = x; - return id; + return x; } static unsigned long set_apic_id(unsigned int id) { - unsigned long x; - - x = id; - return x; + return id; } static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) @@ -171,7 +166,6 @@ static void x2apic_send_IPI_self(int vector) static void init_x2apic_ldr(void) { - return; } struct genapic apic_x2apic_phys = { diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index f957878c21e9..24b9f42db9b7 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -118,12 +118,13 @@ static void uv_send_IPI_one(int cpu, int vector) int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); - lapicid = apicid & 0x3f; /* ZZZ macro needed */ + lapicid = apicid & 0x3f; /* ZZZ macro needed */ pnode = uv_apicid_to_pnode(apicid); - val = - (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << - UVH_IPI_INT_APIC_ID_SHFT) | - (vector << UVH_IPI_INT_VECTOR_SHFT); + + val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) | + ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) | + ( vector << UVH_IPI_INT_VECTOR_SHFT ); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } @@ -137,22 +138,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector) static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned int cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - for_each_cpu(cpu, mask) + for_each_cpu(cpu, mask) { if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); + } } static void uv_send_IPI_allbutself(int vector) { - unsigned int cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); + } } static void uv_send_IPI_all(int vector) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 01a2505d7275..e90970ce21a0 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -514,11 +514,11 @@ static void send_cleanup_vector(struct irq_cfg *cfg) for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } cfg->move_in_progress = 0; @@ -800,7 +800,7 @@ static void clear_IO_APIC (void) } #if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void send_IPI_self(int vector) +void default_send_IPI_self(int vector) { unsigned int cfg; @@ -2297,7 +2297,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2305,7 +2305,7 @@ static int ioapic_retrigger_irq(unsigned int irq) #else static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_cfg(irq)->vector); + apic->send_IPI_self(irq_cfg(irq)->vector); return 1; } diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 367c5e684fa1..e16c41b2e4ec 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -48,7 +48,7 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_APIC_DEST_FIELD(mask); } -void __send_IPI_shortcut(unsigned int shortcut, int vector) +void __default_send_IPI_shortcut(unsigned int shortcut, int vector) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -75,16 +75,16 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) apic_write(APIC_ICR, cfg); } -void send_IPI_self(int vector) +void default_send_IPI_self(int vector) { - __send_IPI_shortcut(APIC_DEST_SELF, vector); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector); } /* * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void __send_IPI_dest_field(unsigned long mask, int vector) +static inline void __default_send_IPI_dest_field(unsigned long mask, int vector) { unsigned long cfg; @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. */ -void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +void default_send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; local_irq_save(flags); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __send_IPI_dest_field(mask, vector); + __default_send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -140,11 +140,11 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); + __default_send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -153,10 +153,12 @@ void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) /* See Hack comment above */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), - vector); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector); + } local_irq_restore(flags); } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 10435a120d22..b62a3811e01c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) */ void kgdb_roundup_cpus(unsigned long flags) { - send_IPI_allbutself(APIC_DM_NMI); + apic->send_IPI_allbutself(APIC_DM_NMI); } #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8536fee5c12..38dace28d625 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -651,7 +651,7 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(NMI_VECTOR); + apic->send_IPI_allbutself(NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index e6faa3316bd2..c48ba6cc32aa 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,12 +118,12 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); } void native_send_call_func_ipi(const struct cpumask *mask) @@ -131,7 +131,7 @@ void native_send_call_func_ipi(const struct cpumask *mask) cpumask_var_t allbutself; if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); return; } @@ -140,9 +140,9 @@ void native_send_call_func_ipi(const struct cpumask *mask) if (cpumask_equal(mask, allbutself) && cpumask_equal(cpu_online_mask, cpu_callout_mask)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); + apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); free_cpumask_var(allbutself); } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 22c2c7b8e4ab..4782b5547881 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -97,10 +97,10 @@ struct genapic apic_bigsmp = { .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = default_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = bigsmp_send_IPI_allbutself, + .send_IPI_all = bigsmp_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 477ebec16749..bf4670db25fe 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -78,10 +78,10 @@ struct genapic apic_default = { .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = default_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index d758cf65d736..d36642e6d908 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -133,10 +133,10 @@ struct genapic apic_es7000 = { .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = es7000_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = es7000_send_IPI_allbutself, + .send_IPI_all = es7000_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index eb7d56a521d4..135b1832ad80 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -97,10 +97,10 @@ struct genapic apic_numaq = { .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = numaq_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = numaq_send_IPI_allbutself, + .send_IPI_all = numaq_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 8c293055bdf0..77196a4a9d2b 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -77,10 +77,10 @@ struct genapic apic_summit = { .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = summit_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = summit_send_IPI_allbutself, + .send_IPI_all = summit_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 72a6d4ebe34d..6348e1146925 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -196,7 +196,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(to_cpumask(f->flush_cpumask), + apic->send_IPI_mask(to_cpumask(f->flush_cpumask), INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) From 6f177c01db6b865181fbc6c948381b290ee09718 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:09:23 +0100 Subject: [PATCH 241/566] x86, smp: clean up ->trampoline_phys_low/high handling - spread out the namespace on a per apic driver basis - remove wrapper macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 4 ++-- arch/x86/include/asm/mach-default/mach_wakecpu.h | 4 ++-- arch/x86/include/asm/mach-default/smpboot_hooks.h | 6 +++--- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 2 -- arch/x86/include/asm/numaq/wakecpu.h | 12 ++++++------ arch/x86/mach-generic/bigsmp.c | 4 ++-- arch/x86/mach-generic/default.c | 4 ++-- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 4 ++-- 10 files changed, 23 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 78f0daaee436..4c01be6ff80c 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -1,8 +1,8 @@ #ifndef __ASM_ES7000_WAKECPU_H #define __ASM_ES7000_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW 0x467 -#define TRAMPOLINE_PHYS_HIGH 0x469 +#define ES7000_TRAMPOLINE_PHYS_LOW 0x467 +#define ES7000_TRAMPOLINE_PHYS_HIGH 0x469 static inline void wait_for_init_deassert(atomic_t *deassert) { diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 89897a6a65b9..0a8d7860e44b 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,8 +1,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW (0x467) -#define TRAMPOLINE_PHYS_HIGH (0x469) +#define DEFAULT_TRAMPOLINE_PHYS_LOW (0x467) +#define DEFAULT_TRAMPOLINE_PHYS_HIGH (0x469) static inline void wait_for_init_deassert(atomic_t *deassert) { diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h index 23bf52103b89..1def60114906 100644 --- a/arch/x86/include/asm/mach-default/smpboot_hooks.h +++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h @@ -13,10 +13,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) CMOS_WRITE(0xa, 0xf); local_flush_tlb(); pr_debug("1.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) = start_eip >> 4; pr_debug("2.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_low)) = start_eip & 0xf; pr_debug("3.\n"); } @@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void) */ CMOS_WRITE(0, 0xf); - *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; + *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0; } static inline void __init smpboot_setup_io_apic(void) diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 22006bbee617..2031377a954c 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW (apic->trampoline_phys_low) -#define TRAMPOLINE_PHYS_HIGH (apic->trampoline_phys_high) #define wait_for_init_deassert (apic->wait_for_init_deassert) #define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) #define store_NMI_vector (apic->store_NMI_vector) diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 6f499df8eddb..8b6c16d8558d 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -3,8 +3,8 @@ /* This file copes with machines that wakeup secondary CPUs by NMIs */ -#define TRAMPOLINE_PHYS_LOW (0x8) -#define TRAMPOLINE_PHYS_HIGH (0xa) +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) /* We don't do anything here because we use NMI's to boot instead */ static inline void wait_for_init_deassert(atomic_t *deassert) @@ -24,17 +24,17 @@ static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { printk("Storing NMI vector\n"); *high = - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)); + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); *low = - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)); + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { printk("Restoring NMI vector\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)) = *high; - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)) = *low; } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 4782b5547881..a317fbe07fdf 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -104,8 +104,8 @@ struct genapic apic_bigsmp = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index bf4670db25fe..17d8f9c22180 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -85,8 +85,8 @@ struct genapic apic_default = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index d36642e6d908..871e85445e21 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -140,8 +140,8 @@ struct genapic apic_es7000 = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 135b1832ad80..0b496ab5450c 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -104,8 +104,8 @@ struct genapic apic_numaq = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 77196a4a9d2b..c4799cd34592 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -84,8 +84,8 @@ struct genapic apic_summit = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, From abfa584c8df8b691cf18f51c7d4af27e5b32be4a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:15:16 +0100 Subject: [PATCH 242/566] x86: set ->trampoline_phys_low/high on 64-bit too 64-bit x86 has zero for ->trampoline_phys_low/high, but the smpboot code can use these values - so it's better to set them up to their correct values. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 6 ++++++ arch/x86/include/asm/mach-default/mach_wakecpu.h | 3 --- arch/x86/kernel/genapic_flat_64.c | 8 ++++---- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 8bb1c73c55b7..90e83a769a1c 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -88,6 +88,12 @@ struct genapic { extern struct genapic *apic; +/* + * Warm reset vector default position: + */ +#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 +#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 + #ifdef CONFIG_X86_32 extern void es7000_update_genapic_to_cluster(void); #else diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 0a8d7860e44b..a327a675e473 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,9 +1,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define DEFAULT_TRAMPOLINE_PHYS_LOW (0x467) -#define DEFAULT_TRAMPOLINE_PHYS_HIGH (0x469) - static inline void wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7c648ccea514..3a28d6a8c497 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -224,8 +224,8 @@ struct genapic apic_flat = { .send_IPI_self = apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, @@ -370,8 +370,8 @@ struct genapic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 2d97726a973e..abc5ee329f21 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -228,8 +228,8 @@ struct genapic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 74777c276693..dc815ef22d8c 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -214,8 +214,8 @@ struct genapic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 24b9f42db9b7..b5908735ca50 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -287,8 +287,8 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_self = uv_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, From a965936643e28af8152d9e960b966baa1a5588a2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:21:32 +0100 Subject: [PATCH 243/566] x86, smp: refactor ->wait_for_init_deassert() - spread out the namespace on a per APIC driver basis - handle a NULL ->wait_for_init_deassert() as a 'dont wait' default method - remove NUMAQ and Summit handlers Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 2 +- arch/x86/include/asm/mach-default/mach_wakecpu.h | 2 +- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 1 - arch/x86/include/asm/numaq/wakecpu.h | 5 ----- arch/x86/kernel/es7000_32.c | 6 +----- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 4 +++- arch/x86/mach-generic/default.c | 4 +++- arch/x86/mach-generic/es7000.c | 4 +++- arch/x86/mach-generic/numaq.c | 5 ++++- arch/x86/mach-generic/summit.c | 4 +++- 11 files changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 4c01be6ff80c..5c4d05f46d2d 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -4,7 +4,7 @@ #define ES7000_TRAMPOLINE_PHYS_LOW 0x467 #define ES7000_TRAMPOLINE_PHYS_HIGH 0x469 -static inline void wait_for_init_deassert(atomic_t *deassert) +static inline void es7000_wait_for_init_deassert(atomic_t *deassert) { #ifndef CONFIG_ES7000_CLUSTERED_APIC while (!atomic_read(deassert)) diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index a327a675e473..1d34c69a758b 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -static inline void wait_for_init_deassert(atomic_t *deassert) +static inline void default_wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) cpu_relax(); diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 2031377a954c..58e54122f730 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define wait_for_init_deassert (apic->wait_for_init_deassert) #define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) #define store_NMI_vector (apic->store_NMI_vector) #define restore_NMI_vector (apic->restore_NMI_vector) diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 8b6c16d8558d..884b95cf5846 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -6,11 +6,6 @@ #define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) #define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) -/* We don't do anything here because we use NMI's to boot instead */ -static inline void wait_for_init_deassert(atomic_t *deassert) -{ -} - /* * Because we use NMIs rather than the INIT-STARTUP sequence to * bootstrap the CPUs, the APIC may be in a weird state. Kick it. diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index e73fe18488ac..d7f433ee602d 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -182,10 +182,6 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) return 0; } -static void noop_wait_for_deassert(atomic_t *deassert_not_used) -{ -} - static int __init es7000_update_genapic(void) { apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; @@ -194,7 +190,7 @@ static int __init es7000_update_genapic(void) if (boot_cpu_data.x86 == 6 && (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { es7000_update_genapic_to_cluster(); - apic->wait_for_init_deassert = noop_wait_for_deassert; + apic->wait_for_init_deassert = NULL; apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ab83be2f8e0f..558af378a61d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -196,7 +196,8 @@ static void __cpuinit smp_callin(void) * our local APIC. We have to wait for the IPI or we'll * lock up on an APIC access. */ - wait_for_init_deassert(&init_deasserted); + if (apic->wait_for_init_deassert) + apic->wait_for_init_deassert(&init_deasserted); /* * (This works even if the APIC is not enabled.) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index a317fbe07fdf..40910bfd1b42 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -106,7 +106,9 @@ struct genapic apic_bigsmp = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 17d8f9c22180..c2464843df9e 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -87,7 +87,9 @@ struct genapic apic_default = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 871e85445e21..4cb3984834ed 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -142,7 +142,9 @@ struct genapic apic_es7000 = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 0b496ab5450c..fb03867e7c0f 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -106,7 +106,10 @@ struct genapic apic_numaq = { .wakeup_cpu = NULL, .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + /* We don't do anything here because we use NMI's to boot instead */ + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index c4799cd34592..fdca78b96b6a 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -86,7 +86,9 @@ struct genapic apic_summit = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, From 333344d94300500e401cffb4eea10a5ab6e5a41d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: [PATCH 244/566] x86, smp: refactor ->smp_callin_clear_local_apic() methods Only NUMAQ does something substantial here, because it initializes via NMIs (not via INIT as standard SMP startup) - so it needs to reset the APIC. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 5 ----- arch/x86/include/asm/mach-default/mach_wakecpu.h | 5 ----- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 1 - arch/x86/include/asm/numaq/wakecpu.h | 4 ++-- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 3 ++- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 4 +++- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 3 ++- 10 files changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 5c4d05f46d2d..e8e03962633b 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -13,11 +13,6 @@ static inline void es7000_wait_for_init_deassert(atomic_t *deassert) return; } -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { } diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 1d34c69a758b..d059807cd7e8 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -8,11 +8,6 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { } diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 58e54122f730..30515a154d8e 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) #define store_NMI_vector (apic->store_NMI_vector) #define restore_NMI_vector (apic->restore_NMI_vector) #define inquire_remote_apic (apic->inquire_remote_apic) diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 884b95cf5846..61d0a7d96136 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -8,9 +8,9 @@ /* * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it. + * bootstrap the CPUs, the APIC may be in a weird state. Kick it: */ -static inline void smp_callin_clear_local_apic(void) +static inline void numaq_smp_callin_clear_local_apic(void) { clear_local_APIC(); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 558af378a61d..10873a46b299 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -244,7 +244,8 @@ static void __cpuinit smp_callin(void) */ pr_debug("CALLIN, before setup_local_APIC().\n"); - smp_callin_clear_local_apic(); + if (apic->smp_callin_clear_local_apic) + apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); map_cpu_to_logical_apicid(); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 40910bfd1b42..bd069e7b521c 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -109,7 +109,8 @@ struct genapic apic_bigsmp = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c2464843df9e..a25e6eff048f 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -90,7 +90,7 @@ struct genapic apic_default = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = NULL, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 4cb3984834ed..ab41b5439145 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -145,7 +145,9 @@ struct genapic apic_es7000 = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + /* Nothing to do for most platforms, since cleared by the INIT cycle: */ + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index fb03867e7c0f..4d3924f8cd0b 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -110,7 +110,7 @@ struct genapic apic_numaq = { /* We don't do anything here because we use NMI's to boot instead */ .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index fdca78b96b6a..2595baa7997e 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -89,7 +89,8 @@ struct genapic apic_summit = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, From 7bd06ec63a1204ca44b9f1dc487b8632016162d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: [PATCH 245/566] x86, smp: refactor ->store/restore_NMI_vector() methods Only NUMAQ does something substantial here, because it initializes via NMIs (not via INIT as standard SMP startup) - so it needs to store and restore the NMI vector. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 8 -------- arch/x86/include/asm/mach-default/mach_wakecpu.h | 8 -------- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 2 -- arch/x86/include/asm/numaq/wakecpu.h | 6 ++++-- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 5 ++--- arch/x86/mach-generic/default.c | 4 ++-- arch/x86/mach-generic/es7000.c | 5 ++--- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 5 ++--- 10 files changed, 16 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index e8e03962633b..71a3a412d0e4 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -13,14 +13,6 @@ static inline void es7000_wait_for_init_deassert(atomic_t *deassert) return; } -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - extern void __inquire_remote_apic(int apicid); static inline void inquire_remote_apic(int apicid) diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index d059807cd7e8..656bb5e52bcc 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -8,14 +8,6 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - #ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); #else /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 30515a154d8e..93207dfe8f50 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define store_NMI_vector (apic->store_NMI_vector) -#define restore_NMI_vector (apic->restore_NMI_vector) #define inquire_remote_apic (apic->inquire_remote_apic) #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 61d0a7d96136..123201712a96 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -15,7 +15,8 @@ static inline void numaq_smp_callin_clear_local_apic(void) clear_local_APIC(); } -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +static inline void +numaq_store_NMI_vector(unsigned short *high, unsigned short *low) { printk("Storing NMI vector\n"); *high = @@ -24,7 +25,8 @@ static inline void store_NMI_vector(unsigned short *high, unsigned short *low) *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +static inline void +numaq_restore_NMI_vector(unsigned short *high, unsigned short *low) { printk("Restoring NMI vector\n"); *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)) = diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 10873a46b299..1492024592ff 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -826,7 +826,8 @@ do_rest: pr_debug("Setting warm reset code and vector.\n"); - store_NMI_vector(&nmi_high, &nmi_low); + if (apic->store_NMI_vector) + apic->store_NMI_vector(&nmi_high, &nmi_low); smpboot_setup_warm_reset_vector(start_ip); /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index bd069e7b521c..ecdb230d0f2e 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -110,8 +110,7 @@ struct genapic apic_bigsmp = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index a25e6eff048f..950925615a9e 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -91,7 +91,7 @@ struct genapic apic_default = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index ab41b5439145..131907091380 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -147,8 +147,7 @@ struct genapic apic_es7000 = { /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, - - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 4d3924f8cd0b..d7f7fcf21c39 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -111,7 +111,7 @@ struct genapic apic_numaq = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = numaq_store_NMI_vector, + .restore_NMI_vector = numaq_restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2595baa7997e..46fca79f8310 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -90,8 +90,7 @@ struct genapic apic_summit = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; From 3d5f597e938c425554cb7668fd3c9d6a536a984a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:43:47 +0100 Subject: [PATCH 246/566] x86, smp: remove ->restore_NMI_vector() Nothing actually restores the NMI vector - so remove this logic altogether. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 1 - arch/x86/include/asm/numaq/wakecpu.h | 10 ---------- arch/x86/kernel/genapic_flat_64.c | 2 -- arch/x86/kernel/genx2apic_cluster.c | 1 - arch/x86/kernel/genx2apic_phys.c | 1 - arch/x86/kernel/genx2apic_uv_x.c | 1 - arch/x86/mach-generic/bigsmp.c | 1 - arch/x86/mach-generic/default.c | 1 - arch/x86/mach-generic/es7000.c | 1 - arch/x86/mach-generic/numaq.c | 1 - arch/x86/mach-generic/summit.c | 1 - 11 files changed, 21 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 90e83a769a1c..e5f9c5696fb6 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -82,7 +82,6 @@ struct genapic { void (*wait_for_init_deassert)(atomic_t *deassert); void (*smp_callin_clear_local_apic)(void); void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); void (*inquire_remote_apic)(int apicid); }; diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 123201712a96..920dcfefa83a 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -25,16 +25,6 @@ numaq_store_NMI_vector(unsigned short *high, unsigned short *low) *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } -static inline void -numaq_restore_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Restoring NMI vector\n"); - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)) = - *high; - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)) = - *low; -} - static inline void inquire_remote_apic(int apicid) { } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 3a28d6a8c497..e9237f599282 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -229,7 +229,6 @@ struct genapic apic_flat = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; @@ -375,6 +374,5 @@ struct genapic apic_physflat = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index abc5ee329f21..7c87156b6411 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -233,6 +233,5 @@ struct genapic apic_x2apic_cluster = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index dc815ef22d8c..5cbae8aa0408 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -219,6 +219,5 @@ struct genapic apic_x2apic_phys = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b5908735ca50..6adb5e6f4d92 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -292,7 +292,6 @@ struct genapic apic_x2apic_uv_x = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index ecdb230d0f2e..d9377af88cb3 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -111,6 +111,5 @@ struct genapic apic_bigsmp = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 950925615a9e..b004257035c7 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -92,6 +92,5 @@ struct genapic apic_default = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 131907091380..62673a8002ff 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -148,6 +148,5 @@ struct genapic apic_es7000 = { /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index d7f7fcf21c39..2c3341564d14 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -112,6 +112,5 @@ struct genapic apic_numaq = { .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = numaq_store_NMI_vector, - .restore_NMI_vector = numaq_restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 46fca79f8310..c2471a9fa8f3 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -91,6 +91,5 @@ struct genapic apic_summit = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; From 25dc004903a38f0b6f6626dbbab058c8709c5398 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: [PATCH 247/566] x86, smp: refactor ->inquire_remote_apic() methods Nothing exciting - a few subarches dont want APIC remote reads to be performed - the others are content with the default method. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 8 -------- arch/x86/include/asm/mach-default/mach_wakecpu.h | 2 +- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 2 -- arch/x86/include/asm/numaq/wakecpu.h | 4 ---- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 10 files changed, 8 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 71a3a412d0e4..99c72be1840e 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -13,12 +13,4 @@ static inline void es7000_wait_for_init_deassert(atomic_t *deassert) return; } -extern void __inquire_remote_apic(int apicid); - -static inline void inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} - #endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 656bb5e52bcc..b1cde560e4c1 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -16,7 +16,7 @@ static inline void __inquire_remote_apic(int apicid) } #endif /* CONFIG_SMP */ -static inline void inquire_remote_apic(int apicid) +static inline void default_inquire_remote_apic(int apicid) { if (apic_verbosity >= APIC_DEBUG) __inquire_remote_apic(apicid); diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 93207dfe8f50..0b884c03a3fc 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,6 +1,4 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define inquire_remote_apic (apic->inquire_remote_apic) - #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 920dcfefa83a..afe81439c7db 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -25,8 +25,4 @@ numaq_store_NMI_vector(unsigned short *high, unsigned short *low) *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } -static inline void inquire_remote_apic(int apicid) -{ -} - #endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1492024592ff..170adc5b6cb3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -876,8 +876,8 @@ do_rest: else /* trampoline code not run */ printk(KERN_ERR "Not responding.\n"); - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) - inquire_remote_apic(apicid); + if (apic->inquire_remote_apic) + apic->inquire_remote_apic(apicid); } } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d9377af88cb3..4d8b2d442bae 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -111,5 +111,5 @@ struct genapic apic_bigsmp = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index b004257035c7..c12dd2300a59 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -92,5 +92,5 @@ struct genapic apic_default = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 62673a8002ff..be090b2037ca 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -148,5 +148,5 @@ struct genapic apic_es7000 = { /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 2c3341564d14..ddb50fba2868 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -112,5 +112,5 @@ struct genapic apic_numaq = { .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = numaq_store_NMI_vector, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = NULL, }; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index c2471a9fa8f3..d5db3045437c 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -91,5 +91,5 @@ struct genapic apic_summit = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; From 018e047f3a98bd8d9e9d78b19bc38415f0c34dd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:09:58 +0100 Subject: [PATCH 248/566] x86, ES7000: consolidate the APIC code Consolidate all the ES7000 APIC code into arch/x86/mach-generic/es7000.c. With this ES7000 ceases to rely on any subarchitecture include files. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 236 ------------------- arch/x86/include/asm/es7000/apicdef.h | 9 - arch/x86/include/asm/es7000/ipi.h | 22 -- arch/x86/include/asm/es7000/mpparse.h | 23 -- arch/x86/include/asm/es7000/wakecpu.h | 16 -- arch/x86/mach-generic/es7000.c | 314 ++++++++++++++++++++++++-- 6 files changed, 295 insertions(+), 325 deletions(-) delete mode 100644 arch/x86/include/asm/es7000/apic.h delete mode 100644 arch/x86/include/asm/es7000/apicdef.h delete mode 100644 arch/x86/include/asm/es7000/ipi.h delete mode 100644 arch/x86/include/asm/es7000/mpparse.h delete mode 100644 arch/x86/include/asm/es7000/wakecpu.h diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h deleted file mode 100644 index b89b45db735d..000000000000 --- a/arch/x86/include/asm/es7000/apic.h +++ /dev/null @@ -1,236 +0,0 @@ -#ifndef __ASM_ES7000_APIC_H -#define __ASM_ES7000_APIC_H - -#include - -#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) - -static inline int es7000_apic_id_registered(void) -{ - return 1; -} - -static inline const cpumask_t *target_cpus_cluster(void) -{ - return &CPU_MASK_ALL; -} - -static inline const cpumask_t *es7000_target_cpus(void) -{ - return &cpumask_of_cpu(smp_processor_id()); -} - -#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) -#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} -static inline unsigned long es7000_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -extern void es7000_enable_apic_mode(void); - -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long id; - id = xapic_phys_to_log_apicid(cpu); - return (SET_APIC_LOGICAL_ID(id)); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void es7000_init_apic_ldr_cluster(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static inline void es7000_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -extern int apic_version [MAX_APICS]; -static inline void es7000_setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); -} - -static inline int es7000_apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static inline int es7000_cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) -{ - static int id = 0; - physid_mask_t mask; - mask = physid_mask_of_physid(id); - ++id; - return mask; -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int es7000_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); -} - -extern unsigned int boot_cpu_physical_apicid; - -static inline int es7000_check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return (1); -} - -static inline unsigned int -es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = cpumask_first(cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return 0xFF; - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return es7000_cpu_to_logical_apicid(0); - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = first_cpu(*cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return es7000_cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - - -static inline unsigned int -es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = es7000_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -static inline int es7000_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_ES7000_APIC_H */ diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h deleted file mode 100644 index c74881a7b3d8..000000000000 --- a/arch/x86/include/asm/es7000/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_ES7000_APICDEF_H -#define __ASM_ES7000_APICDEF_H - -static inline unsigned int es7000_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#endif diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h deleted file mode 100644 index 81e77c812baa..000000000000 --- a/arch/x86/include/asm/es7000/ipi.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_ES7000_IPI_H -#define __ASM_ES7000_IPI_H - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void es7000_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void es7000_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void es7000_send_IPI_all(int vector) -{ - es7000_send_IPI_mask(cpu_online_mask, vector); -} - -#endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h deleted file mode 100644 index 662eb1e574de..000000000000 --- a/arch/x86/include/asm/es7000/mpparse.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __ASM_ES7000_MPPARSE_H -#define __ASM_ES7000_MPPARSE_H - -#include - -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); -extern void setup_unisys(void); - -#ifdef CONFIG_ACPI -static inline int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} -#endif - -#endif /* __ASM_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h deleted file mode 100644 index 99c72be1840e..000000000000 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __ASM_ES7000_WAKECPU_H -#define __ASM_ES7000_WAKECPU_H - -#define ES7000_TRAMPOLINE_PHYS_LOW 0x467 -#define ES7000_TRAMPOLINE_PHYS_HIGH 0x469 - -static inline void es7000_wait_for_init_deassert(atomic_t *deassert) -{ -#ifndef CONFIG_ES7000_CLUSTERED_APIC - while (!atomic_read(deassert)) - cpu_relax(); -#endif - return; -} - -#endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index be090b2037ca..8b6113ec380c 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -11,13 +11,300 @@ #include #include #include -#include +#include #include -#include -#include -#include +#include #include +#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +extern void es7000_enable_apic_mode(void); +extern int apic_version [MAX_APICS]; +extern u8 cpu_2_logical_apicid[]; +extern unsigned int boot_cpu_physical_apicid; + +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +extern void setup_unisys(void); + +#define apicid_cluster(apicid) (apicid & 0xF0) +#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) + +static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + + +static void es7000_wait_for_init_deassert(atomic_t *deassert) +{ +#ifndef CONFIG_ES7000_CLUSTERED_APIC + while (!atomic_read(deassert)) + cpu_relax(); +#endif + return; +} + +static unsigned int es7000_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#ifdef CONFIG_ACPI +static int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} +#endif + +static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static void es7000_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static void es7000_send_IPI_all(int vector) +{ + es7000_send_IPI_mask(cpu_online_mask, vector); +} + +static int es7000_apic_id_registered(void) +{ + return 1; +} + +static const cpumask_t *target_cpus_cluster(void) +{ + return &CPU_MASK_ALL; +} + +static const cpumask_t *es7000_target_cpus(void) +{ + return &cpumask_of_cpu(smp_processor_id()); +} + +static unsigned long +es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} +static unsigned long es7000_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static unsigned long calculate_ldr(int cpu) +{ + unsigned long id = xapic_phys_to_log_apicid(cpu); + + return (SET_APIC_LOGICAL_ID(id)); +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LdR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static void es7000_init_apic_ldr_cluster(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_setup_apic_routing(void) +{ + int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + (apic_version[apic] == 0x14) ? + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); +} + +static int es7000_apicid_to_node(int logical_apicid) +{ + return 0; +} + + +static int es7000_cpu_present_to_apicid(int mps_cpu) +{ + if (!mps_cpu) + return boot_cpu_physical_apicid; + else if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +{ + static int id = 0; + physid_mask_t mask; + + mask = physid_mask_of_physid(id); + ++id; + + return mask; +} + +/* Mapping from cpu number to logical apicid */ +static int es7000_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xff); +} + +static int es7000_check_phys_apicid_present(int cpu_physical_apicid) +{ + boot_cpu_physical_apicid = read_apic_id(); + return (1); +} + +static unsigned int +es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpumask_weight(cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = cpumask_first(cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpumask_test_cpu(cpu, cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return es7000_cpu_to_logical_apicid(0); + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return es7000_cpu_to_logical_apicid(0); + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int +es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = es7000_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = es7000_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + void __init es7000_update_genapic_to_cluster(void) { apic->target_cpus = target_cpus_cluster; @@ -80,18 +367,6 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} struct genapic apic_es7000 = { @@ -140,10 +415,11 @@ struct genapic apic_es7000 = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = default_wait_for_init_deassert, + .trampoline_phys_low = 0x467, + .trampoline_phys_high = 0x469, + + .wait_for_init_deassert = es7000_wait_for_init_deassert, /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, From 0939e4fd351c58d08d25650797749f18904461af Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:16:25 +0100 Subject: [PATCH 249/566] x86, smp: eliminate asm/mach-default/mach_wakecpu.h Spread mach_wakecpu.h's definitions into apic.h and genapic.h and remove mach_wakecpu.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 15 +++++++++++ arch/x86/include/asm/genapic.h | 7 ++++++ .../include/asm/mach-default/mach_wakecpu.h | 25 ------------------- arch/x86/kernel/smpboot.c | 1 - arch/x86/mach-generic/bigsmp.c | 1 - arch/x86/mach-generic/default.c | 1 - arch/x86/mach-generic/es7000.c | 1 - arch/x86/mach-generic/summit.c | 1 - 8 files changed, 22 insertions(+), 30 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_wakecpu.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ab1d51a8855e..e8f030440bc7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -41,6 +41,21 @@ extern unsigned int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; + +#ifdef CONFIG_SMP +extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ + +static inline void default_inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} + /* * Basic functions accessing APICs. */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index e5f9c5696fb6..1772dad01b1d 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -113,4 +113,11 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif +static inline void default_wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h deleted file mode 100644 index b1cde560e4c1..000000000000 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H - -static inline void default_wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); - return; -} - -#ifdef CONFIG_SMP -extern void __inquire_remote_apic(int apicid); -#else /* CONFIG_SMP */ -static inline void __inquire_remote_apic(int apicid) -{ -} -#endif /* CONFIG_SMP */ - -static inline void default_inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 170adc5b6cb3..1fdc1a7e7b56 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -66,7 +66,6 @@ #include #include -#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 4d8b2d442bae..6fcccfb5918e 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -17,7 +17,6 @@ #include #include #include -#include static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c12dd2300a59..e3c5114fd91d 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -16,7 +16,6 @@ #include #include #include -#include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 8b6113ec380c..bb11166b7c3c 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -14,7 +14,6 @@ #include #include #include -#include #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) #define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index d5db3045437c..673a64f8b463 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -16,7 +16,6 @@ #include #include #include -#include static int probe_summit(void) { From fb5b33c9f62ca9222c11841d61ddb7dc1a6552e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:29:27 +0100 Subject: [PATCH 250/566] x86: eliminate asm/mach-*/mach_mpparse.h Move the definition to mpparse.h. Signed-off-by: Ingo Molnar --- .../x86/include/asm/mach-default/mach_mpparse.h | 17 ----------------- .../x86/include/asm/mach-generic/mach_mpparse.h | 8 -------- arch/x86/include/asm/mpspec.h | 4 ++++ arch/x86/kernel/acpi/boot.c | 1 - arch/x86/kernel/es7000_32.c | 1 - arch/x86/kernel/mpparse.c | 1 - arch/x86/mach-generic/bigsmp.c | 1 - arch/x86/mach-generic/default.c | 1 - 8 files changed, 4 insertions(+), 30 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_mpparse.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_mpparse.h diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h deleted file mode 100644 index af0da140df95..000000000000 --- a/arch/x86/include/asm/mach-default/mach_mpparse.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H -#define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H - -static inline int -generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - return 0; -} - -/* Hook from generic ACPI tables.c */ -static inline int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} - - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h deleted file mode 100644 index 22bfb56f8fbd..000000000000 --- a/arch/x86/include/asm/mach-generic/mach_mpparse.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H -#define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H - -extern int generic_mps_oem_check(struct mpc_table *, char *, char *); - -extern int default_acpi_madt_oem_check(char *, char *); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 62d14ce3cd00..432e9cbc6076 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -142,4 +142,8 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) extern physid_mask_t phys_cpu_present_map; +extern int generic_mps_oem_check(struct mpc_table *, char *, char *); + +extern int default_acpi_madt_oem_check(char *, char *); + #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 539163161a4c..7b02a1cedca0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_LOCAL_APIC #include -#include #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* X86 */ diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index d7f433ee602d..8faea13c8fac 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b12fa5ce6f58..c6930162b3be 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -32,7 +32,6 @@ #include #ifdef CONFIG_X86_32 #include -#include #endif /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 6fcccfb5918e..626f45ca4e7e 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -16,7 +16,6 @@ #include #include #include -#include static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e3c5114fd91d..6485e57e29b1 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -15,7 +15,6 @@ #include #include #include -#include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { From b2af018ff26f1a2a026f548f7f0e552589905689 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:36:56 +0100 Subject: [PATCH 251/566] x86: remove mach_mpspec.h Move its definitions into mpspec.h. Signed-off-by: Ingo Molnar --- .../include/asm/mach-default/mach_mpspec.h | 12 --------- .../include/asm/mach-generic/mach_mpspec.h | 12 --------- arch/x86/include/asm/mpspec.h | 25 ++++++++++++++----- 3 files changed, 19 insertions(+), 30 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_mpspec.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_mpspec.h diff --git a/arch/x86/include/asm/mach-default/mach_mpspec.h b/arch/x86/include/asm/mach-default/mach_mpspec.h deleted file mode 100644 index e85ede686be8..000000000000 --- a/arch/x86/include/asm/mach-default/mach_mpspec.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H -#define _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -#if CONFIG_BASE_SMALL == 0 -#define MAX_MP_BUSSES 256 -#else -#define MAX_MP_BUSSES 32 -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpspec.h b/arch/x86/include/asm/mach-generic/mach_mpspec.h deleted file mode 100644 index 3bc407226578..000000000000 --- a/arch/x86/include/asm/mach-generic/mach_mpspec.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H -#define _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -/* Summit or generic (i.e. installer) kernels need lots of bus entries. */ -/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#define MAX_MP_BUSSES 260 - -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 432e9cbc6076..03fb0d396543 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -9,7 +9,18 @@ extern int apic_version[MAX_APICS]; extern int pic_mode; #ifdef CONFIG_X86_32 -#include + +/* + * Summit or generic (i.e. installer) kernels need lots of bus entries. + * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. + */ +#if CONFIG_BASE_SMALL == 0 +# define MAX_MP_BUSSES 260 +#else +# define MAX_MP_BUSSES 32 +#endif + +#define MAX_IRQ_SOURCES 256 extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; @@ -20,15 +31,15 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES]; extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; #endif -#define MAX_APICID 256 +#define MAX_APICID 256 -#else +#else /* CONFIG_X86_64: */ -#define MAX_MP_BUSSES 256 +#define MAX_MP_BUSSES 256 /* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ -#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) -#endif +#endif /* CONFIG_X86_64 */ extern void early_find_smp_config(void); extern void early_get_smp_config(void); @@ -146,4 +157,6 @@ extern int generic_mps_oem_check(struct mpc_table *, char *, char *); extern int default_acpi_madt_oem_check(char *, char *); +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + #endif /* _ASM_X86_MPSPEC_H */ From 1f75ed0c1311a50ed393bcac258de65680d360e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:36:56 +0100 Subject: [PATCH 252/566] x86: remove mach_apicdef.h Move its definitions into apic.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 16 ++++++++++++++ arch/x86/include/asm/mach-default/mach_apic.h | 1 - .../include/asm/mach-default/mach_apicdef.h | 22 ------------------- .../include/asm/mach-generic/mach_apicdef.h | 8 ------- arch/x86/include/asm/smp.h | 4 ++-- arch/x86/kernel/apic.c | 1 - arch/x86/kernel/genapic_flat_64.c | 1 - arch/x86/kernel/io_apic.c | 1 - arch/x86/kernel/mpparse.c | 3 --- arch/x86/mach-generic/default.c | 1 - 10 files changed, 18 insertions(+), 40 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_apicdef.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_apicdef.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e8f030440bc7..3a3202074c63 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -211,4 +211,20 @@ static inline void disable_local_APIC(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_X86_64 +#define SET_APIC_ID(x) (apic->set_apic_id(x)) +#else + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + +#endif + #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 2e4104cf3481..b60b767d5be0 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -3,7 +3,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -#include #include #define APIC_DFR_VALUE (APIC_DFR_FLAT) diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h deleted file mode 100644 index 5141085962d3..000000000000 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H -#define _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H - -#include - -#ifdef CONFIG_X86_64 -#define SET_APIC_ID(x) (apic->set_apic_id(x)) -#else - -static inline unsigned default_get_apic_id(unsigned long x) -{ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - - if (APIC_XAPIC(ver)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; -} - -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h deleted file mode 100644 index 61caa65b13fb..000000000000 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_APICDEF_H -#define _ASM_X86_MACH_GENERIC_MACH_APICDEF_H - -#ifndef APIC_DEFINITION -#include -#endif - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c63d480802af..d4ac4de4bcec 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -173,6 +173,8 @@ extern int safe_smp_processor_id(void); #endif +#include + #ifdef CONFIG_X86_LOCAL_APIC #ifndef CONFIG_X86_64 @@ -182,7 +184,6 @@ static inline int logical_smp_processor_id(void) return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -#include static inline unsigned int read_apic_id(void) { unsigned int reg; @@ -197,7 +198,6 @@ static inline unsigned int read_apic_id(void) # if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); # else -#include static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 84b8e7c57abc..e6220809ca11 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -50,7 +50,6 @@ #include #include -#include #include /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index e9237f599282..19bffb3f7320 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -19,7 +19,6 @@ #include #include #include -#include #ifdef CONFIG_ACPI #include diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e90970ce21a0..abae81989c2f 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -64,7 +64,6 @@ #include #include -#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c6930162b3be..a1452a53d14f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -30,9 +30,6 @@ #include #include -#ifdef CONFIG_X86_32 -#include -#endif /* * Checksum an MP configuration block. diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 6485e57e29b1..07817b2a7876 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include From 328386d7ab600aa0993a1226f5817ac30a735724 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:50:18 +0100 Subject: [PATCH 253/566] x86, smp: refactor ->wake_cpu - remove macro wrappers Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/kernel/setup.c | 5 ++--- arch/x86/kernel/smpboot.c | 4 ++-- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index b60b767d5be0..bae053cdcde5 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -19,10 +19,8 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include #define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) -#define wakeup_secondary_cpu (apic->wakeup_cpu) extern void default_setup_apic_routing(void); #else -#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. * diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index ca460e459913..96f217f819e3 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,8 +3,6 @@ #include -#define wakeup_secondary_cpu (apic->wakeup_cpu) - extern void generic_bigsmp_probe(void); #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a58e9f5e6030..6b27f6dc7bf7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -589,9 +589,8 @@ early_param("elfcorehdr", setup_elfcorehdr); static int __init default_update_genapic(void) { #ifdef CONFIG_X86_SMP -# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) - apic->wakeup_cpu = wakeup_secondary_cpu_via_init; -# endif + if (!apic->wakeup_cpu) + apic->wakeup_cpu = wakeup_secondary_cpu_via_init; #endif return 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1fdc1a7e7b56..3fed177f3457 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -750,7 +750,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. + * Returns zero if CPU booted OK, else error code from ->wakeup_cpu. */ { unsigned long boot_error = 0; @@ -841,7 +841,7 @@ do_rest: /* * Starting actual IPI sequence... */ - boot_error = wakeup_secondary_cpu(apicid, start_ip); + boot_error = apic->wakeup_cpu(apicid, start_ip); if (!boot_error) { /* From 5a44632f77a9c867621f7bf80c233eac75fea672 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 18:47:24 +0100 Subject: [PATCH 254/566] x86, numaq: consolidate code Move all the NUMAQ subarch definitions into numaq.c. With this it ceases to depend on build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numaq/apic.h | 122 ------------------- arch/x86/include/asm/numaq/apicdef.h | 9 -- arch/x86/include/asm/numaq/ipi.h | 22 ---- arch/x86/include/asm/numaq/mpparse.h | 6 - arch/x86/include/asm/numaq/wakecpu.h | 28 ----- arch/x86/mach-generic/numaq.c | 171 ++++++++++++++++++++++++++- arch/x86/pci/numaq_32.c | 2 +- 7 files changed, 167 insertions(+), 193 deletions(-) delete mode 100644 arch/x86/include/asm/numaq/apic.h delete mode 100644 arch/x86/include/asm/numaq/apicdef.h delete mode 100644 arch/x86/include/asm/numaq/ipi.h delete mode 100644 arch/x86/include/asm/numaq/mpparse.h delete mode 100644 arch/x86/include/asm/numaq/wakecpu.h diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h deleted file mode 100644 index ce95e79f7233..000000000000 --- a/arch/x86/include/asm/numaq/apic.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef __ASM_NUMAQ_APIC_H -#define __ASM_NUMAQ_APIC_H - -#include -#include -#include - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline const cpumask_t *numaq_target_cpus(void) -{ - return &CPU_MASK_ALL; -} - -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} -static inline unsigned long numaq_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline int numaq_apic_id_registered(void) -{ - return 1; -} - -static inline void numaq_init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void numaq_setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "NUMA-Q", nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int numaq_multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); -} - -/* Mapping from cpu number to logical apicid */ -extern u8 cpu_2_logical_apicid[]; - -static inline int numaq_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int numaq_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int numaq_apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) -{ - int node = numaq_apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - return physid_mask_of_physid(cpu + 4*node); -} - -extern void *xquad_portio; - -static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return 0x0F; -} - -static inline unsigned int -numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - return 0x0F; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_NUMAQ_APIC_H */ diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h deleted file mode 100644 index cd927d5bd505..000000000000 --- a/arch/x86/include/asm/numaq/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_NUMAQ_APICDEF_H -#define __ASM_NUMAQ_APICDEF_H - -static inline unsigned int numaq_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0x0F; -} - -#endif diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h deleted file mode 100644 index 5dbc4b4cd5e5..000000000000 --- a/arch/x86/include/asm/numaq/ipi.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_NUMAQ_IPI_H -#define __ASM_NUMAQ_IPI_H - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void numaq_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void numaq_send_IPI_all(int vector) -{ - numaq_send_IPI_mask(cpu_online_mask, vector); -} - -#endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/numaq/mpparse.h b/arch/x86/include/asm/numaq/mpparse.h deleted file mode 100644 index a2eeefcd1cc7..000000000000 --- a/arch/x86/include/asm/numaq/mpparse.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_NUMAQ_MPPARSE_H -#define __ASM_NUMAQ_MPPARSE_H - -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - -#endif /* __ASM_NUMAQ_MPPARSE_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h deleted file mode 100644 index afe81439c7db..000000000000 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __ASM_NUMAQ_WAKECPU_H -#define __ASM_NUMAQ_WAKECPU_H - -/* This file copes with machines that wakeup secondary CPUs by NMIs */ - -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it: - */ -static inline void numaq_smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline void -numaq_store_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Storing NMI vector\n"); - *high = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); - *low = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); -} - -#endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index ddb50fba2868..c221cfb2c2db 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -11,14 +11,175 @@ #include #include #include -#include +#include #include -#include -#include -#include -#include #include +#include +#include +#include +#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline unsigned int numaq_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0x0F; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void numaq_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void numaq_send_IPI_all(int vector) +{ + numaq_send_IPI_mask(cpu_online_mask, vector); +} + +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) + +/* + * Because we use NMIs rather than the INIT-STARTUP sequence to + * bootstrap the CPUs, the APIC may be in a weird state. Kick it: + */ +static inline void numaq_smp_callin_clear_local_apic(void) +{ + clear_local_APIC(); +} + +static inline void +numaq_store_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Storing NMI vector\n"); + *high = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); + *low = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); +} + +static inline const cpumask_t *numaq_target_cpus(void) +{ + return &CPU_MASK_ALL; +} + +static inline unsigned long +numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long numaq_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline int numaq_apic_id_registered(void) +{ + return 1; +} + +static inline void numaq_init_apic_ldr(void) +{ + /* Already done in NUMA-Q firmware */ +} + +static inline void numaq_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "NUMA-Q", nr_ioapics); +} + +/* + * Skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum. + */ +static inline int numaq_multi_timer_check(int apic, int irq) +{ + return apic != 0 && irq == 0; +} + +static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* We don't have a good way to do this yet - hack */ + return physids_promote(0xFUL); +} + +/* Mapping from cpu number to logical apicid */ +extern u8 cpu_2_logical_apicid[]; + +static inline int numaq_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +} + +/* + * Supporting over 60 cpus on NUMA-Q requires a locality-dependent + * cpu to APIC ID relation to properly interact with the intelligent + * mode of the cluster controller. + */ +static inline int numaq_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < 60) + return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); + else + return BAD_APICID; +} + +static inline int numaq_apicid_to_node(int logical_apicid) +{ + return logical_apicid >> 4; +} + +static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +{ + int node = numaq_apicid_to_node(logical_apicid); + int cpu = __ffs(logical_apicid & 0xf); + + return physid_mask_of_physid(cpu + 4*node); +} + +extern void *xquad_portio; + +static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* + * We use physical apicids here, not logical, so just return the default + * physical broadcast to stop people from breaking us + */ +static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return 0x0F; +} + +static inline unsigned int +numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + return 0x0F; +} + +/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ +static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { numaq_mps_oem_check(mpc, oem, productid); diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 2089354968a2..1b2d773612e7 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include From b11b867f78910192fc54bd0d09148cf768c7aaad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 18:49:31 +0100 Subject: [PATCH 255/566] x86, summit: consolidate code Consolidate all the Summit code into a single file: arch/x86/kernel/summit_32.c. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/summit/apic.h | 195 ------------ arch/x86/include/asm/summit/apicdef.h | 9 - arch/x86/include/asm/summit/ipi.h | 26 -- arch/x86/include/asm/summit/mpparse.h | 109 ------- arch/x86/kernel/summit_32.c | 417 +++++++++++++++++++++++++- arch/x86/mach-generic/Makefile | 1 - arch/x86/mach-generic/summit.c | 94 ------ 7 files changed, 416 insertions(+), 435 deletions(-) delete mode 100644 arch/x86/include/asm/summit/apic.h delete mode 100644 arch/x86/include/asm/summit/apicdef.h delete mode 100644 arch/x86/include/asm/summit/ipi.h delete mode 100644 arch/x86/include/asm/summit/mpparse.h delete mode 100644 arch/x86/mach-generic/summit.c diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h deleted file mode 100644 index 15b8dbd19e1a..000000000000 --- a/arch/x86/include/asm/summit/apic.h +++ /dev/null @@ -1,195 +0,0 @@ -#ifndef __ASM_SUMMIT_APIC_H -#define __ASM_SUMMIT_APIC_H - -#include -#include - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline const cpumask_t *summit_target_cpus(void) -{ - /* CPU_MASK_ALL (0xff) has undefined behaviour with - * dest_LowestPrio mode logical clustered apic interrupt routing - * Just start on cpu 0. IRQ balancing will spread load - */ - return &cpumask_of_cpu(0); -} - -static inline unsigned long -summit_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} - -/* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long summit_check_apicid_present(int bit) -{ - return 1; -} - -#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) - -extern u8 cpu_2_logical_apicid[]; - -static inline void summit_init_apic_ldr(void) -{ - unsigned long val, id; - int count = 0; - u8 my_id = (u8)hard_smp_processor_id(); - u8 my_cluster = (u8)apicid_cluster(my_id); -#ifdef CONFIG_SMP - u8 lid; - int i; - - /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = nr_cpu_ids; --i >= 0; ) { - lid = cpu_2_logical_apicid[i]; - if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) - ++count; - } -#endif - /* We only have a 4 wide bitmap in cluster mode. If a deranged - * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ - BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); - id = my_cluster | (1UL << count); - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static inline int summit_apic_id_registered(void) -{ - return 1; -} - -static inline void summit_setup_apic_routing(void) -{ - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); -} - -static inline int summit_apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -/* Mapping from cpu number to logical apicid */ -static inline int summit_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline int summit_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t -summit_ioapic_phys_id_map(physid_mask_t phys_id_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); -} - -static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) -{ - return physid_mask_of_physid(0); -} - -static inline void summit_setup_portio_remap(void) -{ -} - -static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ - if (num_bits_set >= nr_cpu_ids) - return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = first_cpu(*cpumask); - apicid = summit_cpu_to_logical_apicid(cpu); - - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = summit_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline unsigned int -summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = summit_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = summit_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -/* - * cpuid returns the value latched in the HW at reset, not the APIC ID - * register's value. For any box whose BIOS changes APIC IDs, like - * clustered APIC systems, we must use hard_smp_processor_id. - * - * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. - */ -static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -#endif /* __ASM_SUMMIT_APIC_H */ diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h deleted file mode 100644 index c24b0df2dec6..000000000000 --- a/arch/x86/include/asm/summit/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_SUMMIT_APICDEF_H -#define __ASM_SUMMIT_APICDEF_H - -static inline unsigned summit_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#endif diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h deleted file mode 100644 index f87a43fe0aed..000000000000 --- a/arch/x86/include/asm/summit/ipi.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __ASM_SUMMIT_IPI_H -#define __ASM_SUMMIT_IPI_H - -void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); - -static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void summit_send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - summit_send_IPI_mask(&mask, vector); -} - -static inline void summit_send_IPI_all(int vector) -{ - summit_send_IPI_mask(&cpu_online_map, vector); -} - -#endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h deleted file mode 100644 index 4bbcce39acb8..000000000000 --- a/arch/x86/include/asm/summit/mpparse.h +++ /dev/null @@ -1,109 +0,0 @@ -#ifndef __ASM_SUMMIT_MPPARSE_H -#define __ASM_SUMMIT_MPPARSE_H - -#include - -extern int use_cyclone; - -#ifdef CONFIG_X86_SUMMIT_NUMA -extern void setup_summit(void); -#else -#define setup_summit() {} -#endif - -static inline int -summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) - || !strncmp(productid, "EXA", 3) - || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -/* Hook from generic ACPI tables.c */ -static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && - (!strncmp(oem_table_id, "SERVIGIL", 8) - || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -struct rio_table_hdr { - unsigned char version; /* Version number of this data structure */ - /* Version 3 adds chassis_num & WP_index */ - unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ - unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ -} __attribute__((packed)); - -struct scal_detail { - unsigned char node_id; /* Scalability Node ID */ - unsigned long CBAR; /* Address of 1MB register space */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF = None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port2node; /* Node ID port connected to: 0xFF = None */ - unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - unsigned char node_id; /* RIO Node ID */ - unsigned long BBAR; /* Address of 1MB register space */ - unsigned char type; /* Type of device */ - unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ - /* For CYC: Node ID of Twister that owns this CYC */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF=None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ - /* For CYC: 0 */ - unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie:*/ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - /* For CYC: Bits0:7 Reserved */ - unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - /* For CYC: No meaning */ - unsigned char chassis_num; /* 1 based Chassis number */ - /* For LookOut WPEGs this field indicates the */ - /* Expansion Chassis #, enumerated from Boot */ - /* Node WPEG external port, then Boot Node CYC */ - /* external port, then Next Vigil chassis WPEG */ - /* external port, etc. */ - /* Shared Lookouts have only 1 chassis number (the */ - /* first one assigned) */ -} __attribute__((packed)); - - -typedef enum { - CompatTwister = 0, /* Compatibility Twister */ - AltTwister = 1, /* Alternate Twister of internal 8-way */ - CompatCyclone = 2, /* Compatibility Cyclone */ - AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ - CompatWPEG = 4, /* Compatibility WPEG */ - AltWPEG = 5, /* Second Planar WPEG */ - LookOutAWPEG = 6, /* LookOut WPEG */ - LookOutBWPEG = 7, /* LookOut WPEG */ -} node_type; - -static inline int is_WPEG(struct rio_detail *rio){ - return (rio->type == CompatWPEG || rio->type == AltWPEG || - rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); -} - -#endif /* __ASM_SUMMIT_MPPARSE_H */ diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 7b987852e876..3b60dd5e57fa 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,364 @@ #include #include #include -#include + +/* + * APIC driver for the IBM "Summit" chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline unsigned summit_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); + +static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void summit_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + summit_send_IPI_mask(&mask, vector); +} + +static inline void summit_send_IPI_all(int vector) +{ + summit_send_IPI_mask(&cpu_online_map, vector); +} + +#include + +extern int use_cyclone; + +#ifdef CONFIG_X86_SUMMIT_NUMA +extern void setup_summit(void); +#else +#define setup_summit() {} +#endif + +static inline int +summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) + || !strncmp(productid, "EXA", 3) + || !strncmp(productid, "RUTHLESS SMP", 12))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +/* Hook from generic ACPI tables.c */ +static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERVIGIL", 8) + || !strncmp(oem_table_id, "EXA", 3))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +struct rio_table_hdr { + unsigned char version; /* Version number of this data structure */ + /* Version 3 adds chassis_num & WP_index */ + unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ + unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ +} __attribute__((packed)); + +struct scal_detail { + unsigned char node_id; /* Scalability Node ID */ + unsigned long CBAR; /* Address of 1MB register space */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF = None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port2node; /* Node ID port connected to: 0xFF = None */ + unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + unsigned char node_id; /* RIO Node ID */ + unsigned long BBAR; /* Address of 1MB register space */ + unsigned char type; /* Type of device */ + unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ + /* For CYC: Node ID of Twister that owns this CYC */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF=None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ + /* For CYC: 0 */ + unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie:*/ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + /* For CYC: Bits0:7 Reserved */ + unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + /* For CYC: No meaning */ + unsigned char chassis_num; /* 1 based Chassis number */ + /* For LookOut WPEGs this field indicates the */ + /* Expansion Chassis #, enumerated from Boot */ + /* Node WPEG external port, then Boot Node CYC */ + /* external port, then Next Vigil chassis WPEG */ + /* external port, etc. */ + /* Shared Lookouts have only 1 chassis number (the */ + /* first one assigned) */ +} __attribute__((packed)); + + +typedef enum { + CompatTwister = 0, /* Compatibility Twister */ + AltTwister = 1, /* Alternate Twister of internal 8-way */ + CompatCyclone = 2, /* Compatibility Cyclone */ + AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ + CompatWPEG = 4, /* Compatibility WPEG */ + AltWPEG = 5, /* Second Planar WPEG */ + LookOutAWPEG = 6, /* LookOut WPEG */ + LookOutBWPEG = 7, /* LookOut WPEG */ +} node_type; + +static inline int is_WPEG(struct rio_detail *rio){ + return (rio->type == CompatWPEG || rio->type == AltWPEG || + rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); +} + + +/* In clustered mode, the high nibble of APIC ID is a cluster number. + * The low nibble is a 4-bit bitmap. */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) + +#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline const cpumask_t *summit_target_cpus(void) +{ + /* CPU_MASK_ALL (0xff) has undefined behaviour with + * dest_LowestPrio mode logical clustered apic interrupt routing + * Just start on cpu 0. IRQ balancing will spread load + */ + return &cpumask_of_cpu(0); +} + +static inline unsigned long +summit_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +/* we don't use the phys_cpu_present_map to indicate apicid presence */ +static inline unsigned long summit_check_apicid_present(int bit) +{ + return 1; +} + +#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) + +extern u8 cpu_2_logical_apicid[]; + +static inline void summit_init_apic_ldr(void) +{ + unsigned long val, id; + int count = 0; + u8 my_id = (u8)hard_smp_processor_id(); + u8 my_cluster = (u8)apicid_cluster(my_id); +#ifdef CONFIG_SMP + u8 lid; + int i; + + /* Create logical APIC IDs by counting CPUs already in cluster. */ + for (count = 0, i = nr_cpu_ids; --i >= 0; ) { + lid = cpu_2_logical_apicid[i]; + if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) + ++count; + } +#endif + /* We only have a 4 wide bitmap in cluster mode. If a deranged + * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ + BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); + id = my_cluster | (1UL << count); + apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write(APIC_LDR, val); +} + +static inline int summit_apic_id_registered(void) +{ + return 1; +} + +static inline void summit_setup_apic_routing(void) +{ + printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", + nr_ioapics); +} + +static inline int summit_apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} + +/* Mapping from cpu number to logical apicid */ +static inline int summit_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static inline int summit_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t +summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0x0F); +} + +static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) +{ + return physid_mask_of_physid(0); +} + +static inline void summit_setup_portio_remap(void) +{ +} + +static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set >= nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = summit_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = summit_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = apicid | new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static inline unsigned int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = summit_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = summit_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +/* + * cpuid returns the value latched in the HW at reset, not the APIC ID + * register's value. For any box whose BIOS changes APIC IDs, like + * clustered APIC systems, we must use hard_smp_processor_id. + * + * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. + */ +static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + +static int probe_summit(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; @@ -186,3 +543,61 @@ void __init setup_summit(void) next_wpeg = 0; } while (next_wpeg != 0); } + + +struct genapic apic_summit = { + + .name = "summit", + .probe = probe_summit, + .acpi_madt_oem_check = summit_acpi_madt_oem_check, + .apic_id_registered = summit_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = summit_target_cpus, + .disable_esr = 1, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = summit_check_apicid_used, + .check_apicid_present = summit_check_apicid_present, + + .vector_allocation_domain = summit_vector_allocation_domain, + .init_apic_ldr = summit_init_apic_ldr, + + .ioapic_phys_id_map = summit_ioapic_phys_id_map, + .setup_apic_routing = summit_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = summit_apicid_to_node, + .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, + .cpu_present_to_apicid = summit_cpu_present_to_apicid, + .apicid_to_cpu_present = summit_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = summit_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = summit_phys_pkg_id, + .mps_oem_check = summit_mps_oem_check, + + .get_apic_id = summit_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, + + .send_IPI_mask = summit_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = summit_send_IPI_allbutself, + .send_IPI_all = summit_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = default_wait_for_init_deassert, + + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 6730f4e7c744..78ab5735cb80 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -6,6 +6,5 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o obj-$(CONFIG_X86_NUMAQ) += numaq.o -obj-$(CONFIG_X86_SUMMIT) += summit.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o obj-$(CONFIG_X86_ES7000) += es7000.o diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c deleted file mode 100644 index 673a64f8b463..000000000000 --- a/arch/x86/mach-generic/summit.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * APIC driver for the IBM "Summit" chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int probe_summit(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} - -struct genapic apic_summit = { - - .name = "summit", - .probe = probe_summit, - .acpi_madt_oem_check = summit_acpi_madt_oem_check, - .apic_id_registered = summit_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = summit_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = summit_check_apicid_used, - .check_apicid_present = summit_check_apicid_present, - - .vector_allocation_domain = summit_vector_allocation_domain, - .init_apic_ldr = summit_init_apic_ldr, - - .ioapic_phys_id_map = summit_ioapic_phys_id_map, - .setup_apic_routing = summit_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = summit_apicid_to_node, - .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, - .cpu_present_to_apicid = summit_cpu_present_to_apicid, - .apicid_to_cpu_present = summit_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = summit_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = summit_phys_pkg_id, - .mps_oem_check = summit_mps_oem_check, - - .get_apic_id = summit_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, - - .send_IPI_mask = summit_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = summit_send_IPI_allbutself, - .send_IPI_all = summit_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, - .inquire_remote_apic = default_inquire_remote_apic, -}; From a448720ca3248e8a7a426336885549d6e923fd8e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 15:42:23 -0800 Subject: [PATCH 256/566] x86: unify asm/io.h: IO_SPACE_LIMIT Impact: Cleanup (trivial unification) Move common define IO_SPACE_LIMIT from to . Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io.h | 1 + arch/x86/include/asm/io_32.h | 2 -- arch/x86/include/asm/io_64.h | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 05cfed4485fa..975207c08b3e 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -106,5 +106,6 @@ extern void __iomem *early_memremap(unsigned long offset, unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); +#define IO_SPACE_LIMIT 0xffff #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h index d8e242e1b396..e08d8ed05a1f 100644 --- a/arch/x86/include/asm/io_32.h +++ b/arch/x86/include/asm/io_32.h @@ -37,8 +37,6 @@ * - Arnaldo Carvalho de Melo */ -#define IO_SPACE_LIMIT 0xffff - #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index 563c16270ba6..1131d8ea2c61 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h @@ -136,8 +136,6 @@ __OUTS(b) __OUTS(w) __OUTS(l) -#define IO_SPACE_LIMIT 0xffff - #if defined(__KERNEL__) && defined(__x86_64__) #include From 7c20dcc545d78946e40e8fab99637fe815b1d211 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 Jan 2009 11:29:22 +0100 Subject: [PATCH 257/566] x86, summit: consolidate code, fix Build fix for !NUMA Summit. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/summit_32.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 37fa30bada17..a382ca6f6a17 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_ES7000) += es7000_32.o -obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o +obj-$(CONFIG_X86_SUMMIT) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 3b60dd5e57fa..84ff9ebbcc97 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -389,6 +389,7 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } +#ifdef CONFIG_X86_SUMMIT_NUMA static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; @@ -543,7 +544,7 @@ void __init setup_summit(void) next_wpeg = 0; } while (next_wpeg != 0); } - +#endif struct genapic apic_summit = { From 1dcdd3d15ecea0c22a09d4d001a39d425fceff2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:55:37 +0100 Subject: [PATCH 258/566] x86: remove mach_apic.h Spread mach_apic.h definitions into genapic.h. (with some knock-on effects on smp.h and apic.h.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/genapic.h | 139 +++++++++++++++++ arch/x86/include/asm/mach-default/mach_apic.h | 144 ------------------ arch/x86/include/asm/mach-generic/mach_apic.h | 8 - arch/x86/include/asm/smp.h | 19 --- arch/x86/kernel/acpi/boot.c | 14 +- arch/x86/kernel/apic.c | 22 ++- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/mpparse.c | 3 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tlb_uv.c | 2 +- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/probe.c | 5 - 22 files changed, 175 insertions(+), 207 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_apic.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_apic.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3a3202074c63..6a77068e261a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -215,7 +215,7 @@ static inline void disable_local_APIC(void) { } #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -static inline unsigned default_get_apic_id(unsigned long x) +static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 1772dad01b1d..ce3655a4948d 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -120,4 +120,143 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } +extern void generic_bigsmp_probe(void); + + +#ifdef CONFIG_X86_LOCAL_APIC + +#include + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); + + +static inline unsigned int read_apic_id(void) +{ + unsigned int reg; + + reg = *(u32 *)(APIC_BASE + APIC_ID); + + return apic->get_apic_id(reg); +} + +#ifdef CONFIG_X86_64 +extern void default_setup_apic_routing(void); +#else + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +extern void default_init_apic_ldr(void); + +static inline int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0]; +} + +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); +} + +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +static inline void default_setup_apic_routing(void) +{ +#ifdef CONFIG_X86_IO_APIC + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Flat", nr_ioapics); +#endif +} + +extern int default_apicid_to_node(int logical_apicid); + +#endif + +static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long default_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +/* Mapping from cpu number to logical apicid */ +static inline int default_cpu_to_logical_apicid(int cpu) +{ + return 1 << cpu; +} + +static inline int __default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline int +__default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +static inline int +default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); +#endif + +static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +#endif /* CONFIG_X86_LOCAL_APIC */ #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h deleted file mode 100644 index bae053cdcde5..000000000000 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_APIC_H -#define _ASM_X86_MACH_DEFAULT_MACH_APIC_H - -#ifdef CONFIG_X86_LOCAL_APIC - -#include - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline const struct cpumask *default_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -#ifdef CONFIG_X86_64 -#include -#define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) -extern void default_setup_apic_routing(void); -#else -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -static inline int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -static inline unsigned int -default_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0]; -} - -static inline unsigned int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - unsigned long mask1 = cpumask_bits(cpumask)[0]; - unsigned long mask2 = cpumask_bits(andmask)[0]; - unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - - return (unsigned int)(mask1 & mask2 & mask3); -} - -static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static inline void default_setup_apic_routing(void) -{ -#ifdef CONFIG_X86_IO_APIC - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Flat", nr_ioapics); -#endif -} - -static inline int default_apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -#endif - -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} - -static inline unsigned long default_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) -{ - return phys_map; -} - -/* Mapping from cpu number to logical apicid */ -static inline int default_cpu_to_logical_apicid(int cpu) -{ - return 1 << cpu; -} - -static inline int __default_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline int -__default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); -} - -#ifdef CONFIG_X86_32 -static inline int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -static inline int -default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); -} -#else -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); -#endif - -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -#endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h deleted file mode 100644 index 96f217f819e3..000000000000 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_APIC_H -#define _ASM_X86_MACH_GENERIC_MACH_APIC_H - -#include - -extern void generic_bigsmp_probe(void); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d4ac4de4bcec..47d0e21f2b9e 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -173,8 +173,6 @@ extern int safe_smp_processor_id(void); #endif -#include - #ifdef CONFIG_X86_LOCAL_APIC #ifndef CONFIG_X86_64 @@ -184,26 +182,9 @@ static inline int logical_smp_processor_id(void) return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -static inline unsigned int read_apic_id(void) -{ - unsigned int reg; - - reg = *(u32 *)(APIC_BASE + APIC_ID); - - return apic->get_apic_id(reg); -} #endif - -# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); -# else -static inline int hard_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return read_apic_id(); -} -# endif /* APIC_DEFINITION */ #else /* CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7b02a1cedca0..cb8b52785e37 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -42,10 +42,6 @@ #include #include -#ifdef CONFIG_X86_LOCAL_APIC -# include -#endif - static int __initdata acpi_force = 0; u32 acpi_rsdt_forced; #ifdef CONFIG_ACPI @@ -56,15 +52,7 @@ int acpi_disabled = 1; EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 - -#include - -#else /* X86 */ - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#endif /* CONFIG_X86_LOCAL_APIC */ - +# include #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index e6220809ca11..41a0ba34d6b2 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,7 +49,6 @@ #include #include -#include #include /* @@ -1910,11 +1909,30 @@ void __cpuinit generic_processor_info(int apicid, int version) set_cpu_present(cpu, true); } -#ifdef CONFIG_X86_64 int hard_smp_processor_id(void) { return read_apic_id(); } + +void default_init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +#ifdef CONFIG_X86_32 +int default_apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} #endif /* diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index e8bb892c09fd..4a48bb409748 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,7 +7,7 @@ #include #include -#include +#include struct cpuid_bit { u16 feature; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7c878f6aa919..ff4d7b9e32e4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -12,7 +12,7 @@ # include #endif -#include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 055b9c3a6600..c4bdc7f00207 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -26,7 +26,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #include #include #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5deefae9064d..1cef0aa5e5dc 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,7 +24,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #endif static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index abae81989c2f..e0744ea6d0f1 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -63,7 +63,7 @@ #include #include -#include +#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index e16c41b2e4ec..50076d92fbc0 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -19,7 +19,7 @@ #include #ifdef CONFIG_X86_32 -#include +#include #include /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e0f29be8ab0b..d802c844991e 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -231,7 +231,7 @@ unsigned int do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -#include +#include /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a1452a53d14f..94fe71029c37 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -29,8 +29,7 @@ #include #include -#include - +#include /* * Checksum an MP configuration block. */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6b27f6dc7bf7..92e42939fb08 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -97,7 +97,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index c48ba6cc32aa..892e7c389be1 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include /* * Some notes on x86 processor bugs affecting SMP operation: * diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3fed177f3457..489fde9d9476 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 89fce1b6d01f..755ede02b13f 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -20,7 +20,7 @@ #include #include -#include +#include static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 2ed5bdf15c9f..3bd7f47a91b7 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -34,7 +34,7 @@ #include -#include "mach_apic.h" +#include #include diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 07817b2a7876..7d5123e474e1 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index ab68c6e5c48a..c03c72221320 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -154,8 +154,3 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } return 0; } - -int hard_smp_processor_id(void) -{ - return apic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); -} From 83d7aeabe4cf20e59b5d7fd56a75cfd0e0b6b880 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 28 Jan 2009 17:52:57 -0800 Subject: [PATCH 259/566] x86: remove mach_apic.h, fix Use apic_read() instead of open-coded mmio. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index ce3655a4948d..ccfcd19d5b7d 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -145,7 +145,7 @@ static inline unsigned int read_apic_id(void) { unsigned int reg; - reg = *(u32 *)(APIC_BASE + APIC_ID); + reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } From 2e096df8edefad78155bb406a5a86c182b17786e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:01:05 +0100 Subject: [PATCH 260/566] x86, ES7000: Consolidate code Move all ES7000 code into arch/x86/kernel/es7000_32.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 428 +++++++++++++++++++++++++++++++++ arch/x86/mach-generic/Makefile | 1 - arch/x86/mach-generic/es7000.c | 427 -------------------------------- 3 files changed, 428 insertions(+), 428 deletions(-) delete mode 100644 arch/x86/mach-generic/es7000.c diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 8faea13c8fac..078364ccfa07 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -372,3 +372,431 @@ void __init es7000_enable_apic_mode(void) mip_status); } } + +/* + * APIC driver for the Unisys ES7000 chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +extern void es7000_enable_apic_mode(void); +extern int apic_version [MAX_APICS]; +extern u8 cpu_2_logical_apicid[]; +extern unsigned int boot_cpu_physical_apicid; + +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +extern void setup_unisys(void); + +#define apicid_cluster(apicid) (apicid & 0xF0) +#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) + +static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + + +static void es7000_wait_for_init_deassert(atomic_t *deassert) +{ +#ifndef CONFIG_ES7000_CLUSTERED_APIC + while (!atomic_read(deassert)) + cpu_relax(); +#endif + return; +} + +static unsigned int es7000_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#ifdef CONFIG_ACPI +static int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} +#endif + +static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static void es7000_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static void es7000_send_IPI_all(int vector) +{ + es7000_send_IPI_mask(cpu_online_mask, vector); +} + +static int es7000_apic_id_registered(void) +{ + return 1; +} + +static const cpumask_t *target_cpus_cluster(void) +{ + return &CPU_MASK_ALL; +} + +static const cpumask_t *es7000_target_cpus(void) +{ + return &cpumask_of_cpu(smp_processor_id()); +} + +static unsigned long +es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} +static unsigned long es7000_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static unsigned long calculate_ldr(int cpu) +{ + unsigned long id = xapic_phys_to_log_apicid(cpu); + + return (SET_APIC_LOGICAL_ID(id)); +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LdR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static void es7000_init_apic_ldr_cluster(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_setup_apic_routing(void) +{ + int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + (apic_version[apic] == 0x14) ? + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); +} + +static int es7000_apicid_to_node(int logical_apicid) +{ + return 0; +} + + +static int es7000_cpu_present_to_apicid(int mps_cpu) +{ + if (!mps_cpu) + return boot_cpu_physical_apicid; + else if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +{ + static int id = 0; + physid_mask_t mask; + + mask = physid_mask_of_physid(id); + ++id; + + return mask; +} + +/* Mapping from cpu number to logical apicid */ +static int es7000_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xff); +} + +static int es7000_check_phys_apicid_present(int cpu_physical_apicid) +{ + boot_cpu_physical_apicid = read_apic_id(); + return (1); +} + +static unsigned int +es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpumask_weight(cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = cpumask_first(cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpumask_test_cpu(cpu, cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return es7000_cpu_to_logical_apicid(0); + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return es7000_cpu_to_logical_apicid(0); + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int +es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = es7000_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = es7000_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +void __init es7000_update_genapic_to_cluster(void) +{ + apic->target_cpus = target_cpus_cluster; + apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; + + apic->init_apic_ldr = es7000_init_apic_ldr_cluster; + + apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; +} + +static int probe_es7000(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +static __init int +es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + if (mpc->oemptr) { + struct mpc_oemtable *oem_table = + (struct mpc_oemtable *)mpc->oemptr; + + if (!strncmp(oem, "UNISYS", 6)) + return parse_unisys_oem((char *)oem_table); + } + return 0; +} + +#ifdef CONFIG_ACPI +/* Hook from generic ACPI tables.c */ +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + unsigned long oem_addr = 0; + int check_dsdt; + int ret = 0; + + /* check dsdt at first to avoid clear fix_map for oem_addr */ + check_dsdt = es7000_check_dsdt(); + + if (!find_unisys_acpi_oem_table(&oem_addr)) { + if (check_dsdt) + ret = parse_unisys_oem((char *)oem_addr); + else { + setup_unisys(); + ret = 1; + } + /* + * we need to unmap it + */ + unmap_unisys_acpi_oem_table(oem_addr); + } + return ret; +} +#else +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} +#endif + + +struct genapic apic_es7000 = { + + .name = "es7000", + .probe = probe_es7000, + .acpi_madt_oem_check = es7000_acpi_madt_oem_check, + .apic_id_registered = es7000_apic_id_registered, + + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPUs: */ + .irq_dest_mode = 0, + + .target_cpus = es7000_target_cpus, + .disable_esr = 1, + .dest_logical = 0, + .check_apicid_used = es7000_check_apicid_used, + .check_apicid_present = es7000_check_apicid_present, + + .vector_allocation_domain = es7000_vector_allocation_domain, + .init_apic_ldr = es7000_init_apic_ldr, + + .ioapic_phys_id_map = es7000_ioapic_phys_id_map, + .setup_apic_routing = es7000_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = es7000_apicid_to_node, + .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, + .cpu_present_to_apicid = es7000_cpu_present_to_apicid, + .apicid_to_cpu_present = es7000_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = es7000_check_phys_apicid_present, + .enable_apic_mode = es7000_enable_apic_mode, + .phys_pkg_id = es7000_phys_pkg_id, + .mps_oem_check = es7000_mps_oem_check, + + .get_apic_id = es7000_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, + + .send_IPI_mask = es7000_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = es7000_send_IPI_allbutself, + .send_IPI_all = es7000_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + + .trampoline_phys_low = 0x467, + .trampoline_phys_high = 0x469, + + .wait_for_init_deassert = es7000_wait_for_init_deassert, + + /* Nothing to do for most platforms, since cleared by the INIT cycle: */ + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 78ab5735cb80..05e47acfd666 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -7,4 +7,3 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o -obj-$(CONFIG_X86_ES7000) += es7000.o diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c deleted file mode 100644 index bb11166b7c3c..000000000000 --- a/arch/x86/mach-generic/es7000.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * APIC driver for the Unisys ES7000 chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) -#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -extern void es7000_enable_apic_mode(void); -extern int apic_version [MAX_APICS]; -extern u8 cpu_2_logical_apicid[]; -extern unsigned int boot_cpu_physical_apicid; - -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); -extern void setup_unisys(void); - -#define apicid_cluster(apicid) (apicid & 0xF0) -#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) - -static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} - - -static void es7000_wait_for_init_deassert(atomic_t *deassert) -{ -#ifndef CONFIG_ES7000_CLUSTERED_APIC - while (!atomic_read(deassert)) - cpu_relax(); -#endif - return; -} - -static unsigned int es7000_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#ifdef CONFIG_ACPI -static int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} -#endif - -static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static void es7000_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static void es7000_send_IPI_all(int vector) -{ - es7000_send_IPI_mask(cpu_online_mask, vector); -} - -static int es7000_apic_id_registered(void) -{ - return 1; -} - -static const cpumask_t *target_cpus_cluster(void) -{ - return &CPU_MASK_ALL; -} - -static const cpumask_t *es7000_target_cpus(void) -{ - return &cpumask_of_cpu(smp_processor_id()); -} - -static unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} -static unsigned long es7000_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static unsigned long calculate_ldr(int cpu) -{ - unsigned long id = xapic_phys_to_log_apicid(cpu); - - return (SET_APIC_LOGICAL_ID(id)); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static void es7000_init_apic_ldr_cluster(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); -} - -static int es7000_apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static int es7000_cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) -{ - static int id = 0; - physid_mask_t mask; - - mask = physid_mask_of_physid(id); - ++id; - - return mask; -} - -/* Mapping from cpu number to logical apicid */ -static int es7000_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); -} - -static int es7000_check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return (1); -} - -static unsigned int -es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = cpumask_first(cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return 0xFF; - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return es7000_cpu_to_logical_apicid(0); - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = first_cpu(*cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return es7000_cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static unsigned int -es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = es7000_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -void __init es7000_update_genapic_to_cluster(void) -{ - apic->target_cpus = target_cpus_cluster; - apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; - - apic->init_apic_ldr = es7000_init_apic_ldr_cluster; - - apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; -} - -static int probe_es7000(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static __init int -es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (mpc->oemptr) { - struct mpc_oemtable *oem_table = - (struct mpc_oemtable *)mpc->oemptr; - - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} - -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr = 0; - int check_dsdt; - int ret = 0; - - /* check dsdt at first to avoid clear fix_map for oem_addr */ - check_dsdt = es7000_check_dsdt(); - - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (check_dsdt) - ret = parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - ret = 1; - } - /* - * we need to unmap it - */ - unmap_unisys_acpi_oem_table(oem_addr); - } - return ret; -} -#else -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif - - -struct genapic apic_es7000 = { - - .name = "es7000", - .probe = probe_es7000, - .acpi_madt_oem_check = es7000_acpi_madt_oem_check, - .apic_id_registered = es7000_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - /* phys delivery to target CPUs: */ - .irq_dest_mode = 0, - - .target_cpus = es7000_target_cpus, - .disable_esr = 1, - .dest_logical = 0, - .check_apicid_used = es7000_check_apicid_used, - .check_apicid_present = es7000_check_apicid_present, - - .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = es7000_init_apic_ldr, - - .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = es7000_apicid_to_node, - .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, - .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = es7000_mps_oem_check, - - .get_apic_id = es7000_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - - .send_IPI_mask = es7000_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = es7000_send_IPI_allbutself, - .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - - .trampoline_phys_low = 0x467, - .trampoline_phys_high = 0x469, - - .wait_for_init_deassert = es7000_wait_for_init_deassert, - - /* Nothing to do for most platforms, since cleared by the INIT cycle: */ - .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, - .inquire_remote_apic = default_inquire_remote_apic, -}; From 61b90b7ca10cc65d8b850ab542859dc593e5a381 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:01:05 +0100 Subject: [PATCH 261/566] x86, NUMAQ: Consolidate code Move all NUMAQ code into arch/x86/kernel/numaq.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 278 +++++++++++++++++++++++++++++++++ arch/x86/mach-generic/Makefile | 1 - arch/x86/mach-generic/numaq.c | 277 -------------------------------- 3 files changed, 278 insertions(+), 278 deletions(-) delete mode 100644 arch/x86/mach-generic/numaq.c diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 3928280278f0..83bb05524f43 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -291,3 +291,281 @@ int __init get_memcfg_numaq(void) smp_dump_qct(); return 1; } + +/* + * APIC driver for the IBM NUMAQ chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline unsigned int numaq_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0x0F; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void numaq_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void numaq_send_IPI_all(int vector) +{ + numaq_send_IPI_mask(cpu_online_mask, vector); +} + +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) + +/* + * Because we use NMIs rather than the INIT-STARTUP sequence to + * bootstrap the CPUs, the APIC may be in a weird state. Kick it: + */ +static inline void numaq_smp_callin_clear_local_apic(void) +{ + clear_local_APIC(); +} + +static inline void +numaq_store_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Storing NMI vector\n"); + *high = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); + *low = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); +} + +static inline const cpumask_t *numaq_target_cpus(void) +{ + return &CPU_MASK_ALL; +} + +static inline unsigned long +numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long numaq_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline int numaq_apic_id_registered(void) +{ + return 1; +} + +static inline void numaq_init_apic_ldr(void) +{ + /* Already done in NUMA-Q firmware */ +} + +static inline void numaq_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "NUMA-Q", nr_ioapics); +} + +/* + * Skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum. + */ +static inline int numaq_multi_timer_check(int apic, int irq) +{ + return apic != 0 && irq == 0; +} + +static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* We don't have a good way to do this yet - hack */ + return physids_promote(0xFUL); +} + +/* Mapping from cpu number to logical apicid */ +extern u8 cpu_2_logical_apicid[]; + +static inline int numaq_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +} + +/* + * Supporting over 60 cpus on NUMA-Q requires a locality-dependent + * cpu to APIC ID relation to properly interact with the intelligent + * mode of the cluster controller. + */ +static inline int numaq_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < 60) + return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); + else + return BAD_APICID; +} + +static inline int numaq_apicid_to_node(int logical_apicid) +{ + return logical_apicid >> 4; +} + +static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +{ + int node = numaq_apicid_to_node(logical_apicid); + int cpu = __ffs(logical_apicid & 0xf); + + return physid_mask_of_physid(cpu + 4*node); +} + +extern void *xquad_portio; + +static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* + * We use physical apicids here, not logical, so just return the default + * physical broadcast to stop people from breaking us + */ +static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return 0x0F; +} + +static inline unsigned int +numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + return 0x0F; +} + +/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ +static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} +static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + numaq_mps_oem_check(mpc, oem, productid); + return found_numaq; +} + +static int probe_numaq(void) +{ + /* already know from get_memcfg_numaq() */ + return found_numaq; +} + +static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + +static void numaq_setup_portio_remap(void) +{ + int num_quads = num_online_nodes(); + + if (num_quads <= 1) + return; + + printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); + printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +} + +struct genapic apic_numaq = { + + .name = "NUMAQ", + .probe = probe_numaq, + .acpi_madt_oem_check = NULL, + .apic_id_registered = numaq_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* physical delivery on LOCAL quad: */ + .irq_dest_mode = 0, + + .target_cpus = numaq_target_cpus, + .disable_esr = 1, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = numaq_check_apicid_used, + .check_apicid_present = numaq_check_apicid_present, + + .vector_allocation_domain = numaq_vector_allocation_domain, + .init_apic_ldr = numaq_init_apic_ldr, + + .ioapic_phys_id_map = numaq_ioapic_phys_id_map, + .setup_apic_routing = numaq_setup_apic_routing, + .multi_timer_check = numaq_multi_timer_check, + .apicid_to_node = numaq_apicid_to_node, + .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, + .cpu_present_to_apicid = numaq_cpu_present_to_apicid, + .apicid_to_cpu_present = numaq_apicid_to_cpu_present, + .setup_portio_remap = numaq_setup_portio_remap, + .check_phys_apicid_present = numaq_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = numaq_phys_pkg_id, + .mps_oem_check = __numaq_mps_oem_check, + + .get_apic_id = numaq_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, + + .send_IPI_mask = numaq_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = numaq_send_IPI_allbutself, + .send_IPI_all = numaq_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, + + /* We don't do anything here because we use NMI's to boot instead */ + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, + .store_NMI_vector = numaq_store_NMI_vector, + .inquire_remote_apic = NULL, +}; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 05e47acfd666..4ede08d309e6 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -5,5 +5,4 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o -obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c deleted file mode 100644 index c221cfb2c2db..000000000000 --- a/arch/x86/mach-generic/numaq.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * APIC driver for the IBM NUMAQ chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline unsigned int numaq_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0x0F; -} - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void numaq_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void numaq_send_IPI_all(int vector) -{ - numaq_send_IPI_mask(cpu_online_mask, vector); -} - -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it: - */ -static inline void numaq_smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline void -numaq_store_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Storing NMI vector\n"); - *high = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); - *low = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); -} - -static inline const cpumask_t *numaq_target_cpus(void) -{ - return &CPU_MASK_ALL; -} - -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} - -static inline unsigned long numaq_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline int numaq_apic_id_registered(void) -{ - return 1; -} - -static inline void numaq_init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void numaq_setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "NUMA-Q", nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int numaq_multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); -} - -/* Mapping from cpu number to logical apicid */ -extern u8 cpu_2_logical_apicid[]; - -static inline int numaq_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int numaq_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int numaq_apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) -{ - int node = numaq_apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - return physid_mask_of_physid(cpu + 4*node); -} - -extern void *xquad_portio; - -static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return 0x0F; -} - -static inline unsigned int -numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - return 0x0F; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} -static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - numaq_mps_oem_check(mpc, oem, productid); - return found_numaq; -} - -static int probe_numaq(void) -{ - /* already know from get_memcfg_numaq() */ - return found_numaq; -} - -static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} - -static void numaq_setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk("Remapping cross-quad port I/O for %d quads\n", num_quads); - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - printk("xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - -struct genapic apic_numaq = { - - .name = "NUMAQ", - .probe = probe_numaq, - .acpi_madt_oem_check = NULL, - .apic_id_registered = numaq_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* physical delivery on LOCAL quad: */ - .irq_dest_mode = 0, - - .target_cpus = numaq_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = numaq_check_apicid_used, - .check_apicid_present = numaq_check_apicid_present, - - .vector_allocation_domain = numaq_vector_allocation_domain, - .init_apic_ldr = numaq_init_apic_ldr, - - .ioapic_phys_id_map = numaq_ioapic_phys_id_map, - .setup_apic_routing = numaq_setup_apic_routing, - .multi_timer_check = numaq_multi_timer_check, - .apicid_to_node = numaq_apicid_to_node, - .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, - .cpu_present_to_apicid = numaq_cpu_present_to_apicid, - .apicid_to_cpu_present = numaq_apicid_to_cpu_present, - .setup_portio_remap = numaq_setup_portio_remap, - .check_phys_apicid_present = numaq_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = __numaq_mps_oem_check, - - .get_apic_id = numaq_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, - - .send_IPI_mask = numaq_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = numaq_send_IPI_allbutself, - .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, - - /* We don't do anything here because we use NMI's to boot instead */ - .wait_for_init_deassert = NULL, - - .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .store_NMI_vector = numaq_store_NMI_vector, - .inquire_remote_apic = NULL, -}; From b3daa3a1a56cf09fb91773f3658692fd02d08bb1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:04:37 +0100 Subject: [PATCH 262/566] x86, bigsmp: consolidate code Move all code to arch/x86/kernel/bigsmp_32.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/{mach-generic/bigsmp.c => kernel/bigsmp_32.c} | 0 arch/x86/mach-generic/Makefile | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename arch/x86/{mach-generic/bigsmp.c => kernel/bigsmp_32.c} (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a382ca6f6a17..4239847906a1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o +obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT) += summit_32.o diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/kernel/bigsmp_32.c similarity index 100% rename from arch/x86/mach-generic/bigsmp.c rename to arch/x86/kernel/bigsmp_32.c diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 4ede08d309e6..05e4a7ca7742 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -5,4 +5,3 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o -obj-$(CONFIG_X86_BIGSMP) += bigsmp.o From 9f4187f0a3b93fc215b4472063b6c0b44364e60c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:19:12 +0100 Subject: [PATCH 263/566] x86, bigsmp: consolidate header code Move all the asm/bigsmp/*.h definitions into bigsmp_32.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 163 +++++++++++++++++++++++++++++++++++- 1 file changed, 159 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 626f45ca4e7e..b1f91931003f 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -12,10 +12,165 @@ #include #include #include -#include #include -#include -#include + + +static inline unsigned bigsmp_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) + +static inline int bigsmp_apic_id_registered(void) +{ + return 1; +} + +static inline const cpumask_t *bigsmp_target_cpus(void) +{ +#ifdef CONFIG_SMP + return &cpu_online_map; +#else + return &cpumask_of_cpu(0); +#endif +} + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline unsigned long +bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +static inline unsigned long bigsmp_check_apicid_present(int bit) +{ + return 1; +} + +static inline unsigned long calculate_ldr(int cpu) +{ + unsigned long val, id; + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + id = xapic_phys_to_log_apicid(cpu); + val |= SET_APIC_LOGICAL_ID(id); + return val; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void bigsmp_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static inline void bigsmp_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Physflat", nr_ioapics); +} + +static inline int bigsmp_apicid_to_node(int logical_apicid) +{ + return apicid_2_node[hard_smp_processor_id()]; +} + +static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + + return BAD_APICID; +} + +static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +extern u8 cpu_2_logical_apicid[]; +/* Mapping from cpu number to logical apicid */ +static inline int bigsmp_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return cpu_physical_id(cpu); +} + +static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xFFL); +} + +static inline void bigsmp_setup_portio_remap(void) +{ +} + +static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* As we are using single CPU as destination, pick only one CPU here */ +static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); +} + +static inline unsigned int +bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + } + if (cpu < nr_cpu_ids) + return bigsmp_cpu_to_logical_apicid(cpu); + + return BAD_APICID; +} + +static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void bigsmp_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void bigsmp_send_IPI_all(int vector) +{ + bigsmp_send_IPI_mask(cpu_online_mask, vector); +} static int dmi_bigsmp; /* can be set by dmi scanners */ @@ -95,7 +250,7 @@ struct genapic apic_bigsmp = { .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, - .send_IPI_mask = default_send_IPI_mask, + .send_IPI_mask = bigsmp_send_IPI_mask, .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, From d53e2f2855f1c7c2725d550c1ae6b26f4d671c50 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:14:52 +0100 Subject: [PATCH 264/566] x86, smp: remove mach_ipi.h Move mach_ipi.h definitions into genapic.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 1 + arch/x86/include/asm/ipi.h | 61 ++++++++++++++++++- arch/x86/include/asm/mach-default/mach_ipi.h | 58 ------------------ arch/x86/include/asm/mach-generic/gpio.h | 15 ----- arch/x86/include/asm/mach-generic/mach_ipi.h | 6 -- .../include/asm/mach-generic/mach_wakecpu.h | 4 -- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/io_apic.c | 1 - arch/x86/kernel/ipi.c | 1 - arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/smp.c | 1 - arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/mach-default/setup.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mm/tlb.c | 2 +- 17 files changed, 68 insertions(+), 96 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_ipi.h delete mode 100644 arch/x86/include/asm/mach-generic/gpio.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_ipi.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_wakecpu.h diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index ccfcd19d5b7d..273b99452ae0 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -259,4 +259,5 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) } #endif /* CONFIG_X86_LOCAL_APIC */ + #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index a8d717f2c7e7..e2e8e4e0a656 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_IPI_H #define _ASM_X86_IPI_H +#ifdef CONFIG_X86_LOCAL_APIC + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -56,8 +58,7 @@ static inline void __xapic_wait_icr_idle(void) } static inline void -__default_send_IPI_shortcut(unsigned int shortcut, - int vector, unsigned int dest) +__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -156,4 +157,60 @@ default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) local_irq_restore(flags); } + +/* Avoid include hell */ +#define NMI_VECTOR 0x02 + +void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +extern int no_broadcast; + +#ifdef CONFIG_X86_64 +#include +#else +static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_bitmask(mask, vector); +} +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +#endif + +static inline void __default_local_send_IPI_allbutself(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); +} + +static inline void __default_local_send_IPI_all(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); +} + +#ifdef CONFIG_X86_32 +static inline void default_send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then we get an APIC send + * error if we try to broadcast, thus avoid sending IPIs in this case. + */ + if (!(num_online_cpus() > 1)) + return; + + __default_local_send_IPI_allbutself(vector); +} + +static inline void default_send_IPI_all(int vector) +{ + __default_local_send_IPI_all(vector); +} +#endif + +#endif + #endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h deleted file mode 100644 index 85dec630c69c..000000000000 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_IPI_H -#define _ASM_X86_MACH_DEFAULT_MACH_IPI_H - -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - -void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -void __default_send_IPI_shortcut(unsigned int shortcut, int vector); - -extern int no_broadcast; - -#ifdef CONFIG_X86_64 -#include -#else -static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_bitmask(mask, vector); -} -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -#endif - -static inline void __default_local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); -} - -static inline void __default_local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); -} - -#ifdef CONFIG_X86_32 -static inline void default_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. - */ - if (!(num_online_cpus() > 1)) - return; - - __default_local_send_IPI_allbutself(vector); -} - -static inline void default_send_IPI_all(int vector) -{ - __default_local_send_IPI_all(vector); -} -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/gpio.h b/arch/x86/include/asm/mach-generic/gpio.h deleted file mode 100644 index 995c45efdb33..000000000000 --- a/arch/x86/include/asm/mach-generic/gpio.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_GPIO_H -#define _ASM_X86_MACH_GENERIC_GPIO_H - -int gpio_request(unsigned gpio, const char *label); -void gpio_free(unsigned gpio); -int gpio_direction_input(unsigned gpio); -int gpio_direction_output(unsigned gpio, int value); -int gpio_get_value(unsigned gpio); -void gpio_set_value(unsigned gpio, int value); -int gpio_to_irq(unsigned gpio); -int irq_to_gpio(unsigned irq); - -#include /* cansleep wrappers */ - -#endif /* _ASM_X86_MACH_GENERIC_GPIO_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h deleted file mode 100644 index 5691c09645c5..000000000000 --- a/arch/x86/include/asm/mach-generic/mach_ipi.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_IPI_H -#define _ASM_X86_MACH_GENERIC_MACH_IPI_H - -#include - -#endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h deleted file mode 100644 index 0b884c03a3fc..000000000000 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 41a0ba34d6b2..81efe86eca81 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,7 +49,7 @@ #include #include -#include +#include /* * Sanity check diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 11b93cabdf78..ad7f2a696f4a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,7 +28,7 @@ #include #include -#include +#include #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e0744ea6d0f1..241a01d6fd4b 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -62,7 +62,6 @@ #include #include -#include #include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 50076d92fbc0..0893fa144581 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,7 +20,6 @@ #ifdef CONFIG_X86_32 #include -#include /* * the following functions deal with sending IPIs between CPUs. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b62a3811e01c..5c4f55483849 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,7 +46,7 @@ #include #include -#include +#include /* * Put the error code here just in case the user cares: diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 38dace28d625..32e8f0af292c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,7 +24,7 @@ # include #endif -#include +#include /* * Power off function, if any diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 892e7c389be1..0eb32ae9bf1f 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -26,7 +26,6 @@ #include #include #include -#include #include /* * Some notes on x86 processor bugs affecting SMP operation: diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 3bd7f47a91b7..4fd646e6dd43 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index df167f265622..b65ff0bf730e 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -10,7 +10,7 @@ #include #include -#include +#include #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 7d5123e474e1..d9d44c8c3db8 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6348e1146925..14c5af4d11e6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; -#include +#include /* * Smarter SMP flushing macros. * c/o Linus Torvalds. From 7b38725318f4517af6168ccbff99060d67aba1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:11:44 +0100 Subject: [PATCH 265/566] x86: remove subarchitecture support code Remove remaining bits of the subarchitecture code. Now that all the special platforms are runtime probed and runtime handled, we can remove these facilities. Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 5 - arch/x86/kernel/Makefile | 2 +- .../probe.c => kernel/probe_32.c} | 92 +++++++++++++++++++ arch/x86/mach-generic/Makefile | 7 -- 4 files changed, 93 insertions(+), 13 deletions(-) rename arch/x86/{mach-generic/probe.c => kernel/probe_32.c} (54%) delete mode 100644 arch/x86/mach-generic/Makefile diff --git a/arch/x86/Makefile b/arch/x86/Makefile index cacee981d166..799a0d931c81 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -113,11 +113,6 @@ mcore-y := arch/x86/mach-default/ mflags-$(CONFIG_X86_VOYAGER) := -Iarch/x86/include/asm/mach-voyager mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ -# generic subarchitecture -mflags-$(CONFIG_X86_GENERICARCH):= -Iarch/x86/include/asm/mach-generic -fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ -mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ - # default subarch .h files mflags-y += -Iarch/x86/include/asm/mach-default diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4239847906a1..d3f8f49aed65 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -30,7 +30,7 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o -obj-$(CONFIG_X86_32) += probe_roms_32.o +obj-$(CONFIG_X86_32) += probe_32.o probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/kernel/probe_32.c similarity index 54% rename from arch/x86/mach-generic/probe.c rename to arch/x86/kernel/probe_32.c index c03c72221320..a6fba6914167 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/kernel/probe_32.c @@ -1,4 +1,6 @@ /* + * Default generic APIC driver. This handles up to 8 CPUs. + * * Copyright 2003 Andi Kleen, SuSE Labs. * Subject to the GNU Public License, v.2 * @@ -17,6 +19,96 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + /* + * Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; +} + +/* should be called last. */ +static int probe_default(void) +{ + return 1; +} + +struct genapic apic_default = { + + .name = "default", + .probe = probe_default, + .acpi_madt_oem_check = NULL, + .apic_id_registered = default_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = default_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = default_check_apicid_used, + .check_apicid_present = default_check_apicid_present, + + .vector_allocation_domain = default_vector_allocation_domain, + .init_apic_ldr = default_init_apic_ldr, + + .ioapic_phys_id_map = default_ioapic_phys_id_map, + .setup_apic_routing = default_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = default_apicid_to_node, + .cpu_to_logical_apicid = default_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = default_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = default_phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = default_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = default_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = default_wait_for_init_deassert, + + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; + extern struct genapic apic_numaq; extern struct genapic apic_summit; extern struct genapic apic_bigsmp; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile deleted file mode 100644 index 05e4a7ca7742..000000000000 --- a/arch/x86/mach-generic/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the generic architecture -# - -EXTRA_CFLAGS := -Iarch/x86/kernel - -obj-y := probe.o default.o From 1164dd0099c0d79146a55319670f57ab7ad1d352 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:34:09 +0100 Subject: [PATCH 266/566] x86: move mach-default/*.h files to asm/ We are getting rid of subarchitecture support - move the hook files to asm/. (These are now stale and should be replaced with more explicit runtime mechanisms - but the transition is simpler this way.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/{mach-default => }/apm.h | 0 arch/x86/include/asm/{mach-default => }/do_timer.h | 0 arch/x86/include/asm/{mach-default => }/entry_arch.h | 5 +++++ arch/x86/include/asm/{mach-default => }/mach_timer.h | 0 arch/x86/include/asm/{mach-default => }/mach_traps.h | 0 arch/x86/include/asm/{mach-default => }/pci-functions.h | 0 arch/x86/include/asm/{mach-default => }/setup_arch.h | 0 arch/x86/include/asm/{mach-default => }/smpboot_hooks.h | 0 arch/x86/kernel/apm_32.c | 2 +- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/probe_roms_32.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/time_32.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/pci/pcbios.c | 2 +- drivers/clocksource/acpi_pm.c | 2 +- drivers/clocksource/cyclone.c | 2 +- 19 files changed, 16 insertions(+), 11 deletions(-) rename arch/x86/include/asm/{mach-default => }/apm.h (100%) rename arch/x86/include/asm/{mach-default => }/do_timer.h (100%) rename arch/x86/include/asm/{mach-default => }/entry_arch.h (95%) rename arch/x86/include/asm/{mach-default => }/mach_timer.h (100%) rename arch/x86/include/asm/{mach-default => }/mach_traps.h (100%) rename arch/x86/include/asm/{mach-default => }/pci-functions.h (100%) rename arch/x86/include/asm/{mach-default => }/setup_arch.h (100%) rename arch/x86/include/asm/{mach-default => }/smpboot_hooks.h (100%) diff --git a/arch/x86/include/asm/mach-default/apm.h b/arch/x86/include/asm/apm.h similarity index 100% rename from arch/x86/include/asm/mach-default/apm.h rename to arch/x86/include/asm/apm.h diff --git a/arch/x86/include/asm/mach-default/do_timer.h b/arch/x86/include/asm/do_timer.h similarity index 100% rename from arch/x86/include/asm/mach-default/do_timer.h rename to arch/x86/include/asm/do_timer.h diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/entry_arch.h similarity index 95% rename from arch/x86/include/asm/mach-default/entry_arch.h rename to arch/x86/include/asm/entry_arch.h index 6fa399ad1de2..b87b077cc231 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -41,10 +41,15 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, * a much simpler SMP time architecture: */ #ifdef CONFIG_X86_LOCAL_APIC + BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +#ifdef CONFIG_PERF_COUNTERS +BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +#endif + #ifdef CONFIG_X86_MCE_P4THERMAL BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif diff --git a/arch/x86/include/asm/mach-default/mach_timer.h b/arch/x86/include/asm/mach_timer.h similarity index 100% rename from arch/x86/include/asm/mach-default/mach_timer.h rename to arch/x86/include/asm/mach_timer.h diff --git a/arch/x86/include/asm/mach-default/mach_traps.h b/arch/x86/include/asm/mach_traps.h similarity index 100% rename from arch/x86/include/asm/mach-default/mach_traps.h rename to arch/x86/include/asm/mach_traps.h diff --git a/arch/x86/include/asm/mach-default/pci-functions.h b/arch/x86/include/asm/pci-functions.h similarity index 100% rename from arch/x86/include/asm/mach-default/pci-functions.h rename to arch/x86/include/asm/pci-functions.h diff --git a/arch/x86/include/asm/mach-default/setup_arch.h b/arch/x86/include/asm/setup_arch.h similarity index 100% rename from arch/x86/include/asm/mach-default/setup_arch.h rename to arch/x86/include/asm/setup_arch.h diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h similarity index 100% rename from arch/x86/include/asm/mach-default/smpboot_hooks.h rename to arch/x86/include/asm/smpboot_hooks.h diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 98807bb095ad..37ba5f85b718 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int); */ #define APM_ZERO_SEGS -#include "apm.h" +#include /* * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a0b91aac72a1..65efd42454be 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -688,7 +688,7 @@ ENDPROC(name) #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) /* The include is where all of the SMP etc. interrupts come from */ -#include "entry_arch.h" +#include ENTRY(coprocessor_error) RING0_INT_FRAME diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 23b6d9e6e4f5..bdfad80c3cf1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -34,7 +34,7 @@ #include -#include +#include int unknown_nmi_panic; int nmi_watchdog_enabled; diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c index 675a48c404a5..071e7fea42e5 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms_32.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include static struct resource system_rom_resource = { .name = "System ROM", diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 92e42939fb08..e645d4793e4d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -81,7 +81,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 489fde9d9476..e90b3e50b54f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -66,7 +66,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 3985cac0ed47..764c74e871f2 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -38,7 +38,7 @@ #include #include -#include "do_timer.h" +#include int timer_ack; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ed5aee5f3fcc..214bc327a0c3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -54,7 +54,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index b82cae970dfd..1c975cc9839e 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index e1129fad96dd..ee19b6e8fcb4 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -143,7 +143,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, #endif #ifndef CONFIG_X86_64 -#include "mach_timer.h" +#include #define PMTMR_EXPECTED_RATE \ ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) /* diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c index 1bde303b970b..8615059a8729 100644 --- a/drivers/clocksource/cyclone.c +++ b/drivers/clocksource/cyclone.c @@ -7,7 +7,7 @@ #include #include -#include "mach_timer.h" +#include #define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr */ #define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */ From 6bda2c8b32febeb38ee128047253751e080bad52 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:32:55 +0100 Subject: [PATCH 267/566] x86: remove subarchitecture support Remove the 32-bit subarchitecture support code. All subarchitectures but Voyager have been converted. Voyager will be done later or will be removed. Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 21 ---- arch/x86/include/asm/apic.h | 2 + arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/io_apic.c | 17 ---- arch/x86/kernel/probe_32.c | 163 ++++++++++++++++++++++++++++++++ arch/x86/mach-default/setup.c | 162 ------------------------------- arch/x86/mach-generic/default.c | 93 ------------------ 7 files changed, 166 insertions(+), 294 deletions(-) delete mode 100644 arch/x86/mach-default/setup.c delete mode 100644 arch/x86/mach-generic/default.c diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 799a0d931c81..99550c407990 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -102,24 +102,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # prevent gcc from generating any FP code by mistake KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) -### -# Sub architecture support -# fcore-y is linked before mcore-y files. - -# Default subarch .c files -mcore-y := arch/x86/mach-default/ - -# Voyager subarch support -mflags-$(CONFIG_X86_VOYAGER) := -Iarch/x86/include/asm/mach-voyager -mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ - -# default subarch .h files -mflags-y += -Iarch/x86/include/asm/mach-default - -# 64 bit does not support subarch support - clear sub arch variables -fcore-$(CONFIG_X86_64) := -mcore-$(CONFIG_X86_64) := - KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) @@ -145,9 +127,6 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/ core-y += arch/x86/kernel/ core-y += arch/x86/mm/ -# Remaining sub architecture files -core-y += $(mcore-y) - core-y += arch/x86/crypto/ core-y += arch/x86/vdso/ core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6a77068e261a..b03711d7990b 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -215,6 +215,7 @@ static inline void disable_local_APIC(void) { } #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else +#ifdef CONFIG_X86_LOCAL_APIC static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); @@ -224,6 +225,7 @@ static inline unsigned default_get_apic_id(unsigned long x) else return (x >> 24) & 0x0F; } +#endif #endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d3f8f49aed65..1bc4cdc797db 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -63,7 +63,7 @@ obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o ipi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 241a01d6fd4b..3378ffb21407 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -797,23 +797,6 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } -#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void default_send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->dest_logical; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP && CONFIG_X86_32*/ - #ifdef CONFIG_X86_32 /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index a6fba6914167..61339a0d55cf 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -32,6 +32,26 @@ #include #include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +int no_broadcast = DEFAULT_SEND_IPI; + +#ifdef CONFIG_X86_LOCAL_APIC + static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* @@ -246,3 +266,146 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ + +/** + * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +void __init pre_intr_init_hook(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} + +/** + * intr_init_hook - post gate setup interrupt initialisation + * + * Description: + * Fill in any interrupts that may have been left out by the general + * init_IRQ() routine. interrupts having to do with the machine rather + * than the devices on the I/O bus (like APIC interrupts in intel MP + * systems) are started here. + **/ +void __init intr_init_hook(void) +{ + if (x86_quirks->arch_intr_init) { + if (x86_quirks->arch_intr_init()) + return; + } +} + +/** + * pre_setup_arch_hook - hook called prior to any setup_arch() execution + * + * Description: + * generally used to activate any machine specific identification + * routines that may be needed before setup_arch() runs. On Voyager + * this is used to get the board revision and type. + **/ +void __init pre_setup_arch_hook(void) +{ +} + +/** + * trap_init_hook - initialise system specific traps + * + * Description: + * Called as the final act of trap_init(). Used in VISWS to initialise + * the various board specific APIC traps. + **/ +void __init trap_init_hook(void) +{ + if (x86_quirks->arch_trap_init) { + if (x86_quirks->arch_trap_init()) + return; + } +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .mask = CPU_MASK_NONE, + .name = "timer" +}; + +/** + * pre_time_init_hook - do any specific initialisations before. + * + **/ +void __init pre_time_init_hook(void) +{ + if (x86_quirks->arch_pre_time_init) + x86_quirks->arch_pre_time_init(); +} + +/** + * time_init_hook - do any specific initialisations for the system timer. + * + * Description: + * Must plug the system timer interrupt source at HZ into the IRQ listed + * in irq_vectors.h:TIMER_IRQ + **/ +void __init time_init_hook(void) +{ + if (x86_quirks->arch_time_init) { + /* + * A nonzero return code does not mean failure, it means + * that the architecture quirk does not want any + * generic (timer) setup to be performed after this: + */ + if (x86_quirks->arch_time_init()) + return; + } + + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +#ifdef CONFIG_MCA +/** + * mca_nmi_hook - hook into MCA specific NMI chain + * + * Description: + * The MCA (Microchannel Architecture) has an NMI chain for NMI sources + * along the MCA bus. Use this to hook into that chain if you will need + * it. + **/ +void mca_nmi_hook(void) +{ + /* + * If I recall correctly, there's a whole bunch of other things that + * we can do to check for NMI problems, but that's all I know about + * at the moment. + */ + pr_warning("NMI generated from unknown source!\n"); +} +#endif + +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + pr_info("Using %s mode\n", + no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); + return 1; +} +__setup("no_ipi_broadcast=", no_ipi_broadcast); + +static int __init print_ipi_mode(void) +{ + pr_info("Using IPI %s mode\n", + no_broadcast ? "No-Shortcut" : "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); + diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c deleted file mode 100644 index b65ff0bf730e..000000000000 --- a/arch/x86/mach-default/setup.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Machine specific setup for generic - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -int no_broadcast = DEFAULT_SEND_IPI; - -/** - * pre_intr_init_hook - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -void __init pre_intr_init_hook(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} - -/** - * intr_init_hook - post gate setup interrupt initialisation - * - * Description: - * Fill in any interrupts that may have been left out by the general - * init_IRQ() routine. interrupts having to do with the machine rather - * than the devices on the I/O bus (like APIC interrupts in intel MP - * systems) are started here. - **/ -void __init intr_init_hook(void) -{ - if (x86_quirks->arch_intr_init) { - if (x86_quirks->arch_intr_init()) - return; - } -} - -/** - * pre_setup_arch_hook - hook called prior to any setup_arch() execution - * - * Description: - * generally used to activate any machine specific identification - * routines that may be needed before setup_arch() runs. On Voyager - * this is used to get the board revision and type. - **/ -void __init pre_setup_arch_hook(void) -{ -} - -/** - * trap_init_hook - initialise system specific traps - * - * Description: - * Called as the final act of trap_init(). Used in VISWS to initialise - * the various board specific APIC traps. - **/ -void __init trap_init_hook(void) -{ - if (x86_quirks->arch_trap_init) { - if (x86_quirks->arch_trap_init()) - return; - } -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -/** - * pre_time_init_hook - do any specific initialisations before. - * - **/ -void __init pre_time_init_hook(void) -{ - if (x86_quirks->arch_pre_time_init) - x86_quirks->arch_pre_time_init(); -} - -/** - * time_init_hook - do any specific initialisations for the system timer. - * - * Description: - * Must plug the system timer interrupt source at HZ into the IRQ listed - * in irq_vectors.h:TIMER_IRQ - **/ -void __init time_init_hook(void) -{ - if (x86_quirks->arch_time_init) { - /* - * A nonzero return code does not mean failure, it means - * that the architecture quirk does not want any - * generic (timer) setup to be performed after this: - */ - if (x86_quirks->arch_time_init()) - return; - } - - irq0.mask = cpumask_of_cpu(0); - setup_irq(0, &irq0); -} - -#ifdef CONFIG_MCA -/** - * mca_nmi_hook - hook into MCA specific NMI chain - * - * Description: - * The MCA (Microchannel Architecture) has an NMI chain for NMI sources - * along the MCA bus. Use this to hook into that chain if you will need - * it. - **/ -void mca_nmi_hook(void) -{ - /* - * If I recall correctly, there's a whole bunch of other things that - * we can do to check for NMI problems, but that's all I know about - * at the moment. - */ - pr_warning("NMI generated from unknown source!\n"); -} -#endif - -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); - return 1; -} -__setup("no_ipi_broadcast=", no_ipi_broadcast); - -static int __init print_ipi_mode(void) -{ - pr_info("Using IPI %s mode\n", - no_broadcast ? "No-Shortcut" : "Shortcut"); - return 0; -} - -late_initcall(print_ipi_mode); - diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c deleted file mode 100644 index d9d44c8c3db8..000000000000 --- a/arch/x86/mach-generic/default.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Default generic APIC driver. This handles up to 8 CPUs. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* - * Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; -} - -/* should be called last. */ -static int probe_default(void) -{ - return 1; -} - -struct genapic apic_default = { - - .name = "default", - .probe = probe_default, - .acpi_madt_oem_check = NULL, - .apic_id_registered = default_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = default_target_cpus, - .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = default_check_apicid_used, - .check_apicid_present = default_check_apicid_present, - - .vector_allocation_domain = default_vector_allocation_domain, - .init_apic_ldr = default_init_apic_ldr, - - .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = default_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = default_apicid_to_node, - .cpu_to_logical_apicid = default_cpu_to_logical_apicid, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = default_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = default_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = default_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - - .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = default_send_IPI_allbutself, - .send_IPI_all = default_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, - .inquire_remote_apic = default_inquire_remote_apic, -}; From e7c64981949ec983ee41c2927c036fdb00d8e68b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:54:17 +0100 Subject: [PATCH 268/566] x86/Voyager: clean up BROKEN Kconfig reference CONFIG_BROKEN has been removed from the upstream kernel years ago, but X86_VOYAGER still had a stale reference to it - remove it. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6218e6c9824..45c7bdbc156e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -303,7 +303,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && (SMP || BROKEN) && !PCI + depends on X86_32 && SMP && !PCI help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. From 61b8172e57c4b8db1fcb7f5fd8dfda85ffd8e052 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:55:34 +0100 Subject: [PATCH 269/566] x86: disable Voyager temporarily x86/Voyager does not build right now and it's unclear whether it will be cleaned up and ported to the subarch-less 32-bit x86 code - so disable it for now. If it's fixed we'll re-enable it - or remove it after some time. There's a very low number of systems running development kernels on x86/Voyager currently. (one or two on the whole planet) Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 45c7bdbc156e..f983f4055b18 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -303,7 +303,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && SMP && !PCI + depends on X86_32 && SMP && !PCI && BROKEN help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. From 72ee6ebbb3fe1ac23d1a669b177b858d2028bf09 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 16:57:49 +0100 Subject: [PATCH 270/566] x86/Voyager: remove MCA Kconfig quirk Remove Voyager Kconfig quirk: just like any other hardware platform users of Voyager systems can configure in the hardware drivers they need. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f983f4055b18..ae37a7504b57 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1873,8 +1873,7 @@ config EISA source "drivers/eisa/Kconfig" config MCA - bool "MCA support" if !X86_VOYAGER - default y if X86_VOYAGER + bool "MCA support" help MicroChannel Architecture is found in some IBM PS/2 machines and laptops. It is a bus system similar to PCI or ISA. See From 07ef83ae9e99377f38f4d0e472ba6ff09324e5e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 16:59:11 +0100 Subject: [PATCH 271/566] x86/Voyager: remove NATSEMI Kconfig quirk x86/Voyager has this quirk for SCx200 support: config SCx200 tristate "NatSemi SCx200 support" depends on !X86_VOYAGER Remove it - Voyager users can disable drivers they dont need. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ae37a7504b57..9727dd59f54c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1884,7 +1884,6 @@ source "drivers/mca/Kconfig" config SCx200 tristate "NatSemi SCx200 support" - depends on !X86_VOYAGER help This provides basic support for National Semiconductor's (now AMD's) Geode processors. The driver probes for the From e0ec9483dbe8f534e0f3ef413f9eba9a5ff78050 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:01:14 +0100 Subject: [PATCH 272/566] x86/Voyager: remove KVM Kconfig quirk Voyager and other subarchitectures have this Kconfig quirk: select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) This is unnecessary, as KVM cleanly detects based on CPUID capabilities. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9727dd59f54c..a783fe7759ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,7 +34,7 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST - select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) + select HAVE_KVM select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 From 49793b0341a802cf5ee4179e837a2eb20f12c9fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:02:29 +0100 Subject: [PATCH 273/566] x86/Voyager: remove KGDB Kconfig quirk x86/Voyager has this KGDB quirk: select HAVE_ARCH_KGDB if !X86_VOYAGER This is completely pointless - there's nothing in KGDB that cannot work on Voyager. Remove it. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a783fe7759ca..12b433aaaa60 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -35,7 +35,7 @@ config X86 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM - select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS From aced3cee555f0b2fd58501e9b8a8a1295011e134 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:03:24 +0100 Subject: [PATCH 274/566] x86/Voyager: remove HIBERNATION Kconfig quirk Voyager has this hibernation quirk: config ARCH_HIBERNATION_POSSIBLE def_bool y depends on !SMP || !X86_VOYAGER Hibernation is a generic facility provided on all x86 platforms. If it is buggy on Voyager then that bug should be fixed - not worked around. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 12b433aaaa60..df952e788a64 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -140,7 +140,7 @@ config HAVE_CPUMASK_OF_CPU_MAP config ARCH_HIBERNATION_POSSIBLE def_bool y - depends on !SMP || !X86_VOYAGER + depends on !SMP config ARCH_SUSPEND_POSSIBLE def_bool y From f2fc0e3071230bb9ea9f64a08c3e619ad1357cfb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:04:39 +0100 Subject: [PATCH 275/566] x86/Voyager: remove ARCH_SUSPEND_POSSIBLE Kconfig quirk Voyager has this Kconfig quirk for suspend/resume: config ARCH_SUSPEND_POSSIBLE def_bool y depends on !X86_VOYAGER The proper mechanism to not suspend on a piece of hardware to disable CONFIG_SUSPEND. Remove the quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df952e788a64..8e6413e0e7cb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -144,7 +144,6 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y - depends on !X86_VOYAGER config ZONE_DMA32 bool From 3e5095d15276efd14a45393666b1bb7536bf179f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:07:08 +0100 Subject: [PATCH 276/566] x86: replace CONFIG_X86_SMP with CONFIG_SMP The x86/Voyager subarch used to have this distinction between 'x86 SMP support' and 'Voyager SMP support': config X86_SMP bool depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) This is a pointless distinction - Voyager can (and already does) use smp_ops to implement various SMP quirks it has - and it can be extended more to cover all the specialities of Voyager. So remove this complication in the Kconfig space. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 7 +------ arch/x86/Kconfig.debug | 2 +- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/tsc.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/mm/Makefile | 2 +- 12 files changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8e6413e0e7cb..3671506fb8df 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -173,11 +173,6 @@ config GENERIC_PENDING_IRQ depends on GENERIC_HARDIRQS && SMP default y -config X86_SMP - bool - depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) - default y - config USE_GENERIC_SMP_HELPERS def_bool y depends on SMP @@ -203,7 +198,7 @@ config X86_BIOS_REBOOT config X86_TRAMPOLINE bool - depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP) + depends on SMP || (64BIT && ACPI_SLEEP) default y config KTIME_SCALAR diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 28f111461ca8..a38dd6064f10 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -83,7 +83,7 @@ config DEBUG_PAGEALLOC config DEBUG_PER_CPU_MAPS bool "Debug access to per_cpu maps" depends on DEBUG_KERNEL - depends on X86_SMP + depends on SMP default n help Say Y to verify that the per_cpu map being accessed has diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index b87b077cc231..854d538ae857 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -9,7 +9,7 @@ * is no hardware IRQ pin equivalent for them, they are triggered * through the ICC by us (IPIs) */ -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index bfa921fad133..415507973968 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -98,7 +98,7 @@ extern asmlinkage void qic_call_function_interrupt(void); extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP extern void smp_reschedule_interrupt(struct pt_regs *); extern void smp_call_function_interrupt(struct pt_regs *); extern void smp_call_function_single_interrupt(struct pt_regs *); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1bc4cdc797db..dc05a57a474a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -57,8 +57,8 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 81efe86eca81..968c817762a4 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1900,7 +1900,7 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) +#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 4a48bb409748..e48640cfac0c 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e30864..89537f678b2d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -344,7 +344,7 @@ static void c1e_idle(void) void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { printk(KERN_WARNING "WARNING: polling idle and HT enabled," " performance may degrade.\n"); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e645d4793e4d..eeb180b1d7aa 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -588,7 +588,7 @@ early_param("elfcorehdr", setup_elfcorehdr); static int __init default_update_genapic(void) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (!apic->wakeup_cpu) apic->wakeup_cpu = wakeup_secondary_cpu_via_init; #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e58168631..83d53ce5d4c4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -773,7 +773,7 @@ __cpuinit int unsynchronized_tsc(void) if (!cpu_has_tsc || tsc_unstable) return 1; -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (apic_is_clustered_box()) return 1; #endif diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index c4c1f9e09402..a4791ef412d1 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -256,7 +256,7 @@ void __devinit vmi_time_bsp_init(void) */ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); local_irq_disable(); -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP /* * XXX handle_percpu_irq only defined for SMP; we need to switch over * to using it, since this is a local interrupt, which each CPU must diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9f05157220f5..2b938a384910 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,7 +1,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o -obj-$(CONFIG_X86_SMP) += tlb.o +obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o From c0b5842a457d44c8788b3fd0c64969be7ef673cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:13:05 +0100 Subject: [PATCH 277/566] x86: generalize boot_cpu_id x86/Voyager can boot on non-zero processors. While that can probably be fixed by properly remapping the physical CPU IDs, keep boot_cpu_id for now for easier transition - and expand it to all of x86. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/include/asm/cpu.h | 6 +----- arch/x86/kernel/setup.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 12 ------------ 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3671506fb8df..f4dd851384db 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -238,10 +238,6 @@ config SMP If you don't know what to do here, say N. -config X86_HAS_BOOT_CPU_ID - def_bool y - depends on X86_VOYAGER - config SPARSE_IRQ bool "Support sparse irq numbering" depends on PCI_MSI || HT_IRQ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index f03b23e32864..b185091bf19c 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -32,10 +32,6 @@ extern void arch_unregister_cpu(int); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif +extern unsigned int boot_cpu_id; #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index eeb180b1d7aa..609e5af60282 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,20 @@ #define ARCH_SETUP #endif +unsigned int boot_cpu_id __read_mostly; + +#ifdef CONFIG_X86_64 +int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +int default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} +#endif + #ifndef CONFIG_DEBUG_BOOT_PARAMS struct boot_params __initdata boot_params; #else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e90b3e50b54f..bc7e220ba0b4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -905,18 +905,6 @@ do_rest: return boot_error; } -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); -} -#endif - int __cpuinit native_cpu_up(unsigned int cpu) { int apicid = apic->cpu_present_to_apicid(cpu); From 23394d1c9346d9c0aabbc1b6fca52a9c0aa1c297 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:16:16 +0100 Subject: [PATCH 278/566] x86/Voyager: remove X86_HT Kconfig quirk Voyager has this Kconfig quirk: depends on (X86_32 && !X86_VOYAGER) || X86_64 That is unnecessary as HT support is CPUID driven and explicitly enumerated. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f4dd851384db..1d50c9de4d20 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -188,7 +188,6 @@ config X86_64_SMP config X86_HT bool depends on SMP - depends on (X86_32 && !X86_VOYAGER) || X86_64 default y config X86_BIOS_REBOOT From f095df0a0cb35a52605541f619d038339b90d7cc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:17:55 +0100 Subject: [PATCH 279/566] x86/Voyager: remove X86_BIOS_REBOOT Kconfig quirk Voyager has this Kconfig quirk: config X86_BIOS_REBOOT bool depends on !X86_VOYAGER default y Voyager should use the existing machine_ops.emergency_restart reboot quirk mechanism instead of a build-time quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 ------ arch/x86/include/asm/proto.h | 4 ---- arch/x86/kernel/Makefile | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d50c9de4d20..c0d79ab366de 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -190,11 +190,6 @@ config X86_HT depends on SMP default y -config X86_BIOS_REBOOT - bool - depends on !X86_VOYAGER - default y - config X86_TRAMPOLINE bool depends on SMP || (64BIT && ACPI_SLEEP) @@ -1361,7 +1356,6 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" - depends on X86_BIOS_REBOOT help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index d6a22f92ba77..49fb3ecf3bb3 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -18,11 +18,7 @@ extern void syscall32_cpu_init(void); extern void check_efer(void); -#ifdef CONFIG_X86_BIOS_REBOOT extern int reboot_force; -#else -static const int reboot_force = 0; -#endif long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index dc05a57a474a..24f357e7557a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -50,7 +50,7 @@ obj-y += step.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ -obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o +obj-y += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o From 550fe4f198558c147c6b8273a709568222a1668a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:28:08 +0100 Subject: [PATCH 280/566] x86/Voyager: remove X86_FIND_SMP_CONFIG Kconfig quirk x86/Voyager had this Kconfig quirk: config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER Which splits off the find_smp_config() callback into a build-time quirk. Voyager should use the existing x86_quirks.mach_find_smp_config() callback to introduce SMP-config quirks. NUMAQ-32 and VISWS already use this. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/include/asm/mpspec.h | 4 +++- arch/x86/kernel/setup.c | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c0d79ab366de..df7cb8d68e2f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -254,10 +254,6 @@ config NUMA_MIGRATE_IRQ_DESC If you don't know what to do here, say N. -config X86_FIND_SMP_CONFIG - def_bool y - depends on X86_MPPARSE || X86_VOYAGER - config X86_MPPARSE bool "Enable MPS table" if ACPI default y diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 03fb0d396543..d22f732eab8f 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -56,11 +56,13 @@ extern int smp_found_config; extern int mpc_default_type; extern unsigned long mp_lapic_addr; -extern void find_smp_config(void); extern void get_smp_config(void); + #ifdef CONFIG_X86_MPPARSE +extern void find_smp_config(void); extern void early_reserve_e820_mpc_new(void); #else +static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 609e5af60282..6abce6703c53 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -905,12 +905,11 @@ void __init setup_arch(char **cmdline_p) */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_X86_FIND_SMP_CONFIG /* * Find and reserve possible boot-time SMP configuration: */ find_smp_config(); -#endif + reserve_crashkernel(); #ifdef CONFIG_X86_64 From 36619a8a80793a803588a17f772313d5c948357d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:33:34 +0100 Subject: [PATCH 281/566] x86/VisWS: remove Kconfig quirk VisWS has this quirk currently: config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT The !Voyager quirk is unnecessary. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df7cb8d68e2f..f59d29201980 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -351,7 +351,7 @@ endchoice config X86_VISWS bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT + depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. From f154f47d5180c2012bf97999e6c600d45db8af2d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:34:56 +0100 Subject: [PATCH 282/566] x86/Voyager: remove VMI Kconfig quirk x86/Voyager has this build-time quirk: bool "VMI Guest support" select PARAVIRT depends on X86_32 depends on !X86_VOYAGER Since VMI is auto-detected (and Voyager will be auto-detected) there's no reason for this quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f59d29201980..4bd0c8febae9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -406,7 +406,6 @@ config VMI bool "VMI Guest support" select PARAVIRT depends on X86_32 - depends on !X86_VOYAGER help VMI provides a paravirtualized interface to the VMware ESX server (it could be used by other hypervisors in theory too, but is not From e084e531000a488d2d27864266c13ac824575a8b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:36:31 +0100 Subject: [PATCH 283/566] x86/Voyager: remove KVM_CLOCK quirk Voyager has this build-time quirk to exclude KVM_CLOCK: bool "KVM paravirtualized clock" select PARAVIRT select PARAVIRT_CLOCK depends on !X86_VOYAGER Voyager support built into a kernel image does not exclude KVM paravirt clock support - so remove this quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4bd0c8febae9..0bf0653969dd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -416,7 +416,6 @@ config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT select PARAVIRT_CLOCK - depends on !X86_VOYAGER help Turning on this option will allow you to run a paravirtualized clock when running over the KVM hypervisor. Instead of relying on a PIT From 54523edd237b9e792a3b76988fde23a91d739f43 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:37:33 +0100 Subject: [PATCH 284/566] x86/Voyager: remove KVM_GUEST quirk Voyager has this quirk currently: config KVM_GUEST bool "KVM Guest support" select PARAVIRT depends on !X86_VOYAGER Voyager support built into a kernel image does not exclude KVM paravirt guest support - so remove this quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0bf0653969dd..363111f40f34 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -426,7 +426,6 @@ config KVM_CLOCK config KVM_GUEST bool "KVM Guest support" select PARAVIRT - depends on !X86_VOYAGER help This option enables various optimizations for running under the KVM hypervisor. From c3e6a2042fef33b747d2ae3961f5312af801973d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:38:46 +0100 Subject: [PATCH 285/566] x86/Voyager: remove PARAVIRT Kconfig quirk Remove this Kconfig quirk: config PARAVIRT bool "Enable paravirtualization code" depends on !X86_VOYAGER help Voyager support built into a kernel does not preclude paravirt support. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 363111f40f34..82105349612d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -434,7 +434,6 @@ source "arch/x86/lguest/Kconfig" config PARAVIRT bool "Enable paravirtualization code" - depends on !X86_VOYAGER help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly From 7cd92366a593246650cc7d6198e2c7d3af8c1d8a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:40:48 +0100 Subject: [PATCH 286/566] x86/Voyager: remove APIC/IO-APIC Kbuild quirk The lapic/ioapic code properly auto-detects and is safe to run on CPUs that have no local APIC. (or which have their lapic turned off in the hardware) Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 82105349612d..6fbb3639d7bb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -642,7 +642,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH) + depends on X86_32 && !SMP && !X86_GENERICARCH help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -667,11 +667,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y - depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) + depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC config X86_IO_APIC def_bool y - depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) + depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC config X86_VISWS_APIC def_bool y From e006235e5b9cfb785ecbc05551788e33f96ea0ce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:45:17 +0100 Subject: [PATCH 287/566] x86/Voyager: remove MCE quirk If no MCE code is desired on Voyager hw then the solution is to turn them off in the .config - and to extend the MCE code to not initialize on Voyager. Remove the build-time quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6fbb3639d7bb..7cf9f0a8c8c3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -703,7 +703,6 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS config X86_MCE bool "Machine Check Exception" - depends on !X86_VOYAGER ---help--- Machine Check Exception support allows the processor to notify the kernel if it detects a problem (e.g. overheating, component failure). From 4b19ed915576e8034c3653b4b10b79bde10f69fa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:47:24 +0100 Subject: [PATCH 288/566] x86/Voyager: remove HOTPLUG_CPU Kconfig quirk Voyager has this Kconfig quirk: config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP && HOTPLUG && !X86_VOYAGER But this exception will be moot once Voyager starts using the generic x86 code. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7cf9f0a8c8c3..206645ac6f6a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1470,7 +1470,7 @@ config PHYSICAL_ALIGN config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" - depends on SMP && HOTPLUG && !X86_VOYAGER + depends on SMP && HOTPLUG ---help--- Say Y here to allow turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. From 1c61d8c309a4080980474de8c6689527be180782 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:49:22 +0100 Subject: [PATCH 289/566] x86/Voyager: remove power management Kconfig quirk Voyager has this PM/ACPI Kconfig quirk: menu "Power management and ACPI options" depends on !X86_VOYAGER Most of the PM features are auto-detect so they should be safe to run on just about any hardware. (If not, those instances need fixing.) In any case, if a kernel is built for Voyager, the power management options can be disabled. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 206645ac6f6a..f95c3b40e45d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1551,7 +1551,6 @@ config HAVE_ARCH_EARLY_PFN_TO_NID depends on NUMA menu "Power management and ACPI options" - depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER def_bool y From 1ec2dafd937c0f6fed46cbd8f6878f2c1db4a623 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:51:37 +0100 Subject: [PATCH 290/566] x86/Voyager: remove ISA quirk Voyager has this ISA quirk (because Voyager has no ISA support): config ISA bool "ISA support" depends on !X86_VOYAGER There's a ton of x86 hardware that does not support ISA, and because most ISA drivers cannot auto-detect in a safe way, the convention in the kernel has always been to not enable ISA drivers if they are not needed. Voyager users can do likewise - no need for a Kconfig quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f95c3b40e45d..38ed1a6c6d86 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1819,7 +1819,6 @@ if X86_32 config ISA bool "ISA support" - depends on !X86_VOYAGER help Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. the way the CPU talks to the other stuff From 06ac8346af04f6a972072f6c5780ba734832ad13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:11:43 +0100 Subject: [PATCH 291/566] x86: cleanup, introduce CONFIG_NON_STANDARD_PLATFORMS Introduce a Y/N Kconfig option for non-PC x86 platforms. Make VisWS, RDC321 and SGI/UV depend on this. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 38ed1a6c6d86..b090e5a27dac 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -349,9 +349,23 @@ config X86_VSMP endchoice +config X86_NON_STANDARD + bool "Support for non-standard x86 platforms" + help + If you disable this option then the kernel will only support + standard PC platforms. (which covers the vast majority of + systems out there.) + + If you enable this option then you'll be able to select a number + of less common non-PC x86 platforms: VisWS, RDC321, SGI/UV. + + If you have one of these systems, or if you want to build a + generic distribution kernel, say Y here - otherwise say N. + config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT + depends on X86_NON_STANDARD help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -364,6 +378,7 @@ config X86_VISWS config X86_RDC321X bool "RDC R-321x SoC" depends on X86_32 + depends on X86_NON_STANDARD select M486 select X86_REBOOTFIXUPS help @@ -374,6 +389,7 @@ config X86_RDC321X config X86_UV bool "SGI Ultraviolet" depends on X86_64 + depends on X86_NON_STANDARD help This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. From 9e111f3e167a14dd6252cff14fc7dd2ba4c650c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:18:25 +0100 Subject: [PATCH 292/566] x86: move ELAN to the NON_STANDARD_PLATFORM section Move X86_ELAN (old, AMD based web-boxes) from the subarchitecture menu to the non-standard-platform section. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b090e5a27dac..b7617e3781a2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,16 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_ELAN - bool "AMD Elan" - depends on X86_32 - help - Select this for an AMD Elan processor. - - Do not use this option for K6/Athlon/Opteron processors! - - If unsure, choose "PC-compatible" instead. - config X86_VOYAGER bool "Voyager (NCR)" depends on X86_32 && SMP && !PCI && BROKEN @@ -394,6 +384,17 @@ config X86_UV This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. +config X86_ELAN + bool "AMD Elan" + depends on X86_32 + depends on X86_NON_STANDARD + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" From f67ae5c9e52e385492b94c14376e322004701555 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:20:09 +0100 Subject: [PATCH 293/566] x86: move VOYAGER to the NON_STANDARD_PLATFORM section Move X86_ELAN (old, NCR hw platform built on Intel CPUs) from the subarchitecture menu to the non-standard-platform section. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b7617e3781a2..207dc9592d54 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,18 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_VOYAGER - bool "Voyager (NCR)" - depends on X86_32 && SMP && !PCI && BROKEN - help - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - config X86_GENERICARCH bool "Generic architecture" depends on X86_32 @@ -395,6 +383,19 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. +config X86_VOYAGER + bool "Voyager (NCR)" + depends on X86_32 && SMP && !PCI && BROKEN + depends on X86_NON_STANDARD + help + Voyager is an MCA-based 32-way capable SMP architecture proprietary + to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. + + *** WARNING *** + + If you do not specifically know you have a Voyager based machine, + say N here, otherwise the kernel you build will not be bootable. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" From 9c39801763ed6e08ea8bc694c5ab936643a2b763 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:24:57 +0100 Subject: [PATCH 294/566] x86: move non-standard 32-bit platform Kconfig entries - make X86_GENERICARCH depend X86_NON_STANDARD - move X86_SUMMIT, X86_ES7000 and X86_BIGSMP out of the subarchitecture menu and under this option Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 88 +++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 207dc9592d54..2fe298297cee 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,51 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_GENERICARCH - bool "Generic architecture" - depends on X86_32 - help - This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default - subarchitectures. It is intended for a generic binary kernel. - if you select them all, kernel will probe it one by one. and will - fallback to default. - -if X86_GENERICARCH - -config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 && PCI && X86_MPPARSE - select NUMA - help - This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) - NUMA multiquad box. This changes the way that processors are - bootstrapped, and uses Clustered Logical APIC addressing mode instead - of Flat Logical. You will need a new lynxer.elf file to flash your - firmware with - send email to . - -config X86_SUMMIT - bool "Summit/EXA (IBM x440)" - depends on X86_32 && SMP - help - This option is needed for IBM systems that use the Summit/EXA chipset. - In particular, it is needed for the x440. - -config X86_ES7000 - bool "Support for Unisys ES7000 IA32 series" - depends on X86_32 && SMP - help - Support for Unisys ES7000 systems. Say 'Y' here if this kernel is - supposed to run on an IA32-based Unisys ES7000 system. - -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - -endif - config X86_VSMP bool "Support for ScaleMP vSMP" select PARAVIRT @@ -396,6 +351,49 @@ config X86_VOYAGER If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. +config X86_GENERICARCH + bool "Support non-standard 32-bit SMP architectures" + depends on X86_32 && SMP + depends on X86_NON_STANDARD + help + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + subarchitectures. It is intended for a generic binary kernel. + if you select them all, kernel will probe it one by one. and will + fallback to default. + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + depends on X86_GENERICARCH + select NUMA + select X86_MPPARSE + help + This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) + NUMA multiquad box. This changes the way that processors are + bootstrapped, and uses Clustered Logical APIC addressing mode instead + of Flat Logical. You will need a new lynxer.elf file to flash your + firmware with - send email to . + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_GENERICARCH + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. + +config X86_ES7000 + bool "Support for Unisys ES7000 IA32 series" + depends on X86_GENERICARCH + help + Support for Unisys ES7000 systems. Say 'Y' here if this kernel is + supposed to run on an IA32-based Unisys ES7000 system. + +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_GENERICARCH + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" From 6a48565ed6ac76f351def25cd5e9f181331065f6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:29:13 +0100 Subject: [PATCH 295/566] x86: move X86_VSMP from subarch menu Move X86_VSMP out of the subarch menu - this way it can be enabled together with standard PC support as well, in the same kernel. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fe298297cee..ddddfb8fe091 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,15 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_VSMP - bool "Support for ScaleMP vSMP" - select PARAVIRT - depends on X86_64 && PCI - help - Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is - supposed to run on these EM64T-based machines. Only choose this option - if you have one of these machines. - endchoice config X86_NON_STANDARD @@ -327,6 +318,16 @@ config X86_UV This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. +config X86_VSMP + bool "Support for ScaleMP vSMP" + select PARAVIRT + depends on X86_64 && PCI + depends on X86_NON_STANDARD + help + Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is + supposed to run on these EM64T-based machines. Only choose this option + if you have one of these machines. + config X86_ELAN bool "AMD Elan" depends on X86_32 From e2c75d9f54334646b3dcdf1fea0d1afe7bfbf644 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:31:41 +0100 Subject: [PATCH 296/566] x86: remove the subarch menu Remove the subarch menu and standardize on X86_PC. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ddddfb8fe091..4773f1c54fb2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -262,16 +262,8 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it -choice - prompt "Subarchitecture Type" - default X86_PC - config X86_PC - bool "PC-compatible" - help - Choose this option if your computer is a standard PC or compatible. - -endchoice + def_bool y config X86_NON_STANDARD bool "Support for non-standard x86 platforms" From e0c7ae376a13fd79a4dad8becab51040d13dfa90 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:43:09 +0100 Subject: [PATCH 297/566] x86: rename X86_GENERICARCH to X86_32_NON_STANDARD X86_GENERICARCH is a misnomer - it contains non-PC 32-bit architectures that are not included in the default build. Rename it to X86_32_NON_STANDARD. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 22 +++++++++++----------- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- drivers/mtd/nand/Kconfig | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4773f1c54fb2..1427cb1ccd97 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -344,7 +344,7 @@ config X86_VOYAGER If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. -config X86_GENERICARCH +config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP depends on X86_NON_STANDARD @@ -356,7 +356,7 @@ config X86_GENERICARCH config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD select NUMA select X86_MPPARSE help @@ -368,21 +368,21 @@ config X86_NUMAQ config X86_SUMMIT bool "Summit/EXA (IBM x440)" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD help This option is needed for IBM systems that use the Summit/EXA chipset. In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD help This option is needed for the systems that have more than 8 CPUs and if the system is not of any sub-arch type above. @@ -475,11 +475,11 @@ config MEMTEST config X86_SUMMIT_NUMA def_bool y - depends on X86_32 && NUMA && X86_GENERICARCH + depends on X86_32 && NUMA && X86_32_NON_STANDARD config X86_CYCLONE_TIMER def_bool y - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD source "arch/x86/Kconfig.cpu" @@ -651,7 +651,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_GENERICARCH + depends on X86_32 && !SMP && !X86_32_NON_STANDARD help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -676,11 +676,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y - depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC config X86_IO_APIC def_bool y - depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC config X86_VISWS_APIC def_bool y @@ -1122,7 +1122,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_32_NON_STANDARD select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cb8b52785e37..7352c60f29db 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1335,7 +1335,7 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_bigsmp_probe(); #endif /* diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 94fe71029c37..89aaced51bd3 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -372,7 +372,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) (*x86_quirks->mpc_record)++; } -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_bigsmp_probe(); #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6abce6703c53..f64e1a487c9e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_apic_probe(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bc7e220ba0b4..fc80bc18943e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1007,7 +1007,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_WARNING "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + "Use CONFIG_X86_32_NON_STANDARD and CONFIG_X86_BIGSMP.\n"); nr = 0; for_each_present_cpu(cpu) { diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 8b12e6e109d3..928923665f6c 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -273,7 +273,7 @@ config MTD_NAND_CAFE config MTD_NAND_CS553X tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)" - depends on X86_32 && (X86_PC || X86_GENERICARCH) + depends on X86_32 && (X86_PC || X86_32_NON_STANDARD) help The CS553x companion chips for the AMD Geode processor include NAND flash controllers with built-in hardware ECC From 3769e7b4d8ef113e08221a210f849ba57475aff5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:46:23 +0100 Subject: [PATCH 298/566] x86/Voyager: move to the X86_32_NON_STANDARD code section Make Voyager depend on X86_32_NON_STANDARD - it is a non-standard 32-bit SMP architecture. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1427cb1ccd97..5bf0e0c58289 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -331,19 +331,6 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. -config X86_VOYAGER - bool "Voyager (NCR)" - depends on X86_32 && SMP && !PCI && BROKEN - depends on X86_NON_STANDARD - help - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP @@ -354,6 +341,13 @@ config X86_32_NON_STANDARD if you select them all, kernel will probe it one by one. and will fallback to default. +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32_NON_STANDARD + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" depends on X86_32_NON_STANDARD @@ -380,12 +374,18 @@ config X86_ES7000 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" +config X86_VOYAGER + bool "Voyager (NCR)" + depends on SMP && !PCI && BROKEN depends on X86_32_NON_STANDARD help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. + Voyager is an MCA-based 32-way capable SMP architecture proprietary + to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. + + *** WARNING *** + + If you do not specifically know you have a Voyager based machine, + say N here, otherwise the kernel you build will not be bootable. config SCHED_OMIT_FRAME_POINTER def_bool y From 0a1e8869f453ceda121a1ff8d6c4380832377be7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 28 Jan 2009 23:21:25 +0300 Subject: [PATCH 299/566] x86: trampoline_64.S - use predefined constants with simplification Impact: cleanup We may use macros from processor-flags.h instead of hardcoding bits. Actually it's not direct mapping of old instructions with new ones -- BTS does change CF flag while MOV does not. But i didn't find any dependency on CF in this code. Signed-off-by: Cyrill Gorcunov Acked-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/trampoline_64.S | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 894293c598db..95a012a4664e 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -29,6 +29,7 @@ #include #include #include +#include .section .rodata, "a", @progbits @@ -37,7 +38,7 @@ ENTRY(trampoline_data) r_base = . cli # We should be safe anyway - wbinvd + wbinvd mov %cs, %ax # Code and data in the same place mov %ax, %ds mov %ax, %es @@ -73,9 +74,8 @@ r_base = . lidtl tidt - r_base # load idt with 0, 0 lgdtl tgdt - r_base # load gdt with whatever is appropriate - xor %ax, %ax - inc %ax # protected mode (PE) bit - lmsw %ax # into protected mode + mov $X86_CR0_PE, %ax # protected mode (PE) bit + lmsw %ax # into protected mode # flush prefetch and jump to startup_32 ljmpl *(startup_32_vector - r_base) @@ -86,9 +86,8 @@ startup_32: movl $__KERNEL_DS, %eax # Initialize the %ds segment register movl %eax, %ds - xorl %eax, %eax - btsl $5, %eax # Enable PAE mode - movl %eax, %cr4 + movl $X86_CR4_PAE, %eax + movl %eax, %cr4 # Enable PAE mode # Setup trampoline 4 level pagetables leal (trampoline_level4_pgt - r_base)(%esi), %eax @@ -99,9 +98,9 @@ startup_32: xorl %edx, %edx wrmsr - xorl %eax, %eax - btsl $31, %eax # Enable paging and in turn activate Long Mode - btsl $0, %eax # Enable protected mode + # Enable paging and in turn activate Long Mode + # Enable protected mode + movl $(X86_CR0_PG | X86_CR0_PE), %eax movl %eax, %cr0 /* From 010060741ad35eacb504414bc6fb9bb575b15f62 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Jan 2009 16:02:12 +0100 Subject: [PATCH 300/566] x86: add might_sleep() to do_page_fault() Impact: widen debug checks VirtualBox calls do_page_fault() from an atomic context but runs into a might_sleep() way pas this point, cure that. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8f4b859a04b3..eb4d7fe05938 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -888,6 +888,12 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) return; } down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in which + * case we'll have missed the might_sleep() from down_read(). + */ + might_sleep(); } vma = find_vma(mm, address); From dba3d36b2f0842ed7f25c33cd3a2ccdb3d0df9db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 Jan 2009 17:10:12 +0100 Subject: [PATCH 301/566] Revert "generic, x86: fix __per_cpu_load relocation" This reverts commit 5a611268b69f05262936dd177205acbce4471358. It is causing occasional boot crashes, caused by certain linker versions (GNU ld version 2.18.50.0.6-2 20080403) messing up: 82dcc000 D __per_cpu_load c16e6000 A __per_cpu_load_abs The __per_cpu_load value is out of whack. Hpa noticed the following detail: * (gdb) p/x -(0xc16e6000-0x82dcc000) * $2 = 0xc16e6000 * I.e. one is the other << 1 The two symbols should be equal. Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f3180a85c66a..53e21f36a802 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -451,18 +451,17 @@ * end offset. */ #define PERCPU_VADDR(vaddr, phdr) \ - VMLINUX_SYMBOL(__per_cpu_load_abs) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ - VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version From fbeb2ca0224182033f196cf8f63989c3e6b90aba Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 29 Jan 2009 12:11:08 -0800 Subject: [PATCH 302/566] x86: unify genapic code, unify subarchitectures, remove old subarchitecture code, xapic fix xapic fix for 32bit platform with less than 8 cpu's. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/probe_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 61339a0d55cf..b5db26f8e069 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -113,7 +113,7 @@ struct genapic apic_default = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, From 019a1369667c3978f9644982ebe6d261dd2bbc40 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 29 Jan 2009 11:49:18 -0800 Subject: [PATCH 303/566] x86: uaccess: fix compilation error on CONFIG_M386 In case of !CONFIG_X86_WP_WORKS_OK, __put_user_size_ex() is not defined. Add macros for !CONFIG_X86_WP_WORKS_OK case. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 0ec6de4bcb0b..b9a24155f7af 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -525,8 +525,6 @@ struct __large_struct { unsigned long buf[100]; }; */ #define get_user_try uaccess_try #define get_user_catch(err) uaccess_catch(err) -#define put_user_try uaccess_try -#define put_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ unsigned long __gue_val; \ @@ -534,9 +532,29 @@ struct __large_struct { unsigned long buf[100]; }; (x) = (__force __typeof__(*(ptr)))__gue_val; \ } while (0) +#ifdef CONFIG_X86_WP_WORKS_OK + +#define put_user_try uaccess_try +#define put_user_catch(err) uaccess_catch(err) + #define put_user_ex(x, ptr) \ __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#else /* !CONFIG_X86_WP_WORKS_OK */ + +#define put_user_try do { \ + int __uaccess_err = 0; + +#define put_user_catch(err) \ + (err) |= __uaccess_err; \ +} while (0) + +#define put_user_ex(x, ptr) do { \ + __uaccess_err |= __put_user(x, ptr); \ +} while (0) + +#endif /* CONFIG_X86_WP_WORKS_OK */ + /* * movsl can be slow when source and dest are not both 8-byte aligned */ From 4272ebfbefd0db40073f3ee5990bceaf2894f08b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 15:14:46 -0800 Subject: [PATCH 304/566] x86: allow more than 8 cpus to be used on 32-bit X86_PC is the only remaining 'sub' architecture, so we dont need it anymore. This also cleans up a few spurious references to X86_PC in the driver space - those certainly should be X86. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 +----- arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - arch/x86/kernel/smpboot.c | 2 +- drivers/eisa/Kconfig | 6 +++--- drivers/input/keyboard/Kconfig | 4 ++-- drivers/input/mouse/Kconfig | 2 +- drivers/mtd/nand/Kconfig | 2 +- sound/drivers/Kconfig | 2 +- 9 files changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bf0e0c58289..afaf2cb7c1ac 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -262,9 +262,6 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it -config X86_PC - def_bool y - config X86_NON_STANDARD bool "Support for non-standard x86 platforms" help @@ -1019,7 +1016,6 @@ config NUMA bool "Numa Memory Allocation and Scheduler Support" depends on SMP depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) - default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help Enable NUMA (Non Uniform Memory Access) support. @@ -1122,7 +1118,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_32_NON_STANDARD + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edba00d98ac3..739bce993b56 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -188,7 +188,6 @@ CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y -CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 322dd2748fc9..02b514e8f4c4 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -187,7 +187,6 @@ CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y -CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc80bc18943e..2912fa3a8ef2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1000,7 +1000,7 @@ static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); -#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) +#ifndef CONFIG_X86_BIGSMP if (def_to_bigsmp && nr_cpu_ids > 8) { unsigned int cpu; unsigned nr; diff --git a/drivers/eisa/Kconfig b/drivers/eisa/Kconfig index c0646576cf47..2705284f6223 100644 --- a/drivers/eisa/Kconfig +++ b/drivers/eisa/Kconfig @@ -3,7 +3,7 @@ # config EISA_VLB_PRIMING bool "Vesa Local Bus priming" - depends on X86_PC && EISA + depends on X86 && EISA default n ---help--- Activate this option if your system contains a Vesa Local @@ -24,11 +24,11 @@ config EISA_PCI_EISA When in doubt, say Y. # Using EISA_VIRTUAL_ROOT on something other than an Alpha or -# an X86_PC may lead to crashes... +# an X86 may lead to crashes... config EISA_VIRTUAL_ROOT bool "EISA virtual root device" - depends on EISA && (ALPHA || X86_PC) + depends on EISA && (ALPHA || X86) default y ---help--- Activate this option if your system only have EISA bus diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 35561689ff38..ea2638b41982 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -13,11 +13,11 @@ menuconfig INPUT_KEYBOARD if INPUT_KEYBOARD config KEYBOARD_ATKBD - tristate "AT keyboard" if EMBEDDED || !X86_PC + tristate "AT keyboard" if EMBEDDED || !X86 default y select SERIO select SERIO_LIBPS2 - select SERIO_I8042 if X86_PC + select SERIO_I8042 if X86 select SERIO_GSCPS2 if GSC help Say Y here if you want to use a standard AT or PS/2 keyboard. Usually diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 093c8c1bca74..9bef935ef19f 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -17,7 +17,7 @@ config MOUSE_PS2 default y select SERIO select SERIO_LIBPS2 - select SERIO_I8042 if X86_PC + select SERIO_I8042 if X86 select SERIO_GSCPS2 if GSC help Say Y here if you have a PS/2 mouse connected to your system. This diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 928923665f6c..2ff88791cebc 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -273,7 +273,7 @@ config MTD_NAND_CAFE config MTD_NAND_CS553X tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)" - depends on X86_32 && (X86_PC || X86_32_NON_STANDARD) + depends on X86_32 help The CS553x companion chips for the AMD Geode processor include NAND flash controllers with built-in hardware ECC diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 0bcf14640fde..84714a65e5c8 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -33,7 +33,7 @@ if SND_DRIVERS config SND_PCSP tristate "PC-Speaker support (READ HELP!)" - depends on PCSPKR_PLATFORM && X86_PC && HIGH_RES_TIMERS + depends on PCSPKR_PLATFORM && X86 && HIGH_RES_TIMERS depends on INPUT depends on EXPERIMENTAL select SND_PCM From 754ef0cd65faac4840ada4362bda322d9a811fbf Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 28 Jan 2009 12:51:09 +0900 Subject: [PATCH 305/566] x86: fix debug message of CPU clock speed Impact: Fixes incorrect printk LOCAL APIC is corrected by PM-Timer, when SMI occurred while LOCAL APIC is calibrated. In this case, LOCAL APIC debug message(Boot with apic=debug) is displayed correctly, however, CPU clock speed debug message is displayed wrongly . When SMI occured on my machine, which has 1.6GHz CPU, CPU clock speed is displayed 3622.0205 MHz as follow. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773130 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773130) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 3622.0205 MHz. =====> here ..... host bus clock speed is 265.0987 MHz. This patch fixes to displaying CPU clock speed correctly as follow. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773131 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773131) TSC delta adjusted to PM-Timer: 159592409 (362220564) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 1595.0924 MHz. ..... host bus clock speed is 265.0987 MHz. Signed-off-by: Yasuaki Ishimatsu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 47 +++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 4b6df2469fe3..7bcd746d704c 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -535,7 +535,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } } -static int __init calibrate_by_pmtimer(long deltapm, long *delta) +static int __init +calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) { const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; const long pm_thresh = pm_100ms / 100; @@ -557,18 +558,29 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) if (deltapm > (pm_100ms - pm_thresh) && deltapm < (pm_100ms + pm_thresh)) { apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64)deltapm) * mult) >> 22; - do_div(res, 1000000); - pr_warning("APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64)(*delta)) * pm_100ms); + return 0; + } + + res = (((u64)deltapm) * mult) >> 22; + do_div(res, 1000000); + pr_warning("APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n",(long)res); + + /* Correct the lapic counter value */ + res = (((u64)(*delta)) * pm_100ms); + do_div(res, deltapm); + pr_info("APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long)res, *delta); + *delta = (long)res; + + /* Correct the tsc counter value */ + if (cpu_has_tsc) { + res = (((u64)(*deltatsc)) * pm_100ms); do_div(res, deltapm); - pr_info("APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long)res, *delta); - *delta = (long)res; + apic_printk(APIC_VERBOSE, "TSC delta adjusted to " + "PM-Timer: %lu (%ld) \n", + (unsigned long)res, *deltatsc); + *deltatsc = (long)res; } return 0; @@ -579,7 +591,7 @@ static int __init calibrate_APIC_clock(void) struct clock_event_device *levt = &__get_cpu_var(lapic_events); void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; - long delta; + long delta, deltatsc; int pm_referenced = 0; local_irq_disable(); @@ -609,9 +621,11 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); + /* we trust the PM based calibration if possible */ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, - &delta); + &delta, &deltatsc); /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -629,11 +643,10 @@ static int __init calibrate_APIC_clock(void) calibration_result); if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); } apic_printk(APIC_VERBOSE, "..... host bus clock speed is " From 39ba5d43fc9133696240fc8b6b13e7a41fea87cd Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 28 Jan 2009 12:52:24 +0900 Subject: [PATCH 306/566] x86: unify PM-Timer messages Impact: Cleans up printk formatting When LOCAL APIC was calibrated, the debug message is displayed as follows. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773131 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773131) TSC delta adjusted to PM-Timer: 159592409 (362220564) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 1595.0924 MHz. ..... host bus clock speed is 265.0987 MHz. There are three type of PM-Timer (PM-Timer, PM Timer, and PM timer), in this message. This patch unifies those messages to PM-Timer. Signed-off-by: Yasuaki Ishimatsu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 7bcd746d704c..0d935d218d0e 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -547,7 +547,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) return -1; #endif - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); /* Check, if the PM timer is available */ if (!deltapm) @@ -557,14 +557,14 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) if (deltapm > (pm_100ms - pm_thresh) && deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); return 0; } res = (((u64)deltapm) * mult) >> 22; do_div(res, 1000000); pr_warning("APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n",(long)res); + "with PM-Timer: %ldms instead of 100ms\n",(long)res); /* Correct the lapic counter value */ res = (((u64)(*delta)) * pm_100ms); From 36ef4944ee8118491631e317e406f9bd15e20e97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:29:24 -0800 Subject: [PATCH 307/566] x86, apic unification: remove left over files Impact: cleanup remove unused files Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 138 -------------------------- arch/x86/include/asm/bigsmp/apicdef.h | 9 -- arch/x86/include/asm/bigsmp/ipi.h | 22 ---- arch/x86/mach-default/Makefile | 5 - 4 files changed, 174 deletions(-) delete mode 100644 arch/x86/include/asm/bigsmp/apic.h delete mode 100644 arch/x86/include/asm/bigsmp/apicdef.h delete mode 100644 arch/x86/include/asm/bigsmp/ipi.h delete mode 100644 arch/x86/mach-default/Makefile diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h deleted file mode 100644 index ee29d66cd302..000000000000 --- a/arch/x86/include/asm/bigsmp/apic.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H - -#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) - -static inline int bigsmp_apic_id_registered(void) -{ - return 1; -} - -static inline const cpumask_t *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP - return &cpu_online_map; -#else - return &cpumask_of_cpu(0); -#endif -} - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline unsigned long -bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} - -static inline unsigned long bigsmp_check_apicid_present(int bit) -{ - return 1; -} - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = xapic_phys_to_log_apicid(cpu); - val |= SET_APIC_LOGICAL_ID(id); - return val; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void bigsmp_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static inline void bigsmp_setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Physflat", nr_ioapics); -} - -static inline int bigsmp_apicid_to_node(int logical_apicid) -{ - return apicid_2_node[hard_smp_processor_id()]; -} - -static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - - return BAD_APICID; -} - -static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int bigsmp_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return cpu_physical_id(cpu); -} - -static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); -} - -static inline void bigsmp_setup_portio_remap(void) -{ -} - -static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -/* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); -} - -static inline unsigned int -bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - if (cpu < nr_cpu_ids) - return bigsmp_cpu_to_logical_apicid(cpu); - - return BAD_APICID; -} - -static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_MACH_APIC_H */ diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h deleted file mode 100644 index e58dee847573..000000000000 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -static inline unsigned bigsmp_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#endif diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h deleted file mode 100644 index a91db69cda6b..000000000000 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void bigsmp_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void bigsmp_send_IPI_all(int vector) -{ - default_send_IPI_mask(cpu_online_mask, vector); -} - -#endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/mach-default/Makefile b/arch/x86/mach-default/Makefile deleted file mode 100644 index 012fe34459e6..000000000000 --- a/arch/x86/mach-default/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-y := setup.o From 1ff2f20de354a621ef4b56b9cfe6f9139a7e493b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:30:04 -0800 Subject: [PATCH 308/566] x86: fix compiling with 64bit with def_to_bigsmp only need to do cut off with 32bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2912fa3a8ef2..4c3cff574947 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1000,7 +1000,7 @@ static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); -#ifndef CONFIG_X86_BIGSMP +#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) if (def_to_bigsmp && nr_cpu_ids > 8) { unsigned int cpu; unsigned nr; From 43f39890db2959b10891cf7bbf3f53fffc8ce3bd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:31:49 -0800 Subject: [PATCH 309/566] x86: seperate default_send_IPI_mask_sequence/allbutself from logical Impact: 32-bit should use logical version there are two version: for default_send_IPI_mask_sequence/allbutself one in ipi.h and one in ipi.c for 32bit it seems .h version overwrote ipi.c for a while. restore it so 32 bit could use its old logical version. also remove dupicated functions in .c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ipi.h | 71 ++++++++++++--- arch/x86/kernel/bigsmp_32.c | 8 +- arch/x86/kernel/es7000_32.c | 4 +- arch/x86/kernel/genapic_flat_64.c | 6 +- arch/x86/kernel/ipi.c | 139 +----------------------------- arch/x86/kernel/numaq_32.c | 8 +- arch/x86/kernel/probe_32.c | 4 +- arch/x86/kernel/summit_32.c | 6 +- 8 files changed, 77 insertions(+), 169 deletions(-) diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index e2e8e4e0a656..aa79945445b5 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -120,7 +120,7 @@ static inline void } static inline void -default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) +default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) { unsigned long query_cpu; unsigned long flags; @@ -139,7 +139,7 @@ default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) } static inline void -default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector) { unsigned int this_cpu = smp_processor_id(); unsigned int query_cpu; @@ -157,23 +157,72 @@ default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) local_irq_restore(flags); } +#include + +static inline void +default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicasts to each CPU instead. This + * should be modified to do 1 message per cluster ID - mbligh + */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + local_irq_restore(flags); +} + +static inline void +default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + } + local_irq_restore(flags); +} /* Avoid include hell */ #define NMI_VECTOR 0x02 -void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - extern int no_broadcast; -#ifdef CONFIG_X86_64 -#include -#else -static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) +#ifndef CONFIG_X86_64 +/* + * This is only used on smaller machines. + */ +static inline void default_send_IPI_mask_bitmask_logical(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_bitmask(mask, vector); + unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long flags; + + local_irq_save(flags); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); + local_irq_restore(flags); +} + +static inline void default_send_IPI_mask_logical(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_bitmask_logical(mask, vector); } -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __default_local_send_IPI_allbutself(int vector) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index b1f91931003f..ab645c93a6e7 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -154,17 +155,14 @@ static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_phys(mask, vector); } static inline void bigsmp_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static inline void bigsmp_send_IPI_all(int vector) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 078364ccfa07..b5b50e8b94a9 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -451,12 +451,12 @@ static int es7000_check_dsdt(void) static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_phys(mask, vector); } static void es7000_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static void es7000_send_IPI_all(int vector) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 19bffb3f7320..249d2d3c034c 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -267,18 +267,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_sequence(cpumask, vector); + default_send_IPI_mask_sequence_phys(cpumask, vector); } static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_allbutself(cpumask, vector); + default_send_IPI_mask_allbutself_phys(cpumask, vector); } static void physflat_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 0893fa144581..339f4f3feee5 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -17,148 +17,13 @@ #include #include #include +#include #ifdef CONFIG_X86_32 -#include - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline int __prepare_ICR(unsigned int shortcut, int vector) -{ - unsigned int icr = shortcut | apic->dest_logical; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} void default_send_IPI_self(int vector) { - __default_send_IPI_shortcut(APIC_DEST_SELF, vector); -} - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). - */ -static inline void __default_send_IPI_dest_field(unsigned long mask, int vector) -{ - unsigned long cfg; - - /* - * Wait for idle. - */ - if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); - else - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - apic_write(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} - -/* - * This is only used on smaller machines. - */ -void default_send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - unsigned long flags; - - local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __default_send_IPI_dest_field(mask, vector); - local_irq_restore(flags); -} - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. This - * should be modified to do 1 message per cluster ID - mbligh - */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __default_send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); - local_irq_restore(flags); -} - -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector); - } - local_irq_restore(flags); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); } /* must come after the send_IPI functions above for inlining */ diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 83bb05524f43..c143b3292163 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -302,6 +302,7 @@ int __init get_memcfg_numaq(void) #include #include #include +#include #include #include #include @@ -319,17 +320,14 @@ static inline unsigned int numaq_get_apic_id(unsigned long x) return (x >> 24) & 0x0F; } -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_logical(mask, vector); } static inline void numaq_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); } static inline void numaq_send_IPI_all(int vector) diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index b5db26f8e069..3f12a4011822 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -112,8 +112,8 @@ struct genapic apic_default = { .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself, + .send_IPI_mask = default_send_IPI_mask_logical, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 84ff9ebbcc97..ecb41b9d7aa0 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -54,12 +55,9 @@ static inline unsigned summit_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); - static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_logical(mask, vector); } static inline void summit_send_IPI_allbutself(int vector) From 26f7ef14a76b0e590a3797fd7b2f3cee868d9664 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 14:19:22 -0800 Subject: [PATCH 310/566] x86: don't treat bigsmp as non-standard just like 64 bit switch from flat logical APIC messages to flat physical mode automatically. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 15 +++++++-------- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/mpparse.c | 4 ++-- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index afaf2cb7c1ac..c6e567bb6491 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -262,6 +262,12 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32 && SMP + help + This option is needed for the systems that have more than 8 CPUs + config X86_NON_STANDARD bool "Support for non-standard x86 platforms" help @@ -338,13 +344,6 @@ config X86_32_NON_STANDARD if you select them all, kernel will probe it one by one. and will fallback to default. -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" - depends on X86_32_NON_STANDARD - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" depends on X86_32_NON_STANDARD @@ -366,7 +365,7 @@ config X86_SUMMIT config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" - depends on X86_32_NON_STANDARD + depends on X86_32_NON_STANDARD && X86_BIGSMP help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7352c60f29db..3efa996b036c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1335,7 +1335,7 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_32_NON_STANDARD +#ifdef CONFIG_X86_BIGSMP generic_bigsmp_probe(); #endif /* diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 89aaced51bd3..b46ca7d31feb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -372,8 +372,8 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) (*x86_quirks->mpc_record)++; } -#ifdef CONFIG_X86_32_NON_STANDARD - generic_bigsmp_probe(); +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); #endif if (apic->setup_apic_routing) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f64e1a487c9e..df64afff5806 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_32_NON_STANDARD +#if defined(CONFIG_X86_32_NON_STANDARD) || defined(CONFIG_X86_BIGSMP) generic_apic_probe(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4c3cff574947..1268a862abb7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1007,7 +1007,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_WARNING "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_32_NON_STANDARD and CONFIG_X86_BIGSMP.\n"); + "Use CONFIG_X86_BIGSMP.\n"); nr = 0; for_each_present_cpu(cpu) { From 3ac6cffea4aa18007a454a7442da2855882f403d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 30 Jan 2009 16:32:22 +0900 Subject: [PATCH 311/566] linker script: use separate simpler definition for PERCPU() Impact: fix linker screwup on x86_32 Recent x86_64 zerobased patches introduced PERCPU_VADDR() to put .data.percpu to a predefined address and re-defined PERCPU() in terms of it. The new macro defined one extra symbol, __per_cpu_load, for LMA of the section so that the init data could be accessed. This new symbol introduced the following problems to x86_32. 1. If __per_cpu_load is defined outside of .data.percpu as an absolute symbol, relocation generation for relocatable kernel fails due to absolute relocation. 2. If __per_cpu_load is put inside .data.percpu with absolute address assignment to work around #1, linker gets confused and under certain configurations ends up relocating the symbol against .data.percpu such that the load address gets added on top of already set load address. As x86_32 doesn't use predefined address for .data.percpu, there's no need for it to care about the possibility of __per_cpu_load being different from __per_cpu_start. This patch defines PERCPU() separately so that __per_cpu_load is defined inside .data.percpu so that everything is ordinary linking-wise. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 53e21f36a802..5406e70aba86 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -445,10 +445,9 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * - * This macro defines three symbols, __per_cpu_load, __per_cpu_start - * and __per_cpu_end. The first one is the vaddr of loaded percpu - * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the - * end offset. + * Note that this macros defines __per_cpu_load as an absolute symbol. + * If there is no need to put the percpu section at a predetermined + * address, use PERCPU(). */ #define PERCPU_VADDR(vaddr, phdr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ @@ -470,7 +469,20 @@ * Align to @align and outputs output section for percpu area. This * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and * __per_cpu_start will be identical. + * + * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except + * that __per_cpu_load is defined as a relative symbol against + * .data.percpu which is required for relocatable x86_32 + * configuration. */ #define PERCPU(align) \ . = ALIGN(align); \ - PERCPU_VADDR( , ) + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ + *(.data.percpu.first) \ + *(.data.percpu.page_aligned) \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } From 6b64ee02da20d6c0d97115e0b1ab47f9fa2f0d8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 30 Jan 2009 23:42:18 +0100 Subject: [PATCH 312/566] x86, apic, 32-bit: add self-IPI methods Impact: fix rare crash on 32-bit The 32-bit APIC drivers had their send_IPI_self vectors set to NULL, but ioapic_retrigger_irq() depends on it being always set. Fix it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 2 +- arch/x86/kernel/es7000_32.c | 2 +- arch/x86/kernel/numaq_32.c | 2 +- arch/x86/kernel/probe_32.c | 2 +- arch/x86/kernel/summit_32.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index ab645c93a6e7..47a62f46afdb 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -252,7 +252,7 @@ struct genapic apic_bigsmp = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index b5b50e8b94a9..d6184c12a182 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -786,7 +786,7 @@ struct genapic apic_es7000 = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = es7000_send_IPI_allbutself, .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index c143b3292163..947748e17211 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -554,7 +554,7 @@ struct genapic apic_numaq = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = numaq_send_IPI_allbutself, .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 3f12a4011822..22337b75de62 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -116,7 +116,7 @@ struct genapic apic_default = { .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ecb41b9d7aa0..1e733eff9b33 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -588,7 +588,7 @@ struct genapic apic_summit = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = summit_send_IPI_allbutself, .send_IPI_all = summit_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, From 319f3ba52c71630865b10ac3b99dd020440d681d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:01 -0800 Subject: [PATCH 313/566] xen: move remaining mmu-related stuff into mmu.c Impact: Cleanup Move remaining mmu-related stuff into mmu.c. A general cleanup, and lay the groundwork for later patches. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 731 +-------------------------------------- arch/x86/xen/mmu.c | 700 +++++++++++++++++++++++++++++++++++++ arch/x86/xen/mmu.h | 3 + arch/x86/xen/xen-ops.h | 8 + 4 files changed, 712 insertions(+), 730 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 6b3f7eef57e3..0cd2a165f179 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -61,35 +61,6 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; - -#ifdef CONFIG_X86_64 -/* l3 pud for userspace vsyscall mapping */ -static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ - -/* - * Note about cr3 (pagetable base) values: - * - * xen_cr3 contains the current logical cr3 value; it contains the - * last set cr3. This may not be the current effective cr3, because - * its update may be being lazily deferred. However, a vcpu looking - * at its own cr3 can use this value knowing that it everything will - * be self-consistent. - * - * xen_current_cr3 contains the actual vcpu cr3; it is set once the - * hypercall to set the vcpu cr3 is complete (so it may be a little - * out of date, but it will never be set early). If one vcpu is - * looking at another vcpu's cr3 value, it should use this variable. - */ -DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ -DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ - struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -237,7 +208,7 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy(void) +void xen_leave_lazy(void) { paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); @@ -598,76 +569,6 @@ static struct apic_ops xen_basic_apic_ops = { #endif -static void xen_flush_tlb(void) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_LOCAL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_single(unsigned long addr) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - op = mcs.args; - op->cmd = MMUEXT_INVLPG_LOCAL; - op->arg1.linear_addr = addr & PAGE_MASK; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_others(const struct cpumask *cpus, - struct mm_struct *mm, unsigned long va) -{ - struct { - struct mmuext_op op; - DECLARE_BITMAP(mask, NR_CPUS); - } *args; - struct multicall_space mcs; - - BUG_ON(cpumask_empty(cpus)); - BUG_ON(!mm); - - mcs = xen_mc_entry(sizeof(*args)); - args = mcs.args; - args->op.arg2.vcpumask = to_cpumask(args->mask); - - /* Remove us, and any offline CPUS. */ - cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); - if (unlikely(cpumask_empty(to_cpumask(args->mask)))) - goto issue; - - if (va == TLB_FLUSH_ALL) { - args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - } else { - args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = va; - } - - MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - -issue: - xen_mc_issue(PARAVIRT_LAZY_MMU); -} static void xen_clts(void) { @@ -693,21 +594,6 @@ static void xen_write_cr0(unsigned long cr0) xen_mc_issue(PARAVIRT_LAZY_CPU); } -static void xen_write_cr2(unsigned long cr2) -{ - percpu_read(xen_vcpu)->arch.cr2 = cr2; -} - -static unsigned long xen_read_cr2(void) -{ - return percpu_read(xen_vcpu)->arch.cr2; -} - -static unsigned long xen_read_cr2_direct(void) -{ - return percpu_read(xen_vcpu_info.arch.cr2); -} - static void xen_write_cr4(unsigned long cr4) { cr4 &= ~X86_CR4_PGE; @@ -716,71 +602,6 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static unsigned long xen_read_cr3(void) -{ - return percpu_read(xen_cr3); -} - -static void set_current_cr3(void *v) -{ - percpu_write(xen_current_cr3, (unsigned long)v); -} - -static void __xen_write_cr3(bool kernel, unsigned long cr3) -{ - struct mmuext_op *op; - struct multicall_space mcs; - unsigned long mfn; - - if (cr3) - mfn = pfn_to_mfn(PFN_DOWN(cr3)); - else - mfn = 0; - - WARN_ON(mfn == 0 && kernel); - - mcs = __xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; - op->arg1.mfn = mfn; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - if (kernel) { - percpu_write(xen_cr3, cr3); - - /* Update xen_current_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); - } -} - -static void xen_write_cr3(unsigned long cr3) -{ - BUG_ON(preemptible()); - - xen_mc_batch(); /* disables interrupts */ - - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - percpu_write(xen_cr3, cr3); - - __xen_write_cr3(true, cr3); - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif - - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ -} - static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -822,185 +643,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } -/* Early in boot, while setting up the initial pagetable, assume - everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) -{ -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); -} - -/* Early release_pte assumes that all pts are pinned, since there's - only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) -{ - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); -} - -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - -/* This needs to make sure the new pte page is pinned iff its being - attached to a pinned pagetable. */ -static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(virt_to_page(mm->pgd))) { - SetPagePinned(page); - - vm_unmap_aliases(); - if (!PageHighMem(page)) { - make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } - } -} - -static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PTE); -} - -static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PMD); -} - -static int xen_pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = mm->pgd; - int ret = 0; - - BUG_ON(PagePinned(virt_to_page(pgd))); - -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; - - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { - user_pgd[pgd_index(VSYSCALL_START)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); - } -#endif - - return ret; -} - -static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifdef CONFIG_X86_64 - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) - free_page((unsigned long)user_pgd); -#endif -} - -/* This should never happen until we're OK to use struct page */ -static void xen_release_ptpage(unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(page)) { - if (!PageHighMem(page)) { - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); - } - ClearPagePinned(page); - } -} - -static void xen_release_pte(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PTE); -} - -static void xen_release_pmd(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PMD); -} - -#if PAGETABLE_LEVELS == 4 -static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PUD); -} - -static void xen_release_pud(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PUD); -} -#endif - -#ifdef CONFIG_HIGHPTE -static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) -{ - pgprot_t prot = PAGE_KERNEL; - - if (PagePinned(page)) - prot = PAGE_KERNEL_RO; - - if (0 && PageHighMem(page)) - printk("mapping highpte %lx type %d prot %s\n", - page_to_pfn(page), type, - (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); - - return kmap_atomic_prot(page, type, prot); -} -#endif - -#ifdef CONFIG_X86_32 -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} - -/* Init-time set_pte while constructing initial pagetables, which - doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - pte = mask_rw_pte(ptep, pte); - - xen_set_pte(ptep, pte); -} -#endif - -static __init void xen_pagetable_setup_start(pgd_t *base) -{ -} - void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1021,37 +663,6 @@ void xen_setup_shared_info(void) xen_setup_mfn_list_list(); } -static __init void xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); -} - -static __init void xen_post_allocator_init(void) -{ - pv_mmu_ops.set_pte = xen_set_pte; - pv_mmu_ops.set_pmd = xen_set_pmd; - pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.set_pgd = xen_set_pgd; -#endif - - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; -#endif - -#ifdef CONFIG_X86_64 - SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif - xen_mark_init_mm_pinned(); -} - /* This is called once we have the cpu_possible_map */ void xen_setup_vcpu_info_placement(void) { @@ -1126,49 +737,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) -{ - pte_t pte; - - phys >>= PAGE_SHIFT; - - switch (idx) { - case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_F00F_BUG - case FIX_F00F_IDT: -#endif -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: - case FIX_VDSO: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#else - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: -#endif -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ -#endif - pte = pfn_pte(phys, prot); - break; - - default: - pte = mfn_pte(phys, prot); - break; - } - - __native_set_fixmap(idx, pte); - -#ifdef CONFIG_X86_64 - /* Replicate changes to map the vsyscall page into the user - pagetable vsyscall mapping. */ - if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { - unsigned long vaddr = __fix_to_virt(idx); - set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); - } -#endif -} - static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, @@ -1264,86 +832,6 @@ static const struct pv_apic_ops xen_apic_ops __initdata = { #endif }; -static const struct pv_mmu_ops xen_mmu_ops __initdata = { - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, - .flush_tlb_others = xen_flush_tlb_others, - - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, - - .alloc_pte = xen_alloc_pte_init, - .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, - .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = xen_kmap_atomic_pte, -#endif - -#ifdef CONFIG_X86_64 - .set_pte = xen_set_pte, -#else - .set_pte = xen_set_pte_init, -#endif - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd_hyper, - - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, - - .pte_val = xen_pte_val, - .pgd_val = xen_pgd_val, - - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .set_pte_present = xen_set_pte_at, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ - .set_pud = xen_set_pud_hyper, - - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, - -#if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, - .set_pgd = xen_set_pgd_hyper, - - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, -#endif /* PAGETABLE_LEVELS == 4 */ - - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, - .exit_mmap = xen_exit_mmap, - - .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, - }, - - .set_fixmap = xen_set_fixmap, -}; - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1386,223 +874,6 @@ static const struct machine_ops __initdata xen_machine_ops = { }; -static void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} - -/* - * Like __va(), but returns address in the kernel mapping (which is - * all we have until the physical memory mapping has been set up. - */ -static void *__ka(phys_addr_t paddr) -{ -#ifdef CONFIG_X86_64 - return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif -} - -/* Convert a machine address to physical address */ -static unsigned long m2p(phys_addr_t maddr) -{ - phys_addr_t paddr; - - maddr &= PTE_PFN_MASK; - paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; - - return paddr; -} - -/* Convert a machine address to kernel virtual */ -static void *m2v(phys_addr_t maddr) -{ - return __ka(m2p(maddr)); -} - -static void set_page_prot(void *addr, pgprot_t prot) -{ - unsigned long pfn = __pa(addr) >> PAGE_SHIFT; - pte_t pte = pfn_pte(pfn, prot); - - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) - BUG(); -} - -static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} - -#ifdef CONFIG_X86_64 -static void convert_pfn_mfn(void *v) -{ - pte_t *pte = v; - int i; - - /* All levels are converted the same way, so just treat them - as ptes. */ - for (i = 0; i < PTRS_PER_PTE; i++) - pte[i] = xen_make_pte(pte[i].pte); -} - -/* - * Set up the inital kernel pagetable. - * - * We can construct this by grafting the Xen provided pagetable into - * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - */ -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pud_t *l3; - pmd_t *l2; - - /* Zap identity mapping */ - init_level4_pgt[0] = __pgd(0); - - /* Pre-constructed entries are in pfn, so convert to mfn */ - convert_pfn_mfn(init_level4_pgt); - convert_pfn_mfn(level3_ident_pgt); - convert_pfn_mfn(level3_kernel_pgt); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); - - /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - - /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); - - /* Unpin Xen-provided one */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - /* Switch over */ - pgd = init_level4_pgt; - - /* - * At this stage there can be no user pgd, and no page - * structure to attach it to, so make sure we just set kernel - * pgd. - */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - - reserve_early(__pa(xen_start_info->pt_base), - __pa(xen_start_info->pt_base + - xen_start_info->nr_pt_frames * PAGE_SIZE), - "XEN PAGETABLES"); - - return pgd; -} -#else /* !CONFIG_X86_64 */ -static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; - -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - - xen_map_identity_early(level2_kernel_pgt, max_pfn); - - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); - - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); - - return swapper_pg_dir; -} -#endif /* CONFIG_X86_64 */ - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 98cb9869eb24..94e452c0b00c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,8 @@ #include #include +#include +#include #include "multicalls.h" #include "mmu.h" @@ -114,6 +117,37 @@ static inline void check_zero(void) #endif /* CONFIG_XEN_DEBUG_FS */ + +/* + * Identity map, in addition to plain kernel map. This needs to be + * large enough to allocate page table pages to allocate the rest. + * Each page can map 2MB. + */ +static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; + +#ifdef CONFIG_X86_64 +/* l3 pud for userspace vsyscall mapping */ +static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +#endif /* CONFIG_X86_64 */ + +/* + * Note about cr3 (pagetable base) values: + * + * xen_cr3 contains the current logical cr3 value; it contains the + * last set cr3. This may not be the current effective cr3, because + * its update may be being lazily deferred. However, a vcpu looking + * at its own cr3 can use this value knowing that it everything will + * be self-consistent. + * + * xen_current_cr3 contains the actual vcpu cr3; it is set once the + * hypercall to set the vcpu cr3 is complete (so it may be a little + * out of date, but it will never be set early). If one vcpu is + * looking at another vcpu's cr3 value, it should use this variable. + */ +DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ +DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ + + /* * Just beyond the highest usermode address. STACK_TOP_MAX has a * redzone above it, so round it up to a PGD boundary. @@ -1152,6 +1186,672 @@ void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } +static __init void xen_pagetable_setup_start(pgd_t *base) +{ +} + +static __init void xen_pagetable_setup_done(pgd_t *base) +{ + xen_setup_shared_info(); +} + +static void xen_write_cr2(unsigned long cr2) +{ + percpu_read(xen_vcpu)->arch.cr2 = cr2; +} + +static unsigned long xen_read_cr2(void) +{ + return percpu_read(xen_vcpu)->arch.cr2; +} + +unsigned long xen_read_cr2_direct(void) +{ + return percpu_read(xen_vcpu_info.arch.cr2); +} + +static void xen_flush_tlb(void) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_LOCAL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_single(unsigned long addr) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + op = mcs.args; + op->cmd = MMUEXT_INVLPG_LOCAL; + op->arg1.linear_addr = addr & PAGE_MASK; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) +{ + struct { + struct mmuext_op op; + DECLARE_BITMAP(mask, NR_CPUS); + } *args; + struct multicall_space mcs; + + BUG_ON(cpumask_empty(cpus)); + BUG_ON(!mm); + + mcs = xen_mc_entry(sizeof(*args)); + args = mcs.args; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; + + if (va == TLB_FLUSH_ALL) { + args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; + } else { + args->op.cmd = MMUEXT_INVLPG_MULTI; + args->op.arg1.linear_addr = va; + } + + MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); + +issue: + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + +static unsigned long xen_read_cr3(void) +{ + return percpu_read(xen_cr3); +} + +static void set_current_cr3(void *v) +{ + percpu_write(xen_current_cr3, (unsigned long)v); +} + +static void __xen_write_cr3(bool kernel, unsigned long cr3) +{ + struct mmuext_op *op; + struct multicall_space mcs; + unsigned long mfn; + + if (cr3) + mfn = pfn_to_mfn(PFN_DOWN(cr3)); + else + mfn = 0; + + WARN_ON(mfn == 0 && kernel); + + mcs = __xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; + op->arg1.mfn = mfn; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + if (kernel) { + percpu_write(xen_cr3, cr3); + + /* Update xen_current_cr3 once the batch has actually + been submitted. */ + xen_mc_callback(set_current_cr3, (void *)cr3); + } +} + +static void xen_write_cr3(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + percpu_write(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); + } +#endif + + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ +} + +static int xen_pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = mm->pgd; + int ret = 0; + + BUG_ON(PagePinned(virt_to_page(pgd))); + +#ifdef CONFIG_X86_64 + { + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + + BUG_ON(page->private != 0); + + ret = -ENOMEM; + + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; + + if (user_pgd != NULL) { + user_pgd[pgd_index(VSYSCALL_START)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + ret = 0; + } + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + } +#endif + + return ret; +} + +static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_X86_64 + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) + free_page((unsigned long)user_pgd); +#endif +} + + +/* Early in boot, while setting up the initial pagetable, assume + everything is pinned. */ +static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) +{ +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +} + +/* Early release_pte assumes that all pts are pinned, since there's + only init_mm and anything attached to that is pinned. */ +static void xen_release_pte_init(unsigned long pfn) +{ + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +} + +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + +/* This needs to make sure the new pte page is pinned iff its being + attached to a pinned pagetable. */ +static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(virt_to_page(mm->pgd))) { + SetPagePinned(page); + + vm_unmap_aliases(); + if (!PageHighMem(page)) { + make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + } else { + /* make sure there are no stray mappings of + this page */ + kmap_flush_unused(); + } + } +} + +static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PTE); +} + +static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PMD); +} + +/* This should never happen until we're OK to use struct page */ +static void xen_release_ptpage(unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(page)) { + if (!PageHighMem(page)) { + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + } + ClearPagePinned(page); + } +} + +static void xen_release_pte(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PTE); +} + +static void xen_release_pmd(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PMD); +} + +#if PAGETABLE_LEVELS == 4 +static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PUD); +} + +static void xen_release_pud(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PUD); +} +#endif + +void __init xen_reserve_top(void) +{ +#ifdef CONFIG_X86_32 + unsigned long top = HYPERVISOR_VIRT_START; + struct xen_platform_parameters pp; + + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) + top = pp.virt_start; + + reserve_top_address(-top); +#endif /* CONFIG_X86_32 */ +} + +/* + * Like __va(), but returns address in the kernel mapping (which is + * all we have until the physical memory mapping has been set up. + */ +static void *__ka(phys_addr_t paddr) +{ +#ifdef CONFIG_X86_64 + return (void *)(paddr + __START_KERNEL_map); +#else + return __va(paddr); +#endif +} + +/* Convert a machine address to physical address */ +static unsigned long m2p(phys_addr_t maddr) +{ + phys_addr_t paddr; + + maddr &= PTE_PFN_MASK; + paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; + + return paddr; +} + +/* Convert a machine address to kernel virtual */ +static void *m2v(phys_addr_t maddr) +{ + return __ka(m2p(maddr)); +} + +static void set_page_prot(void *addr, pgprot_t prot) +{ + unsigned long pfn = __pa(addr) >> PAGE_SHIFT; + pte_t pte = pfn_pte(pfn, prot); + + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + BUG(); +} + +static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +{ + unsigned pmdidx, pteidx; + unsigned ident_pte; + unsigned long pfn; + + ident_pte = 0; + pfn = 0; + for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { + pte_t *pte_page; + + /* Reuse or allocate a page of ptes */ + if (pmd_present(pmd[pmdidx])) + pte_page = m2v(pmd[pmdidx].pmd); + else { + /* Check for free pte pages */ + if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) + break; + + pte_page = &level1_ident_pgt[ident_pte]; + ident_pte += PTRS_PER_PTE; + + pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); + } + + /* Install mappings */ + for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { + pte_t pte; + + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; + + if (!pte_none(pte_page[pteidx])) + continue; + + pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); + pte_page[pteidx] = pte; + } + } + + for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) + set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); + + set_page_prot(pmd, PAGE_KERNEL_RO); +} + +#ifdef CONFIG_X86_64 +static void convert_pfn_mfn(void *v) +{ + pte_t *pte = v; + int i; + + /* All levels are converted the same way, so just treat them + as ptes. */ + for (i = 0; i < PTRS_PER_PTE; i++) + pte[i] = xen_make_pte(pte[i].pte); +} + +/* + * Set up the inital kernel pagetable. + * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into + * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This + * means that only the kernel has a physical mapping to start with - + * but that's enough to get __va working. We need to fill in the rest + * of the physical mapping once some sort of allocator has been set + * up. + */ +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pud_t *l3; + pmd_t *l2; + + /* Zap identity mapping */ + init_level4_pgt[0] = __pgd(0); + + /* Pre-constructed entries are in pfn, so convert to mfn */ + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + + memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); + memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + /* Set up identity map */ + xen_map_identity_early(level2_ident_pgt, max_pfn); + + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(init_level4_pgt))); + + /* Unpin Xen-provided one */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + /* Switch over */ + pgd = init_level4_pgt; + + /* + * At this stage there can be no user pgd, and no page + * structure to attach it to, so make sure we just set kernel + * pgd. + */ + xen_mc_batch(); + __xen_write_cr3(true, __pa(pgd)); + xen_mc_issue(PARAVIRT_LAZY_CPU); + + reserve_early(__pa(xen_start_info->pt_base), + __pa(xen_start_info->pt_base + + xen_start_info->nr_pt_frames * PAGE_SIZE), + "XEN PAGETABLES"); + + return pgd; +} +#else /* !CONFIG_X86_64 */ +static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; + +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pmd_t *kernel_pmd; + + init_pg_tables_start = __pa(pgd); + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + + kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); + memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + + xen_map_identity_early(level2_kernel_pgt, max_pfn); + + memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], + __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(empty_zero_page, PAGE_KERNEL_RO); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + xen_write_cr3(__pa(swapper_pg_dir)); + + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + + return swapper_pg_dir; +} +#endif /* CONFIG_X86_64 */ + +static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) +{ + pte_t pte; + + phys >>= PAGE_SHIFT; + + switch (idx) { + case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: +#ifdef CONFIG_X86_F00F_BUG + case FIX_F00F_IDT: +#endif +#ifdef CONFIG_X86_32 + case FIX_WP_TEST: + case FIX_VDSO: +# ifdef CONFIG_HIGHMEM + case FIX_KMAP_BEGIN ... FIX_KMAP_END: +# endif +#else + case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: +#endif +#ifdef CONFIG_X86_LOCAL_APIC + case FIX_APIC_BASE: /* maps dummy local APIC */ +#endif + pte = pfn_pte(phys, prot); + break; + + default: + pte = mfn_pte(phys, prot); + break; + } + + __native_set_fixmap(idx, pte); + +#ifdef CONFIG_X86_64 + /* Replicate changes to map the vsyscall page into the user + pagetable vsyscall mapping. */ + if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { + unsigned long vaddr = __fix_to_virt(idx); + set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); + } +#endif +} + +__init void xen_post_allocator_init(void) +{ + pv_mmu_ops.set_pte = xen_set_pte; + pv_mmu_ops.set_pmd = xen_set_pmd; + pv_mmu_ops.set_pud = xen_set_pud; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.set_pgd = xen_set_pgd; +#endif + + /* This will work as long as patching hasn't happened yet + (which it hasn't) */ + pv_mmu_ops.alloc_pte = xen_alloc_pte; + pv_mmu_ops.alloc_pmd = xen_alloc_pmd; + pv_mmu_ops.release_pte = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.alloc_pud = xen_alloc_pud; + pv_mmu_ops.release_pud = xen_release_pud; +#endif + +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif + xen_mark_init_mm_pinned(); +} + + +const struct pv_mmu_ops xen_mmu_ops __initdata = { + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .read_cr2 = xen_read_cr2, + .write_cr2 = xen_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, + + .flush_tlb_user = xen_flush_tlb, + .flush_tlb_kernel = xen_flush_tlb, + .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_others = xen_flush_tlb_others, + + .pte_update = paravirt_nop, + .pte_update_defer = paravirt_nop, + + .pgd_alloc = xen_pgd_alloc, + .pgd_free = xen_pgd_free, + + .alloc_pte = xen_alloc_pte_init, + .release_pte = xen_release_pte_init, + .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd_clone = paravirt_nop, + .release_pmd = xen_release_pte_init, + +#ifdef CONFIG_HIGHPTE + .kmap_atomic_pte = xen_kmap_atomic_pte, +#endif + +#ifdef CONFIG_X86_64 + .set_pte = xen_set_pte, +#else + .set_pte = xen_set_pte_init, +#endif + .set_pte_at = xen_set_pte_at, + .set_pmd = xen_set_pmd_hyper, + + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, + + .pte_val = xen_pte_val, + .pgd_val = xen_pgd_val, + + .make_pte = xen_make_pte, + .make_pgd = xen_make_pgd, + +#ifdef CONFIG_X86_PAE + .set_pte_atomic = xen_set_pte_atomic, + .set_pte_present = xen_set_pte_at, + .pte_clear = xen_pte_clear, + .pmd_clear = xen_pmd_clear, +#endif /* CONFIG_X86_PAE */ + .set_pud = xen_set_pud_hyper, + + .make_pmd = xen_make_pmd, + .pmd_val = xen_pmd_val, + +#if PAGETABLE_LEVELS == 4 + .pud_val = xen_pud_val, + .make_pud = xen_make_pud, + .set_pgd = xen_set_pgd_hyper, + + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, +#endif /* PAGETABLE_LEVELS == 4 */ + + .activate_mm = xen_activate_mm, + .dup_mmap = xen_dup_mmap, + .exit_mmap = xen_exit_mmap, + + .lazy_mode = { + .enter = paravirt_enter_lazy_mmu, + .leave = xen_leave_lazy, + }, + + .set_fixmap = xen_set_fixmap, +}; + + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 98d71659da5a..24d1b44a337d 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -54,4 +54,7 @@ pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +unsigned long xen_read_cr2_direct(void); + +extern const struct pv_mmu_ops xen_mmu_ops; #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c1f8faf0a2c5..11913fc94c14 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -13,6 +13,7 @@ extern const char xen_failsafe_callback[]; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); +DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); @@ -22,6 +23,13 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_setup_machphys_mapping(void); +pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_ident_map_ISA(void); +void xen_reserve_top(void); + +void xen_leave_lazy(void); +void xen_post_allocator_init(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); From 41edafdb78feac1d1f8823846209975fde990633 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:02 -0800 Subject: [PATCH 314/566] x86/pvops: add a paravirt_ident functions to allow special patching Impact: Optimization Several paravirt ops implementations simply return their arguments, the most obvious being the make_pte/pte_val class of operations on native. On 32-bit, the identity function is literally a no-op, as the calling convention uses the same registers for the first argument and return. On 64-bit, it can be implemented with a single "mov". This patch adds special identity functions for 32 and 64 bit argument, and machinery to recognize them and replace them with either nops or a mov as appropriate. At the moment, the only users for the identity functions are the pagetable entry conversion functions. The result is a measureable improvement on pagetable-heavy benchmarks (2-3%, reducing the pvops overhead from 5 to 2%). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 5 ++ arch/x86/kernel/paravirt.c | 75 +++++++++++++++++++++++++---- arch/x86/kernel/paravirt_patch_32.c | 12 +++++ arch/x86/kernel/paravirt_patch_64.c | 15 ++++++ 4 files changed, 98 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 175778887090..961d10c12f16 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -388,6 +388,8 @@ extern struct pv_lock_ops pv_lock_ops; asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); unsigned paravirt_patch_ignore(unsigned len); unsigned paravirt_patch_call(void *insnbuf, const void *target, u16 tgt_clobbers, @@ -1371,6 +1373,9 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, } void _paravirt_nop(void); +u32 _paravirt_ident_32(u32); +u64 _paravirt_ident_64(u64); + #define paravirt_nop ((void *)_paravirt_nop) void paravirt_use_bytelocks(void); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 202514be5923..dd25e2b1593b 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -44,6 +44,17 @@ void _paravirt_nop(void) { } +/* identity function, which can be inlined */ +u32 _paravirt_ident_32(u32 x) +{ + return x; +} + +u64 _paravirt_ident_64(u64 x) +{ + return x; +} + static void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); - else if (opfunc == paravirt_nop) + else if (opfunc == _paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); + + /* identity functions just return their single argument */ + else if (opfunc == _paravirt_ident_32) + ret = paravirt_patch_ident_32(insnbuf, len); + else if (opfunc == _paravirt_ident_64) + ret = paravirt_patch_ident_64(insnbuf, len); + else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || @@ -373,6 +391,45 @@ struct pv_apic_ops pv_apic_ops = { #endif }; +typedef pte_t make_pte_t(pteval_t); +typedef pmd_t make_pmd_t(pmdval_t); +typedef pud_t make_pud_t(pudval_t); +typedef pgd_t make_pgd_t(pgdval_t); + +typedef pteval_t pte_val_t(pte_t); +typedef pmdval_t pmd_val_t(pmd_t); +typedef pudval_t pud_val_t(pud_t); +typedef pgdval_t pgd_val_t(pgd_t); + + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) +/* 32-bit pagetable entries */ +#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_32 +#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_32 +#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_32 +#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_32 +#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_32 +#else +/* 64-bit pagetable entries */ +#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_64 +#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_64 +#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_64 +#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_64 +#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_64 +#endif + struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, @@ -424,21 +481,21 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = native_pmd_val, - .make_pmd = native_make_pmd, + .pmd_val = paravirt_native_pmd_val, + .make_pmd = paravirt_native_make_pmd, #if PAGETABLE_LEVELS == 4 - .pud_val = native_pud_val, - .make_pud = native_make_pud, + .pud_val = paravirt_native_pud_val, + .make_pud = paravirt_native_make_pud, .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = native_pte_val, - .pgd_val = native_pgd_val, + .pte_val = paravirt_native_pte_val, + .pgd_val = paravirt_native_pgd_val, - .make_pte = native_make_pte, - .make_pgd = native_make_pgd, + .make_pte = paravirt_native_make_pte, + .make_pgd = paravirt_native_make_pgd, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 9fe644f4861d..d9f32e6d6ab6 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + /* arg in %eax, return in %eax */ + return 0; +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + /* arg in %edx:%eax, return in %edx:%eax */ + return 0; +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 061d01df9ae6..3f08f34f93eb 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); +DEF_NATIVE(, mov32, "mov %edi, %eax"); +DEF_NATIVE(, mov64, "mov %rdi, %rax"); + +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov32, end__mov32); +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov64, end__mov64); +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { From b8aa287f77be943e37a84fa4657e27df95269bfb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:03 -0800 Subject: [PATCH 315/566] x86: fix paravirt clobber in entry_64.S Impact: Fix latent bug The clobber is trying to say that anything except RDI is available for clobbering, but actually clobbers everything. This hasn't mattered because the clobbers were basically ignored, but subsequent patches will rely on them. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a52703864a16..e4c9710cae52 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1140,7 +1140,7 @@ ENTRY(native_load_gs_index) CFI_STARTPROC pushf CFI_ADJUST_CFA_OFFSET 8 - DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) + DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS gs_change: movl %edi,%gs From 9104a18dcdd8dfefdddca8ce44988563f13ed3c4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:04 -0800 Subject: [PATCH 316/566] x86/paravirt: selectively save/restore regs around pvops calls Impact: Optimization Each asm paravirt-ops call says what registers are available for clobbering. This patch makes use of this to selectively save/restore registers around each pvops call. In many cases this significantly shrinks code size. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 100 +++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 961d10c12f16..dcce961262bf 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -12,19 +12,29 @@ #define CLBR_EAX (1 << 0) #define CLBR_ECX (1 << 1) #define CLBR_EDX (1 << 2) +#define CLBR_EDI (1 << 3) -#ifdef CONFIG_X86_64 -#define CLBR_RSI (1 << 3) -#define CLBR_RDI (1 << 4) +#ifdef CONFIG_X86_32 +/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +#define CLBR_ANY ((1 << 4) - 1) +#else +#define CLBR_RAX CLBR_EAX +#define CLBR_RCX CLBR_ECX +#define CLBR_RDX CLBR_EDX +#define CLBR_RDI CLBR_EDI +#define CLBR_RSI (1 << 4) #define CLBR_R8 (1 << 5) #define CLBR_R9 (1 << 6) #define CLBR_R10 (1 << 7) #define CLBR_R11 (1 << 8) #define CLBR_ANY ((1 << 9) - 1) + +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ + CLBR_RCX | CLBR_R8 | CLBR_R9) +#define CLBR_RET_REG (CLBR_RAX | CLBR_RDX) +#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) + #include -#else -/* CLBR_ANY should match all regs platform has. For i386, that's just it */ -#define CLBR_ANY ((1 << 3) - 1) #endif /* X86_64 */ #ifndef __ASSEMBLY__ @@ -1530,33 +1540,49 @@ static inline unsigned long __raw_local_irq_save(void) .popsection +#define COND_PUSH(set, mask, reg) \ + .if ((~set) & mask); push %reg; .endif +#define COND_POP(set, mask, reg) \ + .if ((~set) & mask); pop %reg; .endif + #ifdef CONFIG_X86_64 -#define PV_SAVE_REGS \ - push %rax; \ - push %rcx; \ - push %rdx; \ - push %rsi; \ - push %rdi; \ - push %r8; \ - push %r9; \ - push %r10; \ - push %r11 -#define PV_RESTORE_REGS \ - pop %r11; \ - pop %r10; \ - pop %r9; \ - pop %r8; \ - pop %rdi; \ - pop %rsi; \ - pop %rdx; \ - pop %rcx; \ - pop %rax + +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_RAX, rax); \ + COND_PUSH(set, CLBR_RCX, rcx); \ + COND_PUSH(set, CLBR_RDX, rdx); \ + COND_PUSH(set, CLBR_RSI, rsi); \ + COND_PUSH(set, CLBR_RDI, rdi); \ + COND_PUSH(set, CLBR_R8, r8); \ + COND_PUSH(set, CLBR_R9, r9); \ + COND_PUSH(set, CLBR_R10, r10); \ + COND_PUSH(set, CLBR_R11, r11) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_R11, r11); \ + COND_POP(set, CLBR_R10, r10); \ + COND_POP(set, CLBR_R9, r9); \ + COND_POP(set, CLBR_R8, r8); \ + COND_POP(set, CLBR_RDI, rdi); \ + COND_POP(set, CLBR_RSI, rsi); \ + COND_POP(set, CLBR_RDX, rdx); \ + COND_POP(set, CLBR_RCX, rcx); \ + COND_POP(set, CLBR_RAX, rax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) #else -#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx -#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_EAX, eax); \ + COND_PUSH(set, CLBR_EDI, edi); \ + COND_PUSH(set, CLBR_ECX, ecx); \ + COND_PUSH(set, CLBR_EDX, edx) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_EDX, edx); \ + COND_POP(set, CLBR_ECX, ecx); \ + COND_POP(set, CLBR_EDI, edi); \ + COND_POP(set, CLBR_EAX, eax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) #define PARA_INDIRECT(addr) *%cs:addr @@ -1568,15 +1594,15 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS;) \ + PV_RESTORE_REGS(clobbers);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS;) + PV_RESTORE_REGS(clobbers);) #define USERGS_SYSRET32 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ @@ -1606,11 +1632,15 @@ static inline unsigned long __raw_local_irq_save(void) PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ swapgs) +/* + * Note: swapgs is very special, and in practise is either going to be + * implemented with a single "swapgs" instruction or something very + * special. Either way, we don't need to save any registers for + * it. + */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - PV_SAVE_REGS; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ - PV_RESTORE_REGS \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ ) #define GET_CR2_INTO_RCX \ From ecb93d1ccd0aac63f03be2db3cac3fa974716f4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:05 -0800 Subject: [PATCH 317/566] x86/paravirt: add register-saving thunks to reduce caller register pressure Impact: Optimization One of the problems with inserting a pile of C calls where previously there were none is that the register pressure is greatly increased. The C calling convention says that the caller must expect a certain set of registers may be trashed by the callee, and that the callee can use those registers without restriction. This includes the function argument registers, and several others. This patch seeks to alleviate this pressure by introducing wrapper thunks that will do the register saving/restoring, so that the callsite doesn't need to worry about it, but the callee function can be conventional compiler-generated code. In many cases (particularly performance-sensitive cases) the callee will be in assembler anyway, and need not use the compiler's calling convention. Standard calling convention is: arguments return scratch x86-32 eax edx ecx eax ? x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11 The thunk preserves all argument and scratch registers. The return register is not preserved, and is available as a scratch register for unwrapped callee code (and of course the return value). Wrapped function pointers are themselves wrapped in a struct paravirt_callee_save structure, in order to get some warning from the compiler when functions with mismatched calling conventions are used. The most common paravirt ops, both statically and dynamically, are interrupt enable/disable/save/restore, so handle them first. This is particularly easy since their calls are handled specially anyway. XXX Deal with VMI. What's their calling convention? Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 126 ++++++++++++++++++++++++-------- arch/x86/kernel/paravirt.c | 8 +- arch/x86/kernel/vsmp_64.c | 12 ++- arch/x86/lguest/boot.c | 13 +++- arch/x86/xen/enlighten.c | 8 +- arch/x86/xen/irq.c | 14 +++- 6 files changed, 132 insertions(+), 49 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dcce961262bf..f9107b88631b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -17,6 +17,10 @@ #ifdef CONFIG_X86_32 /* CLBR_ANY should match all regs platform has. For i386, that's just it */ #define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX) +#define CLBR_SCRATCH (0) #else #define CLBR_RAX CLBR_EAX #define CLBR_RCX CLBR_ECX @@ -27,16 +31,19 @@ #define CLBR_R9 (1 << 6) #define CLBR_R10 (1 << 7) #define CLBR_R11 (1 << 8) + #define CLBR_ANY ((1 << 9) - 1) #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX | CLBR_RDX) +#define CLBR_RET_REG (CLBR_RAX) #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) #include #endif /* X86_64 */ +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) + #ifndef __ASSEMBLY__ #include #include @@ -50,6 +57,14 @@ struct tss_struct; struct mm_struct; struct desc_struct; +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + /* general info */ struct pv_info { unsigned int kernel_rpl; @@ -199,11 +214,15 @@ struct pv_irq_ops { * expected to use X86_EFLAGS_IF; all other bits * returned from save_fl are undefined, and may be ignored by * restore_fl. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. */ - unsigned long (*save_fl)(void); - void (*restore_fl)(unsigned long); - void (*irq_disable)(void); - void (*irq_enable)(void); + struct paravirt_callee_save save_fl; + struct paravirt_callee_save restore_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + void (*safe_halt)(void); void (*halt)(void); @@ -1437,12 +1456,37 @@ extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; #ifdef CONFIG_X86_32 -#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" -#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" +#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" +#define PV_RESTORE_REGS "popl %edx; popl %ecx;" + +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS PV_SAVE_REGS +#define PV_RESTORE_ALL_CALLER_REGS PV_RESTORE_REGS + #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS #define PV_VEXTRA_CLOBBERS #else +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rcx;" \ + "push %rdx;" \ + "push %rsi;" \ + "push %rdi;" \ + "push %r8;" \ + "push %r9;" \ + "push %r10;" \ + "push %r11;" +#define PV_RESTORE_ALL_CALLER_REGS \ + "pop %r11;" \ + "pop %r10;" \ + "pop %r9;" \ + "pop %r8;" \ + "pop %rdi;" \ + "pop %rsi;" \ + "pop %rdx;" \ + "pop %rcx;" + /* We save some registers, but all of them, that's too much. We clobber all * caller saved registers but the argument parameter */ #define PV_SAVE_REGS "pushq %%rdi;" @@ -1452,52 +1496,76 @@ extern struct paravirt_patch_site __parainstructions[], #define PV_FLAGS_ARG "D" #endif +/* + * Generate a thunk around a function which saves all caller-save + * registers except for the return value. This allows C functions to + * be called from assembler code where fewer than normal registers are + * available. It may also help code generation around calls from C + * code if the common case doesn't use many registers. + * + * When a callee is wrapped in a thunk, the caller can assume that all + * arg regs and all scratch registers are preserved across the + * call. The return value in rax/eax will not be saved, even for void + * functions. + */ +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + extern typeof(func) __raw_callee_save_##func; \ + static void *__##func##__ __used = func; \ + \ + asm(".pushsection .text;" \ + "__raw_callee_save_" #func ": " \ + PV_SAVE_ALL_CALLER_REGS \ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + "ret;" \ + ".popsection") + +/* Get a reference to a callee-save function */ +#define PV_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { __raw_callee_save_##func }) + +/* Promise that "func" already uses the right calling convention */ +#define __PV_IS_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { func }) + static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : paravirt_type(pv_irq_ops.save_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_VEXTRA_CLOBBERS); + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : PV_FLAGS_ARG(f), paravirt_type(pv_irq_ops.restore_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_EXTRA_CLOBBERS); + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_disable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_enable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) @@ -1541,9 +1609,9 @@ static inline unsigned long __raw_local_irq_save(void) #define COND_PUSH(set, mask, reg) \ - .if ((~set) & mask); push %reg; .endif + .if ((~(set)) & mask); push %reg; .endif #define COND_POP(set, mask, reg) \ - .if ((~set) & mask); pop %reg; .endif + .if ((~(set)) & mask); pop %reg; .endif #ifdef CONFIG_X86_64 @@ -1594,15 +1662,15 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS(clobbers); \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers);) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS(clobbers); \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers);) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define USERGS_SYSRET32 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dd25e2b1593b..8adb6b5aa421 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -310,10 +310,10 @@ struct pv_time_ops pv_time_ops = { struct pv_irq_ops pv_irq_ops = { .init_IRQ = native_init_IRQ, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, + .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), + .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), + .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .safe_halt = native_safe_halt, .halt = native_halt, #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a688f3bfaec2..c609205df594 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) flags &= ~X86_EFLAGS_IF; return flags; } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); static void vsmp_restore_fl(unsigned long flags) { @@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags) flags |= X86_EFLAGS_AC; native_restore_fl(flags); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); static void vsmp_irq_disable(void) { @@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); static void vsmp_irq_enable(void) { @@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) cap, ctl); if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_irq_ops.irq_disable = vsmp_irq_disable; - pv_irq_ops.irq_enable = vsmp_irq_enable; - pv_irq_ops.save_fl = vsmp_save_fl; - pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); + pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_init_ops.patch = vsmp_patch; ctl &= ~(1 << 4); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92f1c6f3e19d..19e33b6cd593 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -173,24 +173,29 @@ static unsigned long save_fl(void) { return lguest_data.irq_enabled; } +PV_CALLEE_SAVE_REGS_THUNK(save_fl); /* restore_flags() just sets the flags back to the value given. */ static void restore_fl(unsigned long flags) { lguest_data.irq_enabled = flags; } +PV_CALLEE_SAVE_REGS_THUNK(restore_fl); /* Interrupts go off... */ static void irq_disable(void) { lguest_data.irq_enabled = 0; } +PV_CALLEE_SAVE_REGS_THUNK(irq_disable); /* Interrupts go on... */ static void irq_enable(void) { lguest_data.irq_enabled = X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(irq_enable); + /*:*/ /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, @@ -984,10 +989,10 @@ __init void lguest_init(void) /* interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; - pv_irq_ops.save_fl = save_fl; - pv_irq_ops.restore_fl = restore_fl; - pv_irq_ops.irq_disable = irq_disable; - pv_irq_ops.irq_enable = irq_enable; + pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; /* init-time operations */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0cd2a165f179..ff6d530ccc77 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -676,10 +676,10 @@ void xen_setup_vcpu_info_placement(void) if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); - pv_irq_ops.save_fl = xen_save_fl_direct; - pv_irq_ops.restore_fl = xen_restore_fl_direct; - pv_irq_ops.irq_disable = xen_irq_disable_direct; - pv_irq_ops.irq_enable = xen_irq_enable_direct; + pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); + pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); pv_mmu_ops.read_cr2 = xen_read_cr2_direct; } } diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 2e8271431e1a..5a070900ad35 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -50,6 +50,7 @@ static unsigned long xen_save_fl(void) */ return (-flags) & X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); static void xen_restore_fl(unsigned long flags) { @@ -76,6 +77,7 @@ static void xen_restore_fl(unsigned long flags) xen_force_evtchn_callback(); } } +PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); static void xen_irq_disable(void) { @@ -86,6 +88,7 @@ static void xen_irq_disable(void) percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); static void xen_irq_enable(void) { @@ -106,6 +109,7 @@ static void xen_irq_enable(void) if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); static void xen_safe_halt(void) { @@ -124,10 +128,12 @@ static void xen_halt(void) static const struct pv_irq_ops xen_irq_ops __initdata = { .init_IRQ = __xen_init_IRQ, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, + + .save_fl = PV_CALLEE_SAVE(xen_save_fl), + .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), + .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), + .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), + .safe_halt = xen_safe_halt, .halt = xen_halt, #ifdef CONFIG_X86_64 From 791bad9d28d405d9397ea0c370ffb7c7bdd2aa6e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:06 -0800 Subject: [PATCH 318/566] x86/paravirt: implement PVOP_CALL macros for callee-save functions Impact: Optimization Functions with the callee save calling convention clobber many fewer registers than the normal C calling convention. Implement variants of PVOP_V?CALL* accordingly. This only bothers with functions up to 3 args, since functions with more args may as well use the normal calling convention. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 134 +++++++++++++++++++++++--------- 1 file changed, 99 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f9107b88631b..beb10ecdbe67 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -510,25 +510,45 @@ int paravirt_disable_iospace(void); * makes sure the incoming and outgoing types are always correct. */ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx +#define PVOP_VCALL_ARGS \ + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS + +#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS + +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS -#else -#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx +#else /* CONFIG_X86_64 */ +#define PVOP_VCALL_ARGS \ + unsigned long __edi = __edi, __esi = __esi, \ + __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + +#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ "=S" (__esi), "=d" (__edx), \ "=c" (__ecx) - #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" -#endif +#endif /* CONFIG_X86_32 */ #ifdef CONFIG_PARAVIRT_DEBUG #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) @@ -536,10 +556,11 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)op) #endif -#define __PVOP_CALL(rettype, op, pre, post, ...) \ +#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ + pre, post, ...) \ ({ \ rettype __ret; \ - PVOP_CALL_ARGS; \ + PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ /* This is 32-bit specific, but is okay in 64-bit */ \ /* since this condition will never hold */ \ @@ -547,70 +568,113 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)__eax; \ } \ __ret; \ }) -#define __PVOP_VCALL(op, pre, post, ...) \ + +#define __PVOP_CALL(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ + EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) + +#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_CALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + +#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ ({ \ PVOP_VCALL_ARGS; \ PVOP_TEST_NULL(op); \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_VCALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" VEXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ }) +#define __PVOP_VCALL(op, pre, post, ...) \ + ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, \ + pre, post, ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + + #define PVOP_CALL0(rettype, op) \ __PVOP_CALL(rettype, op, "", "") #define PVOP_VCALL0(op) \ __PVOP_VCALL(op, "", "") +#define PVOP_CALLEE0(rettype, op) \ + __PVOP_CALLEESAVE(rettype, op, "", "") +#define PVOP_VCALLEE0(op) \ + __PVOP_VCALLEESAVE(op, "", "") + + #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + +#define PVOP_CALLEE1(rettype, op, arg1) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALLEE1(op, arg1) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + +#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALLEE2(op, arg1, arg2) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) /* This is the only difference in x86_64. We can make it much simpler */ #ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ __PVOP_VCALL(op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ @@ -618,13 +682,13 @@ int paravirt_disable_iospace(void); "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) #else #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_CALL(rettype, op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_VCALL(op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #endif static inline int paravirt_enabled(void) From da5de7c22eb705be709a57e486e7475a6969b994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:07 -0800 Subject: [PATCH 319/566] x86/paravirt: use callee-saved convention for pte_val/make_pte/etc Impact: Optimization In the native case, pte_val, make_pte, etc are all just identity functions, so there's no need to clobber a lot of registers over them. (This changes the 32-bit callee-save calling convention to return both EAX and EDX so functions can return 64-bit values.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 78 ++++++++++++++++----------------- arch/x86/kernel/paravirt.c | 53 +++++----------------- arch/x86/xen/mmu.c | 24 ++++++---- 3 files changed, 67 insertions(+), 88 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index beb10ecdbe67..2d098b78bc10 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -19,7 +19,7 @@ #define CLBR_ANY ((1 << 4) - 1) #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) #define CLBR_SCRATCH (0) #else #define CLBR_RAX CLBR_EAX @@ -308,11 +308,11 @@ struct pv_mmu_ops { void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - pteval_t (*pte_val)(pte_t); - pte_t (*make_pte)(pteval_t pte); + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; - pgdval_t (*pgd_val)(pgd_t); - pgd_t (*make_pgd)(pgdval_t pgd); + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE @@ -327,12 +327,12 @@ struct pv_mmu_ops { void (*set_pud)(pud_t *pudp, pud_t pudval); - pmdval_t (*pmd_val)(pmd_t); - pmd_t (*make_pmd)(pmdval_t pmd); + struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; #if PAGETABLE_LEVELS == 4 - pudval_t (*pud_val)(pud_t); - pud_t (*make_pud)(pudval_t pud); + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); #endif /* PAGETABLE_LEVELS == 4 */ @@ -1155,13 +1155,13 @@ static inline pte_t __pte(pteval_t val) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, - pv_mmu_ops.make_pte, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pteval_t, + pv_mmu_ops.make_pte, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pteval_t, - pv_mmu_ops.make_pte, - val); + ret = PVOP_CALLEE1(pteval_t, + pv_mmu_ops.make_pte, + val); return (pte_t) { .pte = ret }; } @@ -1171,11 +1171,11 @@ static inline pteval_t pte_val(pte_t pte) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, - pte.pte, (u64)pte.pte >> 32); + ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, + pte.pte, (u64)pte.pte >> 32); else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, - pte.pte); + ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, + pte.pte); return ret; } @@ -1185,11 +1185,11 @@ static inline pgd_t __pgd(pgdval_t val) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, - val); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, + val); return (pgd_t) { ret }; } @@ -1199,11 +1199,11 @@ static inline pgdval_t pgd_val(pgd_t pgd) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd, (u64)pgd.pgd >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd, (u64)pgd.pgd >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd); return ret; } @@ -1267,11 +1267,11 @@ static inline pmd_t __pmd(pmdval_t val) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, - val); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, + val); return (pmd_t) { ret }; } @@ -1281,11 +1281,11 @@ static inline pmdval_t pmd_val(pmd_t pmd) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd, (u64)pmd.pmd >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd, (u64)pmd.pmd >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd); return ret; } @@ -1307,11 +1307,11 @@ static inline pud_t __pud(pudval_t val) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, - val); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, + val); return (pud_t) { ret }; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8adb6b5aa421..cea11c8e3049 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -391,43 +391,12 @@ struct pv_apic_ops pv_apic_ops = { #endif }; -typedef pte_t make_pte_t(pteval_t); -typedef pmd_t make_pmd_t(pmdval_t); -typedef pud_t make_pud_t(pudval_t); -typedef pgd_t make_pgd_t(pgdval_t); - -typedef pteval_t pte_val_t(pte_t); -typedef pmdval_t pmd_val_t(pmd_t); -typedef pudval_t pud_val_t(pud_t); -typedef pgdval_t pgd_val_t(pgd_t); - - #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) /* 32-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_32 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_32 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_32 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_32 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_32 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) #else /* 64-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_64 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_64 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_64 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_64 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_64 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif struct pv_mmu_ops pv_mmu_ops = { @@ -481,21 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = paravirt_native_pmd_val, - .make_pmd = paravirt_native_make_pmd, + + .pmd_val = PTE_IDENT, + .make_pmd = PTE_IDENT, #if PAGETABLE_LEVELS == 4 - .pud_val = paravirt_native_pud_val, - .make_pud = paravirt_native_make_pud, + .pud_val = PTE_IDENT, + .make_pud = PTE_IDENT, + .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = paravirt_native_pte_val, - .pgd_val = paravirt_native_pgd_val, + .pte_val = PTE_IDENT, + .pgd_val = PTE_IDENT, - .make_pte = paravirt_native_make_pte, - .make_pgd = paravirt_native_make_pgd, + .make_pte = PTE_IDENT, + .make_pgd = PTE_IDENT, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 94e452c0b00c..5e41f7fc6cf1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -492,28 +492,33 @@ pteval_t xen_pte_val(pte_t pte) { return pte_mfn_to_pfn(pte.pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); pgdval_t xen_pgd_val(pgd_t pgd) { return pte_mfn_to_pfn(pgd.pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); pte_t xen_make_pte(pteval_t pte) { pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); return native_make_pgd(pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); pmdval_t xen_pmd_val(pmd_t pmd) { return pte_mfn_to_pfn(pmd.pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); void xen_set_pud_hyper(pud_t *ptr, pud_t val) { @@ -590,12 +595,14 @@ pmd_t xen_make_pmd(pmdval_t pmd) pmd = pte_pfn_to_mfn(pmd); return native_make_pmd(pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); #if PAGETABLE_LEVELS == 4 pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); pud_t xen_make_pud(pudval_t pud) { @@ -603,6 +610,7 @@ pud_t xen_make_pud(pudval_t pud) return native_make_pud(pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); pgd_t *xen_get_user_pgd(pgd_t *pgd) { @@ -1813,11 +1821,11 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, - .pte_val = xen_pte_val, - .pgd_val = xen_pgd_val, + .pte_val = PV_CALLEE_SAVE(xen_pte_val), + .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, + .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, @@ -1827,12 +1835,12 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { #endif /* CONFIG_X86_PAE */ .set_pud = xen_set_pud_hyper, - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, + .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), + .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), #if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, + .pud_val = PV_CALLEE_SAVE(xen_pud_val), + .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, .alloc_pud = xen_alloc_pte_init, From 4767afbf1f60f73997a7eb69a86d380f1fb27a92 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 29 Jan 2009 01:51:34 -0800 Subject: [PATCH 320/566] x86/paravirt: fix missing callee-save call on pud_val Impact: Fix build when CONFIG_PARAVIRT_DEBUG is enabled Fix missed convertion to using callee-saved calls for pud_val, which causes a compile error when CONFIG_PARAVIRT_DEBUG is enabled. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 2d098b78bc10..b17365c39749 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1321,11 +1321,11 @@ static inline pudval_t pud_val(pud_t pud) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, - pud.pud, (u64)pud.pud >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val, + pud.pud, (u64)pud.pud >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, - pud.pud); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val, + pud.pud); return ret; } From 193c81b979adbc4a540bf89e75b9039fae75bf82 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:23:27 +0100 Subject: [PATCH 321/566] x86, irq: add LOCAL_PERF_VECTOR Add a slot for the performance monitoring interrupt. Not yet used by any subsystem - but the hardware has it. (This eases integration with performance monitoring code.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 9a83a10a5d51..0e2220bb3142 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -88,6 +88,11 @@ */ #define LOCAL_TIMER_VECTOR 0xef +/* + * Performance monitoring interrupt vector: + */ +#define LOCAL_PERF_VECTOR 0xee + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority From d1de36f5b5a30b8f9dae7142516fb122ce1e0661 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 01:59:14 +0100 Subject: [PATCH 322/566] x86, apic: clean up header section Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 48 ++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 968c817762a4..29e7186b0e87 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -14,43 +14,41 @@ * Mikael Pettersson : PM converted to driver model. */ -#include - -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include #include -#include -#include -#include +#include +#include +#include #include -#include -#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include -#include -#include -#include -#include #include -#include #include +#include +#include +#include #include -#include +#include #include #include -#include +#include +#include +#include +#include #include -#include - /* * Sanity check */ From 8f47e16348e8e25eedf639092a8a2f10a66aba34 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:03:42 +0100 Subject: [PATCH 323/566] x86: update copyrights Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/stacktrace.c | 2 +- arch/x86/mm/mmap.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 29e7186b0e87..d6da6dd2f60a 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1,7 +1,7 @@ /* * Local APIC handling, local APIC timers * - * (c) 1999, 2000 Ingo Molnar + * (c) 1999, 2000, 2009 Ingo Molnar * * Fixes * Maciej W. Rozycki : Bits for genuine 82489DX APICs; diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3378ffb21407..57d60c741e37 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1,7 +1,7 @@ /* * Intel IO-APIC support for multi-Pentium hosts. * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b46ca7d31feb..66ebb823f390 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -3,7 +3,7 @@ * compliant MP-table parsing routines. * * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 1998, 1999, 2000, 2009 Ingo Molnar * (c) 2008 Alexey Starikovskiy */ diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 0eb32ae9bf1f..eaaffae31cc0 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -2,7 +2,7 @@ * Intel SMP support routines. * * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar + * (c) 1998-99, 2000, 2009 Ingo Molnar * (c) 2002,2003 Andi Kleen, SuSE Labs. * * i386 and x86_64 integration by Glauber Costa diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1268a862abb7..f40f86fec2fe 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2,7 +2,7 @@ * x86 SMP booting functions * * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 1998, 1999, 2000, 2009 Ingo Molnar * Copyright 2001 Andi Kleen, SuSE Labs. * * Much of the core SMP work is based on previous work by Thomas Radke, to diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 10786af95545..f7bddc2e37d1 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -1,7 +1,7 @@ /* * Stack trace management functions * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar */ #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 56fe7124fbec..165829600566 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -4,7 +4,7 @@ * Based on code by Ingo Molnar and Andi Kleen, copyrighted * as follows: * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * Copyright 2003-2009 Red Hat Inc. * All Rights Reserved. * Copyright 2005 Andi Kleen, SUSE Labs. * Copyright 2007 Jiri Kosina, SUSE Labs. From 5da690d29f0de17cc1835dd3eb8f8bd0945521f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:10:03 +0100 Subject: [PATCH 324/566] x86, apic: unify the APIC vector enumeration Most of the vector layout on 32-bit and 64-bit is identical now, so eliminate the duplicated enumeration of the vectors. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 37 +++++++++++------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0e2220bb3142..393f85ecdd80 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -50,37 +50,26 @@ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * TLB, reschedule and local APIC vectors are performance-critical. */ + +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define CALL_FUNCTION_SINGLE_VECTOR 0xfb +#define THERMAL_APIC_VECTOR 0xfa + #ifdef CONFIG_X86_32 - -# define SPURIOUS_APIC_VECTOR 0xff -# define ERROR_APIC_VECTOR 0xfe -# define RESCHEDULE_VECTOR 0xfd -# define CALL_FUNCTION_VECTOR 0xfc -# define CALL_FUNCTION_SINGLE_VECTOR 0xfb -# define THERMAL_APIC_VECTOR 0xfa /* 0xf8 - 0xf9 : free */ -# define INVALIDATE_TLB_VECTOR_END 0xf7 -# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -# define NUM_INVALIDATE_TLB_VECTORS 8 - #else - -# define SPURIOUS_APIC_VECTOR 0xff -# define ERROR_APIC_VECTOR 0xfe -# define RESCHEDULE_VECTOR 0xfd -# define CALL_FUNCTION_VECTOR 0xfc -# define CALL_FUNCTION_SINGLE_VECTOR 0xfb -# define THERMAL_APIC_VECTOR 0xfa # define THRESHOLD_APIC_VECTOR 0xf9 # define UV_BAU_MESSAGE 0xf8 -# define INVALIDATE_TLB_VECTOR_END 0xf7 -# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -#define NUM_INVALIDATE_TLB_VECTORS 8 - #endif +/* f0-f7 used for spreading out TLB flushes: */ +#define INVALIDATE_TLB_VECTOR_END 0xf7 +#define INVALIDATE_TLB_VECTOR_START 0xf0 +#define NUM_INVALIDATE_TLB_VECTORS 8 + /* * Local APIC timer IRQ vector is on a different priority level, * to work around the 'lost local interrupt if more than 2 IRQ From 647ad94fc0479e33958cb4d0e20e241c0bcf599c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:06:50 +0100 Subject: [PATCH 325/566] x86, apic: clean up spurious vector sanity check Move the spurious vector sanity check to the place where it's defined - out of a .c file. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 7 +++++++ arch/x86/kernel/apic.c | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 393f85ecdd80..2601fd108c7d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -52,6 +52,13 @@ */ #define SPURIOUS_APIC_VECTOR 0xff +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + #define ERROR_APIC_VECTOR 0xfe #define RESCHEDULE_VECTOR 0xfd #define CALL_FUNCTION_VECTOR 0xfc diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index d6da6dd2f60a..85d8b50d1af7 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,13 +49,6 @@ #include #include -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error -#endif - unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; /* Processor that is doing the boot up */ From ed74ca6d5a3e57eb0969d4e14e46cf9f88d25d3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:16:04 +0100 Subject: [PATCH 326/566] x86, voyager: move Voyager-specific defines to voyager.h They dont belong into the generic headers. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 10 ------- arch/x86/include/asm/irq_vectors.h | 35 ------------------------- arch/x86/include/asm/voyager.h | 42 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 415507973968..3ef2bded97ac 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -84,16 +84,6 @@ extern atomic_t irq_mis_count; /* EISA */ extern void eisa_set_level_irq(unsigned int irq); -/* Voyager functions */ -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); - /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 2601fd108c7d..067d22ffb3ec 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -135,39 +135,4 @@ #endif -/* Voyager specific defines */ -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 -#define VIC_CALL_FUNCTION_SINGLE_CPI 7 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - - #endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index b3e647307625..c1635d43616f 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -527,3 +527,45 @@ extern void voyager_smp_intr_init(void); #define VOYAGER_PSI_SUBREAD 2 #define VOYAGER_PSI_SUBWRITE 3 extern void voyager_cat_psi(__u8, __u16, __u8 *); + +/* These define the CPIs we use in linux */ +#define VIC_CPI_LEVEL0 0 +#define VIC_CPI_LEVEL1 1 +/* now the fake CPIs */ +#define VIC_TIMER_CPI 2 +#define VIC_INVALIDATE_CPI 3 +#define VIC_RESCHEDULE_CPI 4 +#define VIC_ENABLE_IRQ_CPI 5 +#define VIC_CALL_FUNCTION_CPI 6 +#define VIC_CALL_FUNCTION_SINGLE_CPI 7 + +/* Now the QIC CPIs: Since we don't need the two initial levels, + * these are 2 less than the VIC CPIs */ +#define QIC_CPI_OFFSET 1 +#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) + +#define VIC_START_FAKE_CPI VIC_TIMER_CPI +#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI + +/* this is the SYS_INT CPI. */ +#define VIC_SYS_INT 8 +#define VIC_CMN_INT 15 + +/* This is the boot CPI for alternate processors. It gets overwritten + * by the above once the system has activated all available processors */ +#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) + +extern asmlinkage void vic_cpi_interrupt(void); +extern asmlinkage void vic_sys_interrupt(void); +extern asmlinkage void vic_cmn_interrupt(void); +extern asmlinkage void qic_timer_interrupt(void); +extern asmlinkage void qic_invalidate_interrupt(void); +extern asmlinkage void qic_reschedule_interrupt(void); +extern asmlinkage void qic_enable_irq_interrupt(void); +extern asmlinkage void qic_call_function_interrupt(void); From 3e92ab3d7e2edef5dccd8b0db21528699c81d2c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:21:42 +0100 Subject: [PATCH 327/566] x86, irqs, voyager: remove Voyager quirk Remove a Voyager complication from the generic irq_vectors.h header. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 067d22ffb3ec..81fc883b3c05 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -106,7 +106,7 @@ #define NR_IRQS_LEGACY 16 -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#ifdef CONFIG_X86_IO_APIC #include /* need MAX_IO_APICS */ @@ -117,22 +117,14 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else - # define NR_IRQS \ ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ - (NR_VECTORS + (32 * MAX_IO_APICS))) \ - + (NR_VECTORS + (32 * MAX_IO_APICS))) #endif -#elif defined(CONFIG_X86_VOYAGER) - -# define NR_IRQS 224 - -#else /* IO_APIC || VOYAGER */ - +#else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS 16 - #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ From 9fc2e79d4f239c1c1dfdab7b10854c7588b39d9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:48:17 +0100 Subject: [PATCH 328/566] x86, irq: add IRQ layout comments Describe the layout of x86 trap/exception/IRQ vectors and clean up indentation and other small details. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 92 +++++++++++++++++++----------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 81fc883b3c05..5f7d6a1e3d28 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -3,45 +3,69 @@ #include -#define NMI_VECTOR 0x02 +/* + * Linux IRQ vector layout. + * + * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can + * be defined by Linux. They are used as a jump table by the CPU when a + * given vector is triggered - by a CPU-external, CPU-internal or + * software-triggered event. + * + * Linux sets the kernel code address each entry jumps to early during + * bootup, and never changes them. This is the general layout of the + * IDT entries: + * + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... 237 : device interrupts + * Vectors 238 ... 255 : special interrupts + * + * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. + * + * This file enumerates the exact layout of them: + */ + +#define NMI_VECTOR 0x02 /* * IDT vectors usable for external interrupt sources start * at 0x20: */ -#define FIRST_EXTERNAL_VECTOR 0x20 +#define FIRST_EXTERNAL_VECTOR 0x20 #ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 +# define SYSCALL_VECTOR 0x80 #else -# define IA32_SYSCALL_VECTOR 0x80 +# define IA32_SYSCALL_VECTOR 0x80 #endif /* * Reserve the lowest usable priority level 0x20 - 0x2f for triggering * cleanup after irq migration. */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* * Vectors 0x30-0x3f are used for ISA interrupts. */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) + +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff @@ -75,36 +99,36 @@ /* f0-f7 used for spreading out TLB flushes: */ #define INVALIDATE_TLB_VECTOR_END 0xf7 #define INVALIDATE_TLB_VECTOR_START 0xf0 -#define NUM_INVALIDATE_TLB_VECTORS 8 +#define NUM_INVALIDATE_TLB_VECTORS 8 /* * Local APIC timer IRQ vector is on a different priority level, * to work around the 'lost local interrupt if more than 2 IRQ * sources per level' errata. */ -#define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_TIMER_VECTOR 0xef /* * Performance monitoring interrupt vector: */ -#define LOCAL_PERF_VECTOR 0xee +#define LOCAL_PERF_VECTOR 0xee /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define NR_VECTORS 256 +#define NR_VECTORS 256 -#define FPU_IRQ 13 +#define FPU_IRQ 13 -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#define NR_IRQS_LEGACY 16 +#define NR_IRQS_LEGACY 16 #ifdef CONFIG_X86_IO_APIC @@ -112,9 +136,9 @@ #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else # define NR_IRQS \ @@ -124,7 +148,7 @@ #endif #else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS 16 +# define NR_IRQS 16 #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ From c379698fdac7cb65c96dec549850ce606dd6ceba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:50:46 +0100 Subject: [PATCH 329/566] x86, irq_vectors.h: remove needless includes Reduce include file dependencies a bit - remove the two headers that are included in irq_vectors.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5f7d6a1e3d28..ec87910025d5 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_IRQ_VECTORS_H #define _ASM_X86_IRQ_VECTORS_H -#include - /* * Linux IRQ vector layout. * @@ -131,22 +129,18 @@ #define NR_IRQS_LEGACY 16 #ifdef CONFIG_X86_IO_APIC - -#include /* need MAX_IO_APICS */ - -#ifndef CONFIG_SPARSE_IRQ -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# ifndef CONFIG_SPARSE_IRQ +# if NR_CPUS < MAX_IO_APICS +# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif # else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif -#else -# define NR_IRQS \ +# define NR_IRQS \ ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ (NR_VECTORS + (32 * MAX_IO_APICS))) -#endif - +# endif #else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS 16 #endif From 009eb3fe146aa6f1951f3c5235851bb8d1330dfb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:56:44 +0100 Subject: [PATCH 330/566] x86, irq: describe NR_IRQ sizing details, clean up Impact: cleanup Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 36 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index ec87910025d5..41e2450e13bd 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -126,23 +126,37 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) +/* + * Size the maximum number of interrupts. + * + * If the irq_desc[] array has a sparse layout, we can size things + * generously - it scales up linearly with the maximum number of CPUs, + * and the maximum number of IO-APICs, whichever is higher. + * + * In other cases we size more conservatively, to not create too large + * static arrays. + */ + #define NR_IRQS_LEGACY 16 +#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) +#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) + #ifdef CONFIG_X86_IO_APIC -# ifndef CONFIG_SPARSE_IRQ -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif -# else +# ifdef CONFIG_SPARSE_IRQ # define NR_IRQS \ - ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ - (NR_VECTORS + (8 * NR_CPUS)) : \ - (NR_VECTORS + (32 * MAX_IO_APICS))) + (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) +# else +# if NR_CPUS < MAX_IO_APICS +# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) +# else +# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +# endif # endif #else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS 16 +# define NR_IRQS NR_IRQS_LEGACY #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ From d8106d2e24d54497233ca9cd97fa9bec807de458 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:06:17 +0100 Subject: [PATCH 331/566] x86, vm86: clean up invalid_vm86_irq() Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 41e2450e13bd..b07278c55e9e 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -124,7 +124,13 @@ #define FIRST_VM86_IRQ 3 #define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) + +#ifndef __ASSEMBLY__ +static inline int invalid_vm86_irq(int irq) +{ + return irq < 3 || irq > 15; +} +#endif /* * Size the maximum number of interrupts. From 2749ebe320ff9f77548d10fcc0a3464ac21c8e58 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 29 Jan 2009 15:35:26 -0600 Subject: [PATCH 332/566] x86: UV fix uv_flush_send_and_wait() Impact: fix possible tlb mis-flushing on UV uv_flush_send_and_wait() should return a pointer if the broadcast remote tlb shootdown requests fail. That causes the conventional IPI method of shootdown to be used. Signed-off-by: Cliff Wickman Signed-off-by: Tejun Heo --- arch/x86/kernel/tlb_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 89fce1b6d01f..f4b2f27d19b9 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -259,7 +259,7 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, * the cpu's, all of which are still in the mask. */ __get_cpu_var(ptcstats).ptc_i++; - return 0; + return flush_mask; } /* From 552be871e67ff577ed36beb2f53d078b42304739 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 30 Jan 2009 17:47:53 +0900 Subject: [PATCH 333/566] x86: pass in cpu number to switch_to_new_gdt() Impact: cleanup, prepare for xen boot fix. Xen needs to call this function very early to setup the GDT and per-cpu segments. Remove the call to smp_processor_id() and just pass in the cpu number. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 7 +++---- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 11 ++++++----- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index befa20b4a68c..1c25eb69ea86 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -768,7 +768,7 @@ extern int sysenter_setup(void); extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); -extern void switch_to_new_gdt(void); +extern void switch_to_new_gdt(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 652fdc9a757a..6eacd64b602e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -255,10 +255,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ -void switch_to_new_gdt(void) +void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - int cpu = smp_processor_id(); gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; @@ -993,7 +992,7 @@ void __cpuinit cpu_init(void) * and set up the GDT descriptor: */ - switch_to_new_gdt(); + switch_to_new_gdt(cpu); loadsegment(fs, 0); load_idt((const struct desc_ptr *)&idt_descr); @@ -1098,7 +1097,7 @@ void __cpuinit cpu_init(void) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_idt(&idt_descr); - switch_to_new_gdt(); + switch_to_new_gdt(cpu); /* * Set up and load the per-CPU TSS and LDT diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0d1e7ac439f4..ef91747bbed5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -122,7 +122,7 @@ void __init setup_per_cpu_areas(void) * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - switch_to_new_gdt(); + switch_to_new_gdt(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9dbcff43546..612d3c74f6a3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1185,7 +1185,7 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(); + switch_to_new_gdt(me); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); per_cpu(cpu_state, me) = CPU_ONLINE; diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 331cd6d56483..58c7cac3440d 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1746,12 +1746,13 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - switch_to_new_gdt(); + int cpu = smp_processor_id(); + switch_to_new_gdt(cpu); - cpu_set(smp_processor_id(), cpu_online_map); - cpu_set(smp_processor_id(), cpu_callout_map); - cpu_set(smp_processor_id(), cpu_possible_map); - cpu_set(smp_processor_id(), cpu_present_map); + cpu_set(cpu, cpu_online_map); + cpu_set(cpu, cpu_callout_map); + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } static int __cpuinit voyager_cpu_up(unsigned int cpu) From 11e3a840cd5b731cdd8f6f956dfae78a8046d09c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: [PATCH 334/566] x86: split loading percpu segments from loading gdt Impact: split out a function, no functional change Xen needs to be able to access percpu data from very early on. For various reasons, it cannot also load the gdt at that time. It does, however, have a pefectly functional gdt at that point, so there's no pressing need to reload the gdt. Split the function to load the segment registers off, so Xen can call it directly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1c25eb69ea86..656d02ea509b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -769,6 +769,7 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(int); +extern void load_percpu_segment(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6eacd64b602e..0f73ea423089 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -253,6 +253,16 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +void load_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); +#endif +} + /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ void switch_to_new_gdt(int cpu) @@ -263,12 +273,8 @@ void switch_to_new_gdt(int cpu) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); /* Reload the per-cpu base */ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - loadsegment(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -#endif + + load_percpu_segment(cpu); } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; From 795f99b61d20c34cb04d17d8906b32f745a635ec Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: [PATCH 335/566] xen: setup percpu data pointers Impact: fix xen booting We need to access percpu data fairly early, so set up the percpu registers as soon as possible. We only need to load the appropriate segment register. We already have a GDT, but its hard to change it early because we need to manipulate the pagetable to do so, and that hasn't been set up yet. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/xen/enlighten.c | 3 +++ arch/x86/xen/smp.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bef941f61451..fe19c88a5029 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1647,6 +1647,9 @@ asmlinkage void __init xen_start_kernel(void) have_vcpu_info_placement = 0; #endif + /* setup percpu state */ + load_percpu_segment(0); + xen_smp_init(); /* Get mfn list */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 7735e3dd359c..88d5d5ec6beb 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -170,7 +170,8 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); + make_lowmem_page_readwrite(__per_cpu_load + + (unsigned long)&per_cpu_var(gdt_page)); xen_setup_vcpu_info_placement(); } @@ -235,6 +236,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->user_regs.ss = __KERNEL_DS; #ifdef CONFIG_X86_32 ctxt->user_regs.fs = __KERNEL_PERCPU; +#else + ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ From e584f559c7b8711cccdf319400acd6294b2c074e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 23:17:23 -0800 Subject: [PATCH 336/566] x86/paravirt: don't restore second return reg Impact: bugfix In the 32-bit calling convention, %eax:%edx is used to return 64-bit values. Don't save and restore %edx around wrapped functions, or they can't return a full 64-bit result. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index b17365c39749..016dce311305 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1524,8 +1524,8 @@ extern struct paravirt_patch_site __parainstructions[], #define PV_RESTORE_REGS "popl %edx; popl %ecx;" /* save and restore all caller-save registers, except return value */ -#define PV_SAVE_ALL_CALLER_REGS PV_SAVE_REGS -#define PV_RESTORE_ALL_CALLER_REGS PV_RESTORE_REGS +#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" +#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS From 664c7954721adfc9bd61de6ec78f89f482f1a802 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 23:18:41 -0800 Subject: [PATCH 337/566] x86/vmi: fix interrupt enable/disable/save/restore calling convention. Zach says: > Enable/Disable have no clobbers at all. > Save clobbers only return value, %eax > Restore also clobbers nothing. This is precisely compatible with the calling convention, so we can just call them directly without wrapping. (Compile tested only.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmi_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302cc2ddf..eb9e7347928e 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -670,10 +670,11 @@ static inline int __init activate_vmi(void) para_fill(pv_mmu_ops.write_cr2, SetCR2); para_fill(pv_mmu_ops.write_cr3, SetCR3); para_fill(pv_cpu_ops.write_cr4, SetCR4); - para_fill(pv_irq_ops.save_fl, GetInterruptMask); - para_fill(pv_irq_ops.restore_fl, SetInterruptMask); - para_fill(pv_irq_ops.irq_disable, DisableInterrupts); - para_fill(pv_irq_ops.irq_enable, EnableInterrupts); + + para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); + para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); + para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); + para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); para_fill(pv_cpu_ops.wbinvd, WBINVD); para_fill(pv_cpu_ops.read_tsc, RDTSC); From ccc9c8b91c2631da2cab46a6fcd9c3106dcb9abb Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Tue, 27 Jan 2009 19:22:38 +0530 Subject: [PATCH 338/566] pcf50633_charger: Fix typo container_of(psy, struct pcf50633_mbc, usb); should be container_of(psy, struct pcf50633_mbc, adapter); Signed-off-by: Balaji Rao Cc: Andy Green Signed-off-by: Anton Vorontsov --- drivers/power/pcf50633-charger.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/power/pcf50633-charger.c b/drivers/power/pcf50633-charger.c index e988ec130fcd..41aec2acbb91 100644 --- a/drivers/power/pcf50633-charger.c +++ b/drivers/power/pcf50633-charger.c @@ -199,7 +199,8 @@ static int adapter_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { - struct pcf50633_mbc *mbc = container_of(psy, struct pcf50633_mbc, usb); + struct pcf50633_mbc *mbc = container_of(psy, + struct pcf50633_mbc, adapter); int ret = 0; switch (psp) { From 3bd323a1da42525317e2ce6c93b97b5ba653bc9d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 2 Feb 2009 14:52:00 -0800 Subject: [PATCH 339/566] x86 setup: a20: early timeout for a nonexistent keyboard controller When probing the keyboard controller to enable A20, if we get FF back (which is *possible* as a valid status word, but is extremely unlikely) then bail after much fewer iterations than we otherwise would, and abort the attempt to access the KBC. This hopefully should make it work a lot better for embedded platforms which don't have a KBC and where the BIOS doesn't implement INT 15h AX=2401h (and doesn't boot with A20 already enabled.) If this works, it will be the one remaining use of CONFIG_X86_ELAN as anything other than a processor type optimization option. Signed-off-by: H. Peter Anvin --- arch/x86/boot/a20.c | 71 +++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 4063d630deff..fba8e9c6a504 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -15,16 +16,23 @@ #include "boot.h" #define MAX_8042_LOOPS 100000 +#define MAX_8042_FF 32 static int empty_8042(void) { u8 status; int loops = MAX_8042_LOOPS; + int ffs = MAX_8042_FF; while (loops--) { io_delay(); status = inb(0x64); + if (status == 0xff) { + /* FF is a plausible, but very unlikely status */ + if (!--ffs) + return -1; /* Assume no KBC present */ + } if (status & 1) { /* Read and discard input data */ io_delay(); @@ -118,44 +126,43 @@ static void enable_a20_fast(void) int enable_a20(void) { -#if defined(CONFIG_X86_ELAN) - /* Elan croaks if we try to touch the KBC */ - enable_a20_fast(); - while (!a20_test_long()) - ; - return 0; -#elif defined(CONFIG_X86_VOYAGER) +#ifdef CONFIG_X86_VOYAGER /* On Voyager, a20_test() is unsafe? */ enable_a20_kbc(); return 0; #else int loops = A20_ENABLE_LOOPS; - while (loops--) { - /* First, check to see if A20 is already enabled - (legacy free, etc.) */ - if (a20_test_short()) - return 0; + int kbc_err; - /* Next, try the BIOS (INT 0x15, AX=0x2401) */ - enable_a20_bios(); - if (a20_test_short()) - return 0; + while (loops--) { + /* First, check to see if A20 is already enabled + (legacy free, etc.) */ + if (a20_test_short()) + return 0; + + /* Next, try the BIOS (INT 0x15, AX=0x2401) */ + enable_a20_bios(); + if (a20_test_short()) + return 0; + + /* Try enabling A20 through the keyboard controller */ + kbc_err = empty_8042(); - /* Try enabling A20 through the keyboard controller */ - empty_8042(); - if (a20_test_short()) - return 0; /* BIOS worked, but with delayed reaction */ - - enable_a20_kbc(); - if (a20_test_long()) - return 0; - - /* Finally, try enabling the "fast A20 gate" */ - enable_a20_fast(); - if (a20_test_long()) - return 0; - } - - return -1; + if (a20_test_short()) + return 0; /* BIOS worked, but with delayed reaction */ + + if (!kbc_err) { + enable_a20_kbc(); + if (a20_test_long()) + return 0; + } + + /* Finally, try enabling the "fast A20 gate" */ + enable_a20_fast(); + if (a20_test_long()) + return 0; + } + + return -1; #endif } From ef3892bd63420380d115f755d351d2071f1f805f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 2 Feb 2009 18:16:19 -0800 Subject: [PATCH 340/566] x86, percpu: fix kexec with vmlinux Impact: fix regression with kexec with vmlinux Split data.init into data.init, percpu, data.init2 sections instead of let data.init wrap percpu secion. Thus kexec loading will be happy, because sections will not overlap. Before the patch we have: Elf file type is EXEC (Executable file) Entry point 0x200000 There are 6 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000200000 0xffffffff80200000 0x0000000000200000 0x0000000000ca6000 0x0000000000ca6000 R E 200000 LOAD 0x0000000000ea6000 0xffffffff80ea6000 0x0000000000ea6000 0x000000000014dfe0 0x000000000014dfe0 RWE 200000 LOAD 0x0000000001000000 0xffffffffff600000 0x0000000000ff4000 0x0000000000000888 0x0000000000000888 RWE 200000 LOAD 0x00000000011f6000 0xffffffff80ff6000 0x0000000000ff6000 0x0000000000073086 0x0000000000a2d938 RWE 200000 LOAD 0x0000000001400000 0x0000000000000000 0x000000000106a000 0x00000000001d2ce0 0x00000000001d2ce0 RWE 200000 NOTE 0x00000000009e2c1c 0xffffffff809e2c1c 0x00000000009e2c1c 0x0000000000000024 0x0000000000000024 4 Section to Segment mapping: Segment Sections... 00 .text .notes __ex_table .rodata __bug_table .pci_fixup .builtin_fw __ksymtab __ksymtab_gpl __ksymtab_strings __init_rodata __param 01 .data .init.rodata .data.cacheline_aligned .data.read_mostly 02 .vsyscall_0 .vsyscall_fn .vsyscall_gtod_data .vsyscall_1 .vsyscall_2 .vgetcpu_mode .jiffies 03 .data.init_task .smp_locks .init.text .init.data .init.setup .initcall.init .con_initcall.init .x86_cpu_dev.init .altinstructions .altinstr_replacement .exit.text .init.ramfs .bss 04 .data.percpu 05 .notes After patch we've got: Elf file type is EXEC (Executable file) Entry point 0x200000 There are 7 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000200000 0xffffffff80200000 0x0000000000200000 0x0000000000ca6000 0x0000000000ca6000 R E 200000 LOAD 0x0000000000ea6000 0xffffffff80ea6000 0x0000000000ea6000 0x000000000014dfe0 0x000000000014dfe0 RWE 200000 LOAD 0x0000000001000000 0xffffffffff600000 0x0000000000ff4000 0x0000000000000888 0x0000000000000888 RWE 200000 LOAD 0x00000000011f6000 0xffffffff80ff6000 0x0000000000ff6000 0x0000000000073086 0x0000000000073086 RWE 200000 LOAD 0x0000000001400000 0x0000000000000000 0x000000000106a000 0x00000000001d2ce0 0x00000000001d2ce0 RWE 200000 LOAD 0x000000000163d000 0xffffffff8123d000 0x000000000123d000 0x0000000000000000 0x00000000007e6938 RWE 200000 NOTE 0x00000000009e2c1c 0xffffffff809e2c1c 0x00000000009e2c1c 0x0000000000000024 0x0000000000000024 4 Section to Segment mapping: Segment Sections... 00 .text .notes __ex_table .rodata __bug_table .pci_fixup .builtin_fw __ksymtab __ksymtab_gpl __ksymtab_strings __init_rodata __param 01 .data .init.rodata .data.cacheline_aligned .data.read_mostly 02 .vsyscall_0 .vsyscall_fn .vsyscall_gtod_data .vsyscall_1 .vsyscall_2 .vgetcpu_mode .jiffies 03 .data.init_task .smp_locks .init.text .init.data .init.setup .initcall.init .con_initcall.init .x86_cpu_dev.init .altinstructions .altinstr_replacement .exit.text .init.ramfs 04 .data.percpu 05 .bss 06 .notes Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_64.lds.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index c9740996430a..07f62d287ff0 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -22,6 +22,7 @@ PHDRS { #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(7); /* RWE */ #endif + data.init2 PT_LOAD FLAGS(7); /* RWE */ note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -215,7 +216,7 @@ SECTIONS /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should - * switch it back to data.init. Also, pda should be at the head of + * start another section data.init2. Also, pda should be at the head of * percpu area. Preallocate it and define the percpu offset symbol * so that it can be accessed as a percpu variable. */ @@ -232,7 +233,7 @@ SECTIONS __nosave_begin = .; .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) - } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ + } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; From ac048e1734699dd98f4bdf4daf2b9592d4a4d38e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 3 Feb 2009 19:05:12 +1000 Subject: [PATCH 341/566] i915: fix unneeded locking in i915 LVDS get modes code. This code is always called under the lock from the higher layers, so need to go locking it here. Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_lvds.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b36a5214d8df..cf8da644faf2 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -311,10 +311,8 @@ static int intel_lvds_get_modes(struct drm_connector *connector) if (dev_priv->panel_fixed_mode != NULL) { struct drm_display_mode *mode; - mutex_lock(&dev->mode_config.mutex); mode = drm_mode_duplicate(dev, dev_priv->panel_fixed_mode); drm_mode_probed_add(connector, mode); - mutex_unlock(&dev->mode_config.mutex); return 1; } From 063f8913afb48842b9329e195d90d2c28e58aacc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 3 Feb 2009 18:02:36 +0100 Subject: [PATCH 342/566] x86: document 64-bit and 32-bit function call convention ABI - also clean up the calling.h file a tiny bit Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calling.h | 56 ++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 2bc162e0ec6e..0e63c9a2a8d0 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -1,5 +1,55 @@ /* - * Some macros to handle stack frames in assembly. + + x86 function call convention, 64-bit: + ------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + --------------------------------------------------------------------------- + rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] + + ( rsp is obviously invariant across normal function calls. (gcc can 'merge' + functions when it sees tail-call optimization possibilities) rflags is + clobbered. Leftover arguments are passed over the stack frame.) + + [*] In the frame-pointers case rbp is fixed to the stack frame. + + [**] for struct return values wider than 64 bits the return convention is a + bit more complex: up to 128 bits width we return small structures + straight in rax, rdx. For structures larger than that (3 words or + larger) the caller puts a pointer to an on-stack return struct + [allocated in the caller's stack frame] into the first argument - i.e. + into rdi. All other arguments shift up by one in this case. + Fortunately this case is rare in the kernel. + +For 32-bit we have the following conventions - kernel is built with +-mregparm=3 and -freg-struct-return: + + x86 function calling convention, 32-bit: + ---------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + ------------------------------------------------------------------------- + eax edx ecx | ebx edi esi ebp [*] | | eax, edx [**] + + ( here too esp is obviously invariant across normal function calls. eflags + is clobbered. Leftover arguments are passed over the stack frame. ) + + [*] In the frame-pointers case ebp is fixed to the stack frame. + + [**] We build with -freg-struct-return, which on 32-bit means similar + semantics as on 64-bit: edx can be used for a second return value + (i.e. covering integer and structure sizes up to 64 bits) - after that + it gets more complex and more expensive: 3-word or larger struct returns + get done in the caller's frame and the pointer to the return struct goes + into regparm0, i.e. eax - the other arguments shift up and the + function's register parameters degenerate to regparm=2 in essence. + +*/ + + +/* + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ #define R15 0 @@ -9,7 +59,7 @@ #define RBP 32 #define RBX 40 -/* arguments: interrupts/non tracing syscalls only save upto here*/ +/* arguments: interrupts/non tracing syscalls only save up to here: */ #define R11 48 #define R10 56 #define R9 64 @@ -22,7 +72,7 @@ #define ORIG_RAX 120 /* + error_code */ /* end of arguments */ -/* cpu exception frame or undefined in case of fast syscall. */ +/* cpu exception frame or undefined in case of fast syscall: */ #define RIP 128 #define CS 136 #define EFLAGS 144 From 0eb592dbba40baebec9cdde3ff4574185de6cbcc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 3 Feb 2009 16:00:38 -0800 Subject: [PATCH 343/566] x86/paravirt: return full 64-bit result Impact: Bug fix A hunk went missing in the original patch, and callee-save callsites were not marked as returning the upper 32-bit of result, causing Badness. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 016dce311305..c85e7475e171 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -522,7 +522,7 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS #define EXTRA_CLOBBERS From f5deb79679af6eb41b61112fadcda28b2a4cfb0d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 3 Feb 2009 14:22:48 +0800 Subject: [PATCH 344/566] x86: kexec: Use one page table in x86_64 machine_kexec Impact: reduce kernel BSS size by 7 pages, improve code readability Two page tables are used in current x86_64 kexec implementation. One is used to jump from kernel virtual address to identity map address, the other is used to map all physical memory. In fact, on x86_64, there is no conflict between kernel virtual address space and physical memory space, so just one page table is sufficient. The page table pages used to map control page are dynamically allocated to save memory if kexec image is not loaded. ASM code used to map control page is replaced by C code too. Signed-off-by: Huang Ying Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/kexec.h | 27 ++---- arch/x86/kernel/machine_kexec_64.c | 82 ++++++++++++------ arch/x86/kernel/relocate_kernel_64.S | 125 +-------------------------- 3 files changed, 67 insertions(+), 167 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index c61d8b2ab8b9..0ceb6d19ed30 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -9,23 +9,8 @@ # define PAGES_NR 4 #else # define PA_CONTROL_PAGE 0 -# define VA_CONTROL_PAGE 1 -# define PA_PGD 2 -# define VA_PGD 3 -# define PA_PUD_0 4 -# define VA_PUD_0 5 -# define PA_PMD_0 6 -# define VA_PMD_0 7 -# define PA_PTE_0 8 -# define VA_PTE_0 9 -# define PA_PUD_1 10 -# define VA_PUD_1 11 -# define PA_PMD_1 12 -# define VA_PMD_1 13 -# define PA_PTE_1 14 -# define VA_PTE_1 15 -# define PA_TABLE_PAGE 16 -# define PAGES_NR 17 +# define PA_TABLE_PAGE 1 +# define PAGES_NR 2 #endif #ifdef CONFIG_X86_32 @@ -157,9 +142,9 @@ relocate_kernel(unsigned long indirection_page, unsigned long start_address) ATTRIB_NORET; #endif -#ifdef CONFIG_X86_32 #define ARCH_HAS_KIMAGE_ARCH +#ifdef CONFIG_X86_32 struct kimage_arch { pgd_t *pgd; #ifdef CONFIG_X86_PAE @@ -169,6 +154,12 @@ struct kimage_arch { pte_t *pte0; pte_t *pte1; }; +#else +struct kimage_arch { + pud_t *pud; + pmd_t *pmd; + pte_t *pte; +}; #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c43caa3a91f3..6993d51b7fd8 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,15 +18,6 @@ #include #include -#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -static u64 kexec_pgd[512] PAGE_ALIGNED; -static u64 kexec_pud0[512] PAGE_ALIGNED; -static u64 kexec_pmd0[512] PAGE_ALIGNED; -static u64 kexec_pte0[512] PAGE_ALIGNED; -static u64 kexec_pud1[512] PAGE_ALIGNED; -static u64 kexec_pmd1[512] PAGE_ALIGNED; -static u64 kexec_pte1[512] PAGE_ALIGNED; - static void init_level2_page(pmd_t *level2p, unsigned long addr) { unsigned long end_addr; @@ -107,12 +98,65 @@ out: return result; } +static void free_transition_pgtable(struct kimage *image) +{ + free_page((unsigned long)image->arch.pud); + free_page((unsigned long)image->arch.pmd); + free_page((unsigned long)image->arch.pte); +} + +static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long vaddr, paddr; + int result = -ENOMEM; + + vaddr = (unsigned long)relocate_kernel; + paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); + pgd += pgd_index(vaddr); + if (!pgd_present(*pgd)) { + pud = (pud_t *)get_zeroed_page(GFP_KERNEL); + if (!pud) + goto err; + image->arch.pud = pud; + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); + } + pud = pud_offset(pgd, vaddr); + if (!pud_present(*pud)) { + pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); + if (!pmd) + goto err; + image->arch.pmd = pmd; + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + pmd = pmd_offset(pud, vaddr); + if (!pmd_present(*pmd)) { + pte = (pte_t *)get_zeroed_page(GFP_KERNEL); + if (!pte) + goto err; + image->arch.pte = pte; + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + pte = pte_offset_kernel(pmd, vaddr); + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); + return 0; +err: + free_transition_pgtable(image); + return result; +} + static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { pgd_t *level4p; + int result; level4p = (pgd_t *)__va(start_pgtable); - return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + if (result) + return result; + return init_transition_pgtable(image, level4p); } static void set_idt(void *newidt, u16 limit) @@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image) void machine_kexec_cleanup(struct kimage *image) { - return; + free_transition_pgtable(image); } /* @@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image) memcpy(control_page, relocate_kernel, PAGE_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; - page_list[PA_PGD] = virt_to_phys(&kexec_pgd); - page_list[VA_PGD] = (unsigned long)kexec_pgd; - page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); - page_list[VA_PUD_0] = (unsigned long)kexec_pud0; - page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); - page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; - page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); - page_list[VA_PTE_0] = (unsigned long)kexec_pte0; - page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); - page_list[VA_PUD_1] = (unsigned long)kexec_pud1; - page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); - page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; - page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); - page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - page_list[PA_TABLE_PAGE] = (unsigned long)__pa(page_address(image->control_code_page)); diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index f5afe665a82b..b0bbdd4829c9 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -29,122 +29,6 @@ relocate_kernel: * %rdx start address */ - /* map the control page at its virtual address */ - - movq $0x0000ff8000000000, %r10 /* mask */ - mov $(39 - 3), %cl /* bits to shift */ - movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PGD)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PUD_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PUD_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PMD_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PMD_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PTE_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PTE_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - /* identity map the control page at its physical address */ - - movq $0x0000ff8000000000, %r10 /* mask */ - mov $(39 - 3), %cl /* bits to shift */ - movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PGD)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PUD_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PUD_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PMD_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PMD_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PTE_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PTE_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - -relocate_new_kernel: - /* %rdi indirection_page - * %rsi page_list - * %rdx start address - */ - /* zero out flags, and disable interrupts */ pushq $0 popfq @@ -156,9 +40,8 @@ relocate_new_kernel: /* get physical address of page table now too */ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx - /* switch to new set of page tables */ - movq PTR(PA_PGD)(%rsi), %r9 - movq %r9, %cr3 + /* Switch to the identity mapped page tables */ + movq %rcx, %cr3 /* setup a new stack at the end of the physical control page */ lea PAGE_SIZE(%r8), %rsp @@ -194,9 +77,7 @@ identity_mapped: jmp 1f 1: - /* Switch to the identity mapped page tables, - * and flush the TLB. - */ + /* Flush the TLB (needed?) */ movq %rcx, %cr3 /* Do the copies */ From 48ec4d9537282a55d602136724f069faafcac8c8 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 4 Feb 2009 15:54:45 -0500 Subject: [PATCH 345/566] x86, 64-bit: print DMI info in the oops trace This patch echoes what we already do on 32-bit since 90f7d25c6b672137344f447a30a9159945ffea72, and prints the DMI product name in show_regs, so that system specific problems can be easily identified. Signed-off-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4f..85b4cb5c1980 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; + const char *board; printk("\n"); print_modules(); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, From 36723bfe81f374fd8abc05a46f10a7cac2f01a75 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 4 Feb 2009 21:44:04 +0100 Subject: [PATCH 346/566] x86/Kconfig.cpu: make Kconfig help readable in the console Impact: cleanup Some lines exceed the 80 char width making them unreadable. Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8078955845ae..c98d52e82966 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -167,9 +167,9 @@ config MK7 config MK8 bool "Opteron/Athlon64/Hammer/K8" help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables - use of some extended instructions, and passes appropriate optimization - flags to GCC. + Select this for an AMD Opteron or Athlon64 Hammer-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. config MCRUSOE bool "Crusoe" @@ -256,9 +256,11 @@ config MPSC config MCORE2 bool "Core 2/newer Xeon" help - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) - CPUs. You can distinguish newer from older Xeons by the CPU family - in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and + 53xx) CPUs. You can distinguish newer from older Xeons by the CPU + family in /proc/cpuinfo. Newer ones have 6 and older ones 15 + (not a typo) config GENERIC_CPU bool "Generic-x86-64" @@ -320,14 +322,14 @@ config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 help - Old PentiumPro multiprocessor systems had errata that could cause memory - operations to violate the x86 ordering standard in rare cases. Enabling this - option will attempt to work around some (but not all) occurances of - this problem, at the cost of much heavier spinlock and memory barrier - operations. + Old PentiumPro multiprocessor systems had errata that could cause + memory operations to violate the x86 ordering standard in rare cases. + Enabling this option will attempt to work around some (but not all) + occurances of this problem, at the cost of much heavier spinlock and + memory barrier operations. - If unsure, say n here. Even distro kernels should think twice before enabling - this: there are few systems, and an unlikely bug. + If unsure, say n here. Even distro kernels should think twice before + enabling this: there are few systems, and an unlikely bug. config X86_F00F_BUG def_bool y From 483b4ee60edbefdfbff0dd538fb81f368d9e7c0d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 4 Feb 2009 11:59:44 -0800 Subject: [PATCH 347/566] sched: fix nohz load balancer on cpu offline Christian Borntraeger reports: > After a logical cpu offline, even on a complete idle system, there > is one cpu with full ticks. It turns out that nohz.cpu_mask has the > the offlined cpu still set. > > In select_nohz_load_balancer() we check if the system is completely > idle to turn of load balancing. We compare cpu_online_map with > nohz.cpu_mask. Since cpu_online_map is updated on cpu unplug, > but nohz.cpu_mask is not, the check fails and the scheduler believes > that we need an "idle load balancer" even on a fully idle system. > Since the ilb cpu does not deactivate the timer tick this breaks NOHZ. Fix the select_nohz_load_balancer() to not set the nohz.cpu_mask while a cpu is going offline. Reported-by: Christian Borntraeger Signed-off-by: Suresh Siddha Tested-by: Christian Borntraeger Signed-off-by: Ingo Molnar --- kernel/sched.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 242d0d47a70d..e1fc67d0674c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3890,19 +3890,24 @@ int select_nohz_load_balancer(int stop_tick) int cpu = smp_processor_id(); if (stop_tick) { - cpumask_set_cpu(cpu, nohz.cpu_mask); cpu_rq(cpu)->in_nohz_recently = 1; - /* - * If we are going offline and still the leader, give up! - */ - if (!cpu_active(cpu) && - atomic_read(&nohz.load_balancer) == cpu) { + if (!cpu_active(cpu)) { + if (atomic_read(&nohz.load_balancer) != cpu) + return 0; + + /* + * If we are going offline and still the leader, + * give up! + */ if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) BUG(); + return 0; } + cpumask_set_cpu(cpu, nohz.cpu_mask); + /* time for ilb owner also to sleep */ if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { if (atomic_read(&nohz.load_balancer) == cpu) From dc4ff585ffbc6cb0c872697b2d5f42293a32e5c8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Feb 2009 13:48:11 -0800 Subject: [PATCH 348/566] sparc64: Call dump_stack() in die_nmi(). Signed-off-by: David S. Miller --- arch/sparc/kernel/nmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 09f088ed4a64..f3577223c863 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -70,6 +70,7 @@ static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) printk(" on CPU%d, ip %08lx, registers:\n", smp_processor_id(), regs->tpc); show_regs(regs); + dump_stack(); bust_spinlocks(0); From a6a95406c676ffe4f9dee708eb404a17c69f7fdd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: [PATCH 349/566] x86: fix hpet timer reinit for x86_64 There's a small problem with hpet_rtc_reinit function - it checks for the: hpet_readl(HPET_COUNTER) - hpet_t1_cmp > 0 to continue increasing both the HPET_T1_CMP (register) and the hpet_t1_cmp (variable). But since the HPET_COUNTER is always 32-bit, if the hpet_t1_cmp is 64-bit this condition will always be FALSE once the latter hits the 32-bit boundary, and we can have a situation, when we don't increase the HPET_T1_CMP register high enough. The result - timer stops ticking, since HPET_T1_CMP becomes less, than the COUNTER and never increased again. The solution is (based on Linus's suggestion) to not compare 64-bits (on 64-bit x86), but to do the comparison on 32-bit signed integers. Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8add..c761f914430a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1075,7 +1075,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) From 4560839939f4b4a96e21e80584f87308ac93c1da Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 4 Feb 2009 16:44:01 -0700 Subject: [PATCH 350/566] x86: fix grammar in user-visible BIOS warning Fix user-visible grammo. Signed-off-by: Alex Chiang Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf69..c461f6d69074 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE - "%s detected: BIOS may corrupt low RAM, working it around.\n", + "%s detected: BIOS may corrupt low RAM, working around it.\n", d->ident); e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); From 0973a06cde8cc1522fbcf2baacb926f1ee3f4c79 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Feb 2009 15:24:09 -0800 Subject: [PATCH 351/566] x86: mm: introduce helper function in fault.c Impact: cleanup Introduce helper function fault_in_kernel_address() to make editors happy. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/mm/fault.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index eb4d7fe05938..8e9b0f1fd872 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -775,6 +775,15 @@ static inline int access_error(unsigned long error_code, int write, return 0; } +static int fault_in_kernel_space(unsigned long address) +{ +#ifdef CONFIG_X86_32 + return address >= TASK_SIZE; +#else /* !CONFIG_X86_32 */ + return address >= TASK_SIZE64; +#endif /* CONFIG_X86_32 */ +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -817,11 +826,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 9) == 0. */ -#ifdef CONFIG_X86_32 - if (unlikely(address >= TASK_SIZE)) { -#else - if (unlikely(address >= TASK_SIZE64)) { -#endif + if (unlikely(fault_in_kernel_space(address))) { if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && vmalloc_fault(address) >= 0) return; From 1f4f931501e9270c156d05ee76b7b872de486304 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:58:06 -0800 Subject: [PATCH 352/566] xen: fix 32-bit build resulting from mmu move Moving the mmu code from enlighten.c to mmu.c inadvertently broke the 32-bit build. Fix it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/mmu.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5e41f7fc6cf1..d2e8ed1aff3d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1396,6 +1396,43 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } +#ifdef CONFIG_HIGHPTE +static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) +{ + pgprot_t prot = PAGE_KERNEL; + + if (PagePinned(page)) + prot = PAGE_KERNEL_RO; + + if (0 && PageHighMem(page)) + printk("mapping highpte %lx type %d prot %s\n", + page_to_pfn(page), type, + (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); + + return kmap_atomic_prot(page, type, prot); +} +#endif + +#ifdef CONFIG_X86_32 +static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +{ + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); + + return pte; +} + +/* Init-time set_pte while constructing initial pagetables, which + doesn't allow RO pagetable pages to be remapped RW */ +static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +{ + pte = mask_rw_pte(ptep, pte); + + xen_set_pte(ptep, pte); +} +#endif /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ From 383414322b3b3ced0cbc146801e0cc6c60a6c5f4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:31 -0800 Subject: [PATCH 353/566] xen: setup percpu data pointers We need to access percpu data fairly early, so set up the percpu registers as soon as possible. We only need to load the appropriate segment register. We already have a GDT, but its hard to change it early because we need to manipulate the pagetable to do so, and that hasn't been set up yet. Also, set the kernel stack when bringing up secondary CPUs. If we don't they all end up sharing the same stack... Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 15 ++++++++++++++- arch/x86/xen/smp.c | 6 ++++-- arch/x86/xen/xen-ops.h | 2 ++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cd022c43dfbc..aed7ceeb4b65 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; +void *xen_initial_gdt; + /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -917,8 +919,19 @@ asmlinkage void __init xen_start_kernel(void) have_vcpu_info_placement = 0; #endif - /* setup percpu state */ +#ifdef CONFIG_X86_64 + /* + * Setup percpu state. We only need to do this for 64-bit + * because 32-bit already has %fs set properly. + */ load_percpu_segment(0); +#endif + /* + * The only reliable way to retain the initial address of the + * percpu gdt_page is to remember it here, so we can go and + * mark it RW later, when the initial percpu area is freed. + */ + xen_initial_gdt = &per_cpu(gdt_page, 0); xen_smp_init(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 88d5d5ec6beb..035582ae815d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -170,8 +170,7 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(__per_cpu_load + - (unsigned long)&per_cpu_var(gdt_page)); + make_lowmem_page_readwrite(xen_initial_gdt); xen_setup_vcpu_info_placement(); } @@ -287,6 +286,9 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif xen_setup_timer(cpu); xen_init_lock_cpu(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 11913fc94c14..2f5ef2632ea2 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,6 +10,8 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +extern void *xen_initial_gdt; + struct trap_info; void xen_copy_trap_info(struct trap_info *traps); From 5393744b71ce797f1b1546fafaed127fc50c2b61 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:42 -0800 Subject: [PATCH 354/566] xen: make direct versions of irq_enable/disable/save/restore to common code Now that x86-64 has directly accessible percpu variables, it can also implement the direct versions of these operations, which operate on a vcpu_info structure directly embedded in the percpu area. In fact, the 64-bit versions are more or less identical, and so can be shared. The only two differences are: 1. xen_restore_fl_direct takes its argument in eax on 32-bit, and rdi on 64-bit. Unfortunately it isn't possible to directly refer to the 2nd lsb of rdi directly (as you can with %ah), so the code isn't quite as dense. 2. check_events needs to variants to save different registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/Makefile | 3 +- arch/x86/xen/xen-asm.S | 140 ++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-asm.h | 12 ++++ arch/x86/xen/xen-asm_32.S | 111 ++++-------------------------- arch/x86/xen/xen-asm_64.S | 134 +----------------------------------- 5 files changed, 169 insertions(+), 231 deletions(-) create mode 100644 arch/x86/xen/xen-asm.S create mode 100644 arch/x86/xen/xen-asm.h diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 6dcefba7836f..3b767d03fd6a 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -6,7 +6,8 @@ CFLAGS_REMOVE_irq.o = -pg endif obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ - time.o xen-asm_$(BITS).o grant-table.o suspend.o + time.o xen-asm.o xen-asm_$(BITS).o \ + grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o \ No newline at end of file diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S new file mode 100644 index 000000000000..4c6f96799131 --- /dev/null +++ b/arch/x86/xen/xen-asm.S @@ -0,0 +1,140 @@ +/* + Asm versions of Xen pv-ops, suitable for either direct use or inlining. + The inline versions are the same as the direct-use versions, with the + pre- and post-amble chopped off. + + This code is encoded for size rather than absolute efficiency, + with a view to being able to inline as much as possible. + + We only bother with direct forms (ie, vcpu in percpu data) of + the operations here; the indirect forms are better handled in + C, since they're generally too large to inline anyway. + */ + +#include +#include +#include + +#include "xen-asm.h" + +/* + Enable events. This clears the event mask and tests the pending + event status with one and operation. If there are pending + events, then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Unmask events */ + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* Test for pending */ + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f + +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + + +/* + Disabling events is simply a matter of making the event mask + non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + (xen_)save_fl is used to get the current interrupt enable status. + Callers expect the status to be in X86_EFLAGS_IF, and other bits + may be set in the return value. We take advantage of this by + making sure that X86_EFLAGS_IF has the right value (and other bits + in that byte are 0), but other bits in the return value are + undefined. We need to toggle the state of the bit, because + Xen and x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + setz %ah + addb %ah,%ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + + +/* + In principle the caller should be passing us a value return + from xen_save_fl_direct, but for robustness sake we test only + the X86_EFLAGS_IF flag rather than the whole byte. After + setting the interrupt mask state, it checks for unmasked + pending events and enters the hypervisor to get them delivered + if so. + */ +ENTRY(xen_restore_fl_direct) +#ifdef CONFIG_X86_64 + testw $X86_EFLAGS_IF, %di +#else + testb $X86_EFLAGS_IF>>8, %ah +#endif + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + + +/* + Force an event check by making a hypercall, + but preserve regs before making the call. + */ +check_events: +#ifdef CONFIG_X86_32 + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax +#else + push %rax + push %rcx + push %rdx + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + call xen_force_evtchn_callback + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rdx + pop %rcx + pop %rax +#endif + ret + diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h new file mode 100644 index 000000000000..465276467a47 --- /dev/null +++ b/arch/x86/xen/xen-asm.h @@ -0,0 +1,12 @@ +#ifndef _XEN_XEN_ASM_H +#define _XEN_XEN_ASM_H + +#include + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +#endif diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 42786f59d9c0..082d173caaf3 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -11,101 +11,28 @@ generally too large to inline anyway. */ -#include - -#include +//#include #include -#include #include #include #include -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 +#include "xen-asm.h" /* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. + Force an event check by making a hypercall, + but preserve regs before making the call. */ -ENTRY(xen_irq_enable_direct) - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) +check_events: + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) /* We can't use sysexit directly, because we're not running in ring0. @@ -289,17 +216,3 @@ ENTRY(xen_iret_crit_fixup) lea 4(%edi),%esp /* point esp to new frame */ 2: jmp xen_do_upcall - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index d6fc51f4ce85..d205a283efe0 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -11,142 +11,14 @@ generally too large to inline anyway. */ -#include - -#include -#include #include -#include #include +#include +#include #include -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -#if 1 -/* - FIXME: x86_64 now can support direct access to percpu variables - via a segment override. Update xen accordingly. - */ -#define BUG ud2a -#endif - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - BUG - - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - BUG - - movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - BUG - - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - BUG - - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) - - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %rax - push %rcx - push %rdx - push %rsi - push %rdi - push %r8 - push %r9 - push %r10 - push %r11 - call xen_force_evtchn_callback - pop %r11 - pop %r10 - pop %r9 - pop %r8 - pop %rdi - pop %rsi - pop %rdx - pop %rcx - pop %rax - ret +#include "xen-asm.h" ENTRY(xen_adjust_exception_frame) mov 8+0(%rsp),%rcx From e4d0407185cdbdcfd99fc23bde2e5454bbc46329 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:54 -0800 Subject: [PATCH 355/566] xen: use direct ops on 64-bit Enable the use of the direct vcpu-access operations on 64-bit. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index aed7ceeb4b65..37230342c2c4 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -87,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; * * 0: not available, 1: available */ -static int have_vcpu_info_placement = -#ifdef CONFIG_X86_32 - 1 -#else - 0 -#endif - ; - +static int have_vcpu_info_placement = 1; static void xen_vcpu_setup(int cpu) { @@ -914,11 +907,6 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; -#ifdef CONFIG_X86_64 - /* Disable until direct per-cpu data access. */ - have_vcpu_info_placement = 0; -#endif - #ifdef CONFIG_X86_64 /* * Setup percpu state. We only need to do this for 64-bit From 18114f61359ac05e3aa797d53d63f40db41f798d Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 30 Jan 2009 18:16:46 -0800 Subject: [PATCH 356/566] x86: uaccess: use errret as error value in __put_user_size() Impact: cleanup In __put_user_size() macro errret is used for error value. But if size is 8, errret isn't passed to__put_user_asm_u64(). This behavior is inconsistent. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b9a24155f7af..b685ece89d5c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -186,7 +186,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 -#define __put_user_asm_u64(x, addr, err) \ +#define __put_user_asm_u64(x, addr, err, errret) \ asm volatile("1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ "3:\n" \ @@ -197,7 +197,7 @@ extern int __get_user_bad(void); _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=r" (err) \ - : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) + : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ asm volatile("1: movl %%eax,0(%1)\n" \ @@ -211,8 +211,8 @@ extern int __get_user_bad(void); asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else -#define __put_user_asm_u64(x, ptr, retval) \ - __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_asm_u64(x, ptr, retval, errret) \ + __put_user_asm(x, ptr, retval, "q", "", "Zr", errret) #define __put_user_asm_ex_u64(x, addr) \ __put_user_asm_ex(x, addr, "q", "", "Zr") #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) @@ -289,7 +289,8 @@ do { \ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \ + __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ + errret); \ break; \ default: \ __put_user_bad(); \ From 4abfd73e34e7915e62b6f75bd3e9f4014f830910 Mon Sep 17 00:00:00 2001 From: Adrian-Ken Rueegsegger Date: Thu, 5 Feb 2009 16:19:31 +1100 Subject: [PATCH 357/566] crypto: shash - Fix module refcount Module reference counting for shash is incorrect: when a new shash transformation is created the refcount is not increased as it should. Signed-off-by: Adrian-Ken Rueegsegger Signed-off-by: Herbert Xu --- crypto/shash.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crypto/shash.c b/crypto/shash.c index c9df367332ff..d5a2b619c55f 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -388,10 +388,15 @@ static int crypto_init_shash_ops_compat(struct crypto_tfm *tfm) struct shash_desc *desc = crypto_tfm_ctx(tfm); struct crypto_shash *shash; + if (!crypto_mod_get(calg)) + return -EAGAIN; + shash = __crypto_shash_cast(crypto_create_tfm( calg, &crypto_shash_type)); - if (IS_ERR(shash)) + if (IS_ERR(shash)) { + crypto_mod_put(calg); return PTR_ERR(shash); + } desc->tfm = shash; tfm->exit = crypto_exit_shash_ops_compat; From b534816b552d35bbd3c60702139ed5c7da2f55c2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Feb 2009 18:33:38 -0800 Subject: [PATCH 358/566] x86: don't apply __supported_pte_mask to non-present ptes On an x86 system which doesn't support global mappings, __supported_pte_mask has _PAGE_GLOBAL clear, to make sure it never appears in the PTE. pfn_pte() and so on will enforce it with: static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } However, we overload _PAGE_GLOBAL with _PAGE_PROTNONE on non-present ptes to distinguish them from swap entries. However, applying __supported_pte_mask indiscriminately will clear the bit and corrupt the pte. I guess the best fix is to only apply __supported_pte_mask to present ptes. This seems like the right solution to me, as it means we can completely ignore the issue of overlaps between the present pte bits and the non-present pte-as-swap entry use of the bits. __supported_pte_mask contains the set of flags we support on the current hardware. We also use bits in the pte for things like logically present ptes with no permissions, and swap entries for swapped out pages. We should only apply __supported_pte_mask to present ptes, because otherwise we may destroy other information being stored in the ptes. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 26 ++++++++++++++++++++------ arch/x86/include/asm/xen/page.h | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 06bbcbd66e9c..4f5af8447d54 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -302,16 +302,30 @@ static inline pte_t pte_mkspecial(pte_t pte) extern pteval_t __supported_pte_mask; +/* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. + */ +static inline pgprotval_t massage_pgprot(pgprot_t pgprot) +{ + pgprotval_t protval = pgprot_val(pgprot); + + if (protval & _PAGE_PRESENT) + protval &= __supported_pte_mask; + + return protval; +} + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -323,7 +337,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * the newprot (if present): */ val &= _PAGE_CHG_MASK; - val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; + val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; return __pte(val); } @@ -339,7 +353,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) -#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(unsigned long flags, unsigned long new_flags) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 7ef617ef1df3..4bd990ee43df 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -137,7 +137,7 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) pte_t pte; pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | - (pgprot_val(pgprot) & __supported_pte_mask); + massage_pgprot(pgprot); return pte; } From 7b2cd92adc5430b0c1adeb120971852b4ea1ab08 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Feb 2009 16:48:24 +1100 Subject: [PATCH 359/566] crypto: api - Fix zeroing on free Geert Uytterhoeven pointed out that we're not zeroing all the memory when freeing a transform. This patch fixes it by calling ksize to ensure that we zero everything in sight. Reported-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- crypto/api.c | 20 ++++++++++---------- include/linux/crypto.h | 7 ++++++- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 9975a7bd246c..efe77df6863f 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -557,34 +557,34 @@ err: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(crypto_alloc_tfm); - + /* - * crypto_free_tfm - Free crypto transform + * crypto_destroy_tfm - Free crypto transform + * @mem: Start of tfm slab * @tfm: Transform to free * - * crypto_free_tfm() frees up the transform and any associated resources, + * This function frees up the transform and any associated resources, * then drops the refcount on the associated algorithm. */ -void crypto_free_tfm(struct crypto_tfm *tfm) +void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm) { struct crypto_alg *alg; int size; - if (unlikely(!tfm)) + if (unlikely(!mem)) return; alg = tfm->__crt_alg; - size = sizeof(*tfm) + alg->cra_ctxsize; + size = ksize(mem); if (!tfm->exit && alg->cra_exit) alg->cra_exit(tfm); crypto_exit_ops(tfm); crypto_mod_put(alg); - memset(tfm, 0, size); - kfree(tfm); + memset(mem, 0, size); + kfree(mem); } - -EXPORT_SYMBOL_GPL(crypto_free_tfm); +EXPORT_SYMBOL_GPL(crypto_destroy_tfm); int crypto_has_alg(const char *name, u32 type, u32 mask) { diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3bacd71509fb..1f2e9020acc6 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -552,7 +552,12 @@ struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask); struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); -void crypto_free_tfm(struct crypto_tfm *tfm); +void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm); + +static inline void crypto_free_tfm(struct crypto_tfm *tfm) +{ + return crypto_destroy_tfm(tfm, tfm); +} int alg_test(const char *driver, const char *alg, u32 type, u32 mask); From 412e87ae5d852bc3d836f475c19d954b3324363d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Feb 2009 16:51:25 +1100 Subject: [PATCH 360/566] crypto: shash - Fix tfm destruction We were freeing an offset into the slab object instead of the start. This patch fixes it by calling crypto_destroy_tfm which allows the correct address to be given. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index cd16d6e668ce..d797e119e3d5 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -222,7 +222,7 @@ static inline struct crypto_tfm *crypto_shash_tfm(struct crypto_shash *tfm) static inline void crypto_free_shash(struct crypto_shash *tfm) { - crypto_free_tfm(crypto_shash_tfm(tfm)); + crypto_destroy_tfm(tfm, crypto_shash_tfm(tfm)); } static inline unsigned int crypto_shash_alignmask( From 32bd671d6cbeda60dc73be77fa2b9037d9a9bfa0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Feb 2009 12:24:15 +0100 Subject: [PATCH 361/566] signal: re-add dead task accumulation stats. We're going to split the process wide cpu accounting into two parts: - clocks; which can take all the time they want since they run from user context. - timers; which need constant time tracing but can affort the overhead because they're default off -- and rare. The clock readout will go back to a full sum of the thread group, for this we need to re-add the exit stats that were removed in the initial itimer rework (f06febc9: timers: fix itimer/many thread hang). Furthermore, since that full sum can be rather slow for large thread groups and we have the complete dead task stats, revert the do_notify_parent time computation. Signed-off-by: Peter Zijlstra Reviewed-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++++++- kernel/exit.c | 3 +++ kernel/fork.c | 3 ++- kernel/signal.c | 8 ++++---- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2127e959e0f4..2e0646a30314 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -559,7 +559,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ - cputime_t cutime, cstime; + cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -567,6 +567,14 @@ struct signal_struct { unsigned long inblock, oublock, cinblock, coublock; struct task_io_accounting ioac; + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) + */ + unsigned long long sum_sched_runtime; + /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs diff --git a/kernel/exit.c b/kernel/exit.c index f80dec3f1875..efd30ccf3858 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ + sig->utime = cputime_add(sig->utime, task_utime(tsk)); + sig->stime = cputime_add(sig->stime, task_stime(tsk)); sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; @@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); + sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } diff --git a/kernel/fork.c b/kernel/fork.c index 242a706e7721..e8e854a04ad2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -851,13 +851,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->tty_old_pgrp = NULL; sig->tty = NULL; - sig->cutime = sig->cstime = cputime_zero; + sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; task_io_accounting_init(&sig->ioac); + sig->sum_sched_runtime = 0; taskstats_tgid_init(sig); task_lock(current->group_leader); diff --git a/kernel/signal.c b/kernel/signal.c index b6b36768b758..2a74fe87c0dd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1367,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; unsigned long flags; struct sighand_struct *psig; - struct task_cputime cputime; int ret = sig; BUG_ON(sig == -1); @@ -1397,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - thread_group_cputime(tsk, &cputime); - info.si_utime = cputime_to_jiffies(cputime.utime); - info.si_stime = cputime_to_jiffies(cputime.stime); + info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, + tsk->signal->utime)); + info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, + tsk->signal->stime)); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) From 4cd4c1b40d40447fb5e7ba80746c6d7ba91d7a53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Feb 2009 12:24:16 +0100 Subject: [PATCH 362/566] timers: split process wide cpu clocks/timers Change the process wide cpu timers/clocks so that we: 1) don't mess up the kernel with too many threads, 2) don't have a per-cpu allocation for each process, 3) have no impact when not used. In order to accomplish this we're going to split it into two parts: - clocks; which can take all the time they want since they run from user context -- ie. sys_clock_gettime(CLOCK_PROCESS_CPUTIME_ID) - timers; which need constant time sampling but since they're explicity used, the user can pay the overhead. The clock readout will go back to a full sum of the thread group, while the timers will run of a global 'clock' that only runs when needed, so only programs that make use of the facility pay the price. Signed-off-by: Peter Zijlstra Reviewed-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 11 +++-- include/linux/sched.h | 54 ++++++++++++---------- kernel/itimer.c | 4 +- kernel/posix-cpu-timers.c | 95 +++++++++++++++++++++++++++++++++++++-- kernel/sched_stats.h | 45 +++++++++++-------- 5 files changed, 155 insertions(+), 54 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index ea0ea1a4c36f..e752d973fa21 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -48,12 +48,11 @@ extern struct fs_struct init_fs; .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ - .cputime = { .totals = { \ - .utime = cputime_zero, \ - .stime = cputime_zero, \ - .sum_exec_runtime = 0, \ - .lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock), \ - }, }, \ + .cputimer = { \ + .cputime = INIT_CPUTIME, \ + .running = 0, \ + .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ + }, \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 2e0646a30314..082d7619b3a1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -443,7 +443,6 @@ struct pacct_struct { * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds - * @lock: lock for fields in this struct * * This structure groups together three kinds of CPU time that are * tracked for threads and thread groups. Most things considering @@ -454,23 +453,33 @@ struct task_cputime { cputime_t utime; cputime_t stime; unsigned long long sum_exec_runtime; - spinlock_t lock; }; /* Alternate field names when used to cache expirations. */ #define prof_exp stime #define virt_exp utime #define sched_exp sum_exec_runtime +#define INIT_CPUTIME \ + (struct task_cputime) { \ + .utime = cputime_zero, \ + .stime = cputime_zero, \ + .sum_exec_runtime = 0, \ + } + /** - * struct thread_group_cputime - thread group interval timer counts - * @totals: thread group interval timers; substructure for - * uniprocessor kernel, per-cpu for SMP kernel. + * struct thread_group_cputimer - thread group interval timer counts + * @cputime: thread group interval timers. + * @running: non-zero when there are timers running and + * @cputime receives updates. + * @lock: lock for fields in this struct. * * This structure contains the version of task_cputime, above, that is - * used for thread group CPU clock calculations. + * used for thread group CPU timer calculations. */ -struct thread_group_cputime { - struct task_cputime totals; +struct thread_group_cputimer { + struct task_cputime cputime; + int running; + spinlock_t lock; }; /* @@ -519,10 +528,10 @@ struct signal_struct { cputime_t it_prof_incr, it_virt_incr; /* - * Thread group totals for process CPU clocks. - * See thread_group_cputime(), et al, for details. + * Thread group totals for process CPU timers. + * See thread_group_cputimer(), et al, for details. */ - struct thread_group_cputime cputime; + struct thread_group_cputimer cputimer; /* Earliest-expiration cache. */ struct task_cputime cputime_expires; @@ -2191,27 +2200,26 @@ static inline int spin_needbreak(spinlock_t *lock) /* * Thread group CPU time accounting. */ +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); static inline -void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) { - struct task_cputime *totals = &tsk->signal->cputime.totals; + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; unsigned long flags; - spin_lock_irqsave(&totals->lock, flags); - *times = *totals; - spin_unlock_irqrestore(&totals->lock, flags); + WARN_ON(!cputimer->running); + + spin_lock_irqsave(&cputimer->lock, flags); + *times = cputimer->cputime; + spin_unlock_irqrestore(&cputimer->lock, flags); } static inline void thread_group_cputime_init(struct signal_struct *sig) { - sig->cputime.totals = (struct task_cputime){ - .utime = cputime_zero, - .stime = cputime_zero, - .sum_exec_runtime = 0, - }; - - spin_lock_init(&sig->cputime.totals.lock); + sig->cputimer.cputime = INIT_CPUTIME; + spin_lock_init(&sig->cputimer.lock); + sig->cputimer.running = 0; } static inline void thread_group_cputime_free(struct signal_struct *sig) diff --git a/kernel/itimer.c b/kernel/itimer.c index 6a5fe93dd8bd..58762f7077ec 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value) struct task_cputime cputime; cputime_t utime; - thread_group_cputime(tsk, &cputime); + thread_group_cputimer(tsk, &cputime); utime = cputime.utime; if (cputime_le(cval, utime)) { /* about to fire */ cval = jiffies_to_cputime(1); @@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value) struct task_cputime times; cputime_t ptime; - thread_group_cputime(tsk, ×); + thread_group_cputimer(tsk, ×); ptime = cputime_add(times.utime, times.stime); if (cputime_le(cval, ptime)) { /* about to fire */ cval = jiffies_to_cputime(1); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index fa07da94d7be..db107c9bbc05 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -230,6 +230,37 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, return 0; } +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) +{ + struct sighand_struct *sighand; + struct signal_struct *sig; + struct task_struct *t; + + *times = INIT_CPUTIME; + + rcu_read_lock(); + sighand = rcu_dereference(tsk->sighand); + if (!sighand) + goto out; + + sig = tsk->signal; + + t = tsk; + do { + times->utime = cputime_add(times->utime, t->utime); + times->stime = cputime_add(times->stime, t->stime); + times->sum_exec_runtime += t->se.sum_exec_runtime; + + t = next_thread(t); + } while (t != tsk); + + times->utime = cputime_add(times->utime, sig->utime); + times->stime = cputime_add(times->stime, sig->stime); + times->sum_exec_runtime += sig->sum_sched_runtime; +out: + rcu_read_unlock(); +} + /* * Sample a process (thread group) clock for the given group_leader task. * Must be called with tasklist_lock held for reading. @@ -475,6 +506,29 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) now); } +/* + * Enable the process wide cpu timer accounting. + * + * serialized using ->sighand->siglock + */ +static void start_process_timers(struct task_struct *tsk) +{ + tsk->signal->cputimer.running = 1; + barrier(); +} + +/* + * Release the process wide timer accounting -- timer stops ticking when + * nobody cares about it. + * + * serialized using ->sighand->siglock + */ +static void stop_process_timers(struct task_struct *tsk) +{ + tsk->signal->cputimer.running = 0; + barrier(); +} + /* * Insert the timer on the appropriate list before any timers that * expire later. This must be called with the tasklist_lock held @@ -495,6 +549,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) BUG_ON(!irqs_disabled()); spin_lock(&p->sighand->siglock); + if (!CPUCLOCK_PERTHREAD(timer->it_clock)) + start_process_timers(p); + listpos = head; if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { list_for_each_entry(next, head, entry) { @@ -987,13 +1044,15 @@ static void check_process_timers(struct task_struct *tsk, sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && list_empty(&timers[CPUCLOCK_VIRT]) && cputime_eq(sig->it_virt_expires, cputime_zero) && - list_empty(&timers[CPUCLOCK_SCHED])) + list_empty(&timers[CPUCLOCK_SCHED])) { + stop_process_timers(tsk); return; + } /* * Collect the current process totals. */ - thread_group_cputime(tsk, &cputime); + thread_group_cputimer(tsk, &cputime); utime = cputime.utime; ptime = cputime_add(utime, cputime.stime); sum_sched_runtime = cputime.sum_exec_runtime; @@ -1259,7 +1318,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) if (!task_cputime_zero(&sig->cputime_expires)) { struct task_cputime group_sample; - thread_group_cputime(tsk, &group_sample); + thread_group_cputimer(tsk, &group_sample); if (task_cputime_expired(&group_sample, &sig->cputime_expires)) return 1; } @@ -1328,6 +1387,33 @@ void run_posix_cpu_timers(struct task_struct *tsk) } } +/* + * Sample a process (thread group) timer for the given group_leader task. + * Must be called with tasklist_lock held for reading. + */ +static int cpu_timer_sample_group(const clockid_t which_clock, + struct task_struct *p, + union cpu_time_count *cpu) +{ + struct task_cputime cputime; + + thread_group_cputimer(p, &cputime); + switch (CPUCLOCK_WHICH(which_clock)) { + default: + return -EINVAL; + case CPUCLOCK_PROF: + cpu->cpu = cputime_add(cputime.utime, cputime.stime); + break; + case CPUCLOCK_VIRT: + cpu->cpu = cputime.utime; + break; + case CPUCLOCK_SCHED: + cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + break; + } + return 0; +} + /* * Set one of the process-wide special case CPU timers. * The tsk->sighand->siglock must be held by the caller. @@ -1341,7 +1427,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, struct list_head *head; BUG_ON(clock_idx == CPUCLOCK_SCHED); - cpu_clock_sample_group(clock_idx, tsk, &now); + start_process_timers(tsk); + cpu_timer_sample_group(clock_idx, tsk, &now); if (oldval) { if (!cputime_eq(*oldval, cputime_zero)) { diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 8ab0cef8ecab..a8f93dd374e1 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -296,19 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) static inline void account_group_user_time(struct task_struct *tsk, cputime_t cputime) { - struct task_cputime *times; - struct signal_struct *sig; + struct thread_group_cputimer *cputimer; /* tsk == current, ensure it is safe to use ->signal */ if (unlikely(tsk->exit_state)) return; - sig = tsk->signal; - times = &sig->cputime.totals; + cputimer = &tsk->signal->cputimer; - spin_lock(×->lock); - times->utime = cputime_add(times->utime, cputime); - spin_unlock(×->lock); + if (!cputimer->running) + return; + + spin_lock(&cputimer->lock); + cputimer->cputime.utime = + cputime_add(cputimer->cputime.utime, cputime); + spin_unlock(&cputimer->lock); } /** @@ -324,19 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk, static inline void account_group_system_time(struct task_struct *tsk, cputime_t cputime) { - struct task_cputime *times; - struct signal_struct *sig; + struct thread_group_cputimer *cputimer; /* tsk == current, ensure it is safe to use ->signal */ if (unlikely(tsk->exit_state)) return; - sig = tsk->signal; - times = &sig->cputime.totals; + cputimer = &tsk->signal->cputimer; - spin_lock(×->lock); - times->stime = cputime_add(times->stime, cputime); - spin_unlock(×->lock); + if (!cputimer->running) + return; + + spin_lock(&cputimer->lock); + cputimer->cputime.stime = + cputime_add(cputimer->cputime.stime, cputime); + spin_unlock(&cputimer->lock); } /** @@ -352,7 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk, static inline void account_group_exec_runtime(struct task_struct *tsk, unsigned long long ns) { - struct task_cputime *times; + struct thread_group_cputimer *cputimer; struct signal_struct *sig; sig = tsk->signal; @@ -361,9 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, if (unlikely(!sig)) return; - times = &sig->cputime.totals; + cputimer = &sig->cputimer; - spin_lock(×->lock); - times->sum_exec_runtime += ns; - spin_unlock(×->lock); + if (!cputimer->running) + return; + + spin_lock(&cputimer->lock); + cputimer->cputime.sum_exec_runtime += ns; + spin_unlock(&cputimer->lock); } From 0cd5c3c80a0ebd68c08312fa7d8c13149cc61c4c Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 4 Feb 2009 14:29:19 -0800 Subject: [PATCH 363/566] x86: disable intel_iommu support by default Due to recurring issues with DMAR support on certain platforms. There's a number of filesystem corruption incidents reported: https://bugzilla.redhat.com/show_bug.cgi?id=479996 http://bugzilla.kernel.org/show_bug.cgi?id=12578 Provide a Kconfig option to change whether it is enabled by default. If disabled, it can still be reenabled by passing intel_iommu=on to the kernel. Keep the .config option off by default. Signed-off-by: Kyle McMartin Signed-off-by: Andrew Morton Acked-By: David Woodhouse Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/Kconfig | 11 +++++++++++ drivers/pci/intel-iommu.c | 14 +++++++++++--- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d8362cf9909e..b182626739ea 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -937,6 +937,8 @@ and is between 256 and 4096 characters. It is defined in the file intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option + on + Enable intel iommu driver. off Disable intel iommu driver. igfx_off [Default Off] diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 73f7fe8fd4d1..9c39095b33fc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1802,6 +1802,17 @@ config DMAR and include PCI device scope covered by these DMA remapping devices. +config DMAR_DEFAULT_ON + def_bool n + prompt "Enable DMA Remapping Devices by default" + depends on DMAR + help + Selecting this option will enable a DMAR device at boot time if + one is found. If this option is not selected, DMAR support can + be enabled by passing intel_iommu=on to the kernel. It is + recommended you say N here while the DMAR code remains + experimental. + config DMAR_GFX_WA def_bool y prompt "Support for Graphics workaround" diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3dfecb20d5e7..f4b7c79023ff 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -268,7 +268,12 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -int dmar_disabled; +#ifdef CONFIG_DMAR_DEFAULT_ON +int dmar_disabled = 0; +#else +int dmar_disabled = 1; +#endif /*CONFIG_DMAR_DEFAULT_ON*/ + static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; @@ -284,9 +289,12 @@ static int __init intel_iommu_setup(char *str) if (!str) return -EINVAL; while (*str) { - if (!strncmp(str, "off", 3)) { + if (!strncmp(str, "on", 2)) { + dmar_disabled = 0; + printk(KERN_INFO "Intel-IOMMU: enabled\n"); + } else if (!strncmp(str, "off", 3)) { dmar_disabled = 1; - printk(KERN_INFO"Intel-IOMMU: disabled\n"); + printk(KERN_INFO "Intel-IOMMU: disabled\n"); } else if (!strncmp(str, "igfx_off", 8)) { dmar_map_gfx = 0; printk(KERN_INFO From 1ca3abdb6a4b87246b00292f048acd344325fd12 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 23 Jan 2009 09:25:02 -0500 Subject: [PATCH 364/566] [CPUFREQ] Make ignore_nice_load setting of ondemand work as expected. ondemand micro-accounting of idle time changes broke ignore_nice_load sysfs setting due to a thinko in the code. The bug entry: http://bugzilla.kernel.org/show_bug.cgi?id=12310 Reported-by: Jim Bray Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 47 ++++++++++++++++-------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 6a2b036c9389..6f45b1658a67 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -117,11 +117,7 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); - - if (!dbs_tuners_ins.ignore_nice) { - busy_time = cputime64_add(busy_time, - kstat_cpu(cpu).cpustat.nice); - } + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); idle_time = cputime64_sub(cur_wall_time, busy_time); if (wall) @@ -137,23 +133,6 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); - if (dbs_tuners_ins.ignore_nice) { - cputime64_t cur_nice; - unsigned long cur_nice_jiffies; - struct cpu_dbs_info_s *dbs_info; - - dbs_info = &per_cpu(cpu_dbs_info, cpu); - cur_nice = cputime64_sub(kstat_cpu(cpu).cpustat.nice, - dbs_info->prev_cpu_nice); - /* - * Assumption: nice time between sampling periods will be - * less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); - dbs_info->prev_cpu_nice = kstat_cpu(cpu).cpustat.nice; - return idle_time + jiffies_to_usecs(cur_nice_jiffies); - } return idle_time; } @@ -319,6 +298,9 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, dbs_info = &per_cpu(cpu_dbs_info, j); dbs_info->prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + } mutex_unlock(&dbs_mutex); @@ -419,6 +401,23 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) j_dbs_info->prev_cpu_idle); j_dbs_info->prev_cpu_idle = cur_idle_time; + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, + j_dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + if (unlikely(!wall_time || wall_time < idle_time)) continue; @@ -575,6 +574,10 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kstat_cpu(j).cpustat.nice; + } } this_dbs_info->cpu = cpu; /* From 732553e567c2700ba5b9bccc6ec885c75779a94b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 3 Feb 2009 17:46:43 +0100 Subject: [PATCH 365/566] [CPUFREQ] powernow-k8: Get transition latency from ACPI _PSS table At this time, the PowerNow! driver for K8 uses an experimentally derived formula to calculate transition latency. The value it provides is orders of magnitude too large on modern systems. This patch replaces the formula with ACPI _PSS latency values for more accuracy and better performance. I've tested it on two 2nd generation Opteron systems, a 3rd generation Operton system, and a Turion X2 without seeing any stability problems. Signed-off-by: Mark Langsdorf Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 5c28b37dea11..fb039cd345d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) free_cpumask_var(data->acpi_data.shared_cpu_map); } +static int get_transition_latency(struct powernow_k8_data *data) +{ + int max_latency = 0; + int i; + for (i = 0; i < data->acpi_data.state_count; i++) { + int cur_latency = data->acpi_data.states[i].transition_latency + + data->acpi_data.states[i].bus_master_latency; + if (cur_latency > max_latency) + max_latency = cur_latency; + } + /* value in usecs, needs to be in nanoseconds */ + return 1000 * max_latency; +} + #else static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int get_transition_latency(struct powernow_k8_data *data) { return 0; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ @@ -1173,7 +1188,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (rc) { goto err_out; } - } + /* Take a crude guess here. + * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = ( + ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + ((1 << data->irt) * 30)) * 1000; + } else /* ACPI _PSS objects available */ + pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -1204,11 +1225,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); data->available_cores = pol->cpus; - /* Take a crude guess here. - * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) - + (3 * (1 << data->irt) * 10)) * 1000; - if (cpu_family == CPU_HW_PSTATE) pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else From 130ace11a9dc682541336d1fe5cb3bc7771a149e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Feb 2009 00:57:48 +0900 Subject: [PATCH 366/566] x86: style cleanups for xen assemblies Make the following style cleanups: * drop unnecessary //#include from xen-asm_32.S * compulsive adding of space after comma * reformat multiline comments Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/xen/xen-asm.S | 78 +++++++------ arch/x86/xen/xen-asm_32.S | 238 ++++++++++++++++++++------------------ arch/x86/xen/xen-asm_64.S | 107 ++++++++--------- 3 files changed, 219 insertions(+), 204 deletions(-) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 4c6f96799131..79d7362ad6d1 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -1,14 +1,14 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in percpu data) of - the operations here; the indirect forms are better handled in - C, since they're generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in percpu data) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ #include @@ -18,17 +18,19 @@ #include "xen-asm.h" /* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. + * Enable events. This clears the event mask and tests the pending + * event status with one and operation. If there are pending events, + * then enter the hypervisor to get them handled. */ ENTRY(xen_irq_enable_direct) /* Unmask events */ movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ /* Test for pending */ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending @@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct) /* - Disabling events is simply a matter of making the event mask - non-zero. + * Disabling events is simply a matter of making the event mask + * non-zero. */ ENTRY(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask @@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct) RELOC(xen_irq_disable_direct, 0) /* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). + * (xen_)save_fl is used to get the current interrupt enable status. + * Callers expect the status to be in X86_EFLAGS_IF, and other bits + * may be set in the return value. We take advantage of this by + * making sure that X86_EFLAGS_IF has the right value (and other bits + * in that byte are 0), but other bits in the return value are + * undefined. We need to toggle the state of the bit, because Xen and + * x86 use opposite senses (mask vs enable). */ ENTRY(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah - addb %ah,%ah + addb %ah, %ah ENDPATCH(xen_save_fl_direct) ret ENDPROC(xen_save_fl_direct) @@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct) /* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. + * In principle the caller should be passing us a value return from + * xen_save_fl_direct, but for robustness sake we test only the + * X86_EFLAGS_IF flag rather than the whole byte. After setting the + * interrupt mask state, it checks for unmasked pending events and + * enters the hypervisor to get them delivered if so. */ ENTRY(xen_restore_fl_direct) #ifdef CONFIG_X86_64 @@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct) testb $X86_EFLAGS_IF>>8, %ah #endif setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending @@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct) /* - Force an event check by making a hypercall, - but preserve regs before making the call. + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ check_events: #ifdef CONFIG_X86_32 @@ -137,4 +140,3 @@ check_events: pop %rax #endif ret - diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 082d173caaf3..88e15deb8b82 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -1,17 +1,16 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ -//#include #include #include #include @@ -21,8 +20,8 @@ #include "xen-asm.h" /* - Force an event check by making a hypercall, - but preserve regs before making the call. + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ check_events: push %eax @@ -35,10 +34,10 @@ check_events: ret /* - We can't use sysexit directly, because we're not running in ring0. - But we can easily fake it up using iret. Assuming xen_sysexit - is jumped to with a standard stack frame, we can just strip it - back to a standard iret frame and use iret. + * We can't use sysexit directly, because we're not running in ring0. + * But we can easily fake it up using iret. Assuming xen_sysexit is + * jumped to with a standard stack frame, we can just strip it back to + * a standard iret frame and use iret. */ ENTRY(xen_sysexit) movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ @@ -49,33 +48,31 @@ ENTRY(xen_sysexit) ENDPROC(xen_sysexit) /* - This is run where a normal iret would be run, with the same stack setup: - 8: eflags - 4: cs - esp-> 0: eip - - This attempts to make sure that any pending events are dealt - with on return to usermode, but there is a small window in - which an event can happen just before entering usermode. If - the nested interrupt ends up setting one of the TIF_WORK_MASK - pending work flags, they will not be tested again before - returning to usermode. This means that a process can end up - with pending work, which will be unprocessed until the process - enters and leaves the kernel again, which could be an - unbounded amount of time. This means that a pending signal or - reschedule event could be indefinitely delayed. - - The fix is to notice a nested interrupt in the critical - window, and if one occurs, then fold the nested interrupt into - the current interrupt stack frame, and re-process it - iteratively rather than recursively. This means that it will - exit via the normal path, and all pending work will be dealt - with appropriately. - - Because the nested interrupt handler needs to deal with the - current stack state in whatever form its in, we keep things - simple by only using a single register which is pushed/popped - on the stack. + * This is run where a normal iret would be run, with the same stack setup: + * 8: eflags + * 4: cs + * esp-> 0: eip + * + * This attempts to make sure that any pending events are dealt with + * on return to usermode, but there is a small window in which an + * event can happen just before entering usermode. If the nested + * interrupt ends up setting one of the TIF_WORK_MASK pending work + * flags, they will not be tested again before returning to + * usermode. This means that a process can end up with pending work, + * which will be unprocessed until the process enters and leaves the + * kernel again, which could be an unbounded amount of time. This + * means that a pending signal or reschedule event could be + * indefinitely delayed. + * + * The fix is to notice a nested interrupt in the critical window, and + * if one occurs, then fold the nested interrupt into the current + * interrupt stack frame, and re-process it iteratively rather than + * recursively. This means that it will exit via the normal path, and + * all pending work will be dealt with appropriately. + * + * Because the nested interrupt handler needs to deal with the current + * stack state in whatever form its in, we keep things simple by only + * using a single register which is pushed/popped on the stack. */ ENTRY(xen_iret) /* test eflags for special cases */ @@ -85,13 +82,15 @@ ENTRY(xen_iret) push %eax ESP_OFFSET=4 # bytes pushed onto stack - /* Store vcpu_info pointer for easy access. Do it this - way to avoid having to reload %fs */ + /* + * Store vcpu_info pointer for easy access. Do it this way to + * avoid having to reload %fs + */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax),%eax - movl __per_cpu_offset(,%eax,4),%eax - mov per_cpu__xen_vcpu(%eax),%eax + movl TI_cpu(%eax), %eax + movl __per_cpu_offset(,%eax,4), %eax + mov per_cpu__xen_vcpu(%eax), %eax #else movl per_cpu__xen_vcpu, %eax #endif @@ -99,37 +98,46 @@ ENTRY(xen_iret) /* check IF state we're restoring */ testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - /* Maybe enable events. Once this happens we could get a - recursive event, so the critical region starts immediately - afterwards. However, if that happens we don't end up - resuming the code, so we don't have to be worried about - being preempted to another CPU. */ + /* + * Maybe enable events. Once this happens we could get a + * recursive event, so the critical region starts immediately + * afterwards. However, if that happens we don't end up + * resuming the code, so we don't have to be worried about + * being preempted to another CPU. + */ setz XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ cmpw $0x0001, XEN_vcpu_info_pending(%eax) - /* If there's something pending, mask events again so we - can jump back into xen_hypervisor_callback */ + /* + * If there's something pending, mask events again so we can + * jump back into xen_hypervisor_callback + */ sete XEN_vcpu_info_mask(%eax) popl %eax - /* From this point on the registers are restored and the stack - updated, so we don't need to worry about it if we're preempted */ + /* + * From this point on the registers are restored and the stack + * updated, so we don't need to worry about it if we're + * preempted + */ iret_restore_end: - /* Jump to hypervisor_callback after fixing up the stack. - Events are masked, so jumping out of the critical - region is OK. */ + /* + * Jump to hypervisor_callback after fixing up the stack. + * Events are masked, so jumping out of the critical region is + * OK. + */ je xen_hypervisor_callback 1: iret xen_iret_end_crit: -.section __ex_table,"a" +.section __ex_table, "a" .align 4 - .long 1b,iret_exc + .long 1b, iret_exc .previous hyper_iret: @@ -139,55 +147,55 @@ hyper_iret: .globl xen_iret_start_crit, xen_iret_end_crit /* - This is called by xen_hypervisor_callback in entry.S when it sees - that the EIP at the time of interrupt was between xen_iret_start_crit - and xen_iret_end_crit. We're passed the EIP in %eax so we can do - a more refined determination of what to do. - - The stack format at this point is: - ---------------- - ss : (ss/esp may be present if we came from usermode) - esp : - eflags } outer exception info - cs } - eip } - ---------------- <- edi (copy dest) - eax : outer eax if it hasn't been restored - ---------------- - eflags } nested exception info - cs } (no ss/esp because we're nested - eip } from the same ring) - orig_eax }<- esi (copy src) - - - - - - - - - - fs } - es } - ds } SAVE_ALL state - eax } - : : - ebx }<- esp - ---------------- - - In order to deliver the nested exception properly, we need to shift - everything from the return addr up to the error code so it - sits just under the outer exception info. This means that when we - handle the exception, we do it in the context of the outer exception - rather than starting a new one. - - The only caveat is that if the outer eax hasn't been - restored yet (ie, it's still on stack), we need to insert - its value into the SAVE_ALL state before going on, since - it's usermode state which we eventually need to restore. + * This is called by xen_hypervisor_callback in entry.S when it sees + * that the EIP at the time of interrupt was between + * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in + * %eax so we can do a more refined determination of what to do. + * + * The stack format at this point is: + * ---------------- + * ss : (ss/esp may be present if we came from usermode) + * esp : + * eflags } outer exception info + * cs } + * eip } + * ---------------- <- edi (copy dest) + * eax : outer eax if it hasn't been restored + * ---------------- + * eflags } nested exception info + * cs } (no ss/esp because we're nested + * eip } from the same ring) + * orig_eax }<- esi (copy src) + * - - - - - - - - + * fs } + * es } + * ds } SAVE_ALL state + * eax } + * : : + * ebx }<- esp + * ---------------- + * + * In order to deliver the nested exception properly, we need to shift + * everything from the return addr up to the error code so it sits + * just under the outer exception info. This means that when we + * handle the exception, we do it in the context of the outer + * exception rather than starting a new one. + * + * The only caveat is that if the outer eax hasn't been restored yet + * (ie, it's still on stack), we need to insert its value into the + * SAVE_ALL state before going on, since it's usermode state which we + * eventually need to restore. */ ENTRY(xen_iret_crit_fixup) /* - Paranoia: Make sure we're really coming from kernel space. - One could imagine a case where userspace jumps into the - critical range address, but just before the CPU delivers a GP, - it decides to deliver an interrupt instead. Unlikely? - Definitely. Easy to avoid? Yes. The Intel documents - explicitly say that the reported EIP for a bad jump is the - jump instruction itself, not the destination, but some virtual - environments get this wrong. + * Paranoia: Make sure we're really coming from kernel space. + * One could imagine a case where userspace jumps into the + * critical range address, but just before the CPU delivers a + * GP, it decides to deliver an interrupt instead. Unlikely? + * Definitely. Easy to avoid? Yes. The Intel documents + * explicitly say that the reported EIP for a bad jump is the + * jump instruction itself, not the destination, but some + * virtual environments get this wrong. */ movl PT_CS(%esp), %ecx andl $SEGMENT_RPL_MASK, %ecx @@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) lea PT_ORIG_EAX(%esp), %esi lea PT_EFLAGS(%esp), %edi - /* If eip is before iret_restore_end then stack - hasn't been restored yet. */ + /* + * If eip is before iret_restore_end then stack + * hasn't been restored yet. + */ cmp $iret_restore_end, %eax jae 1f - movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ + movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ movl %eax, PT_EAX(%esp) - lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ + lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ /* set up the copy */ 1: std @@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup) rep movsl cld - lea 4(%edi),%esp /* point esp to new frame */ + lea 4(%edi), %esp /* point esp to new frame */ 2: jmp xen_do_upcall diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index d205a283efe0..02f496a8dbaa 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -1,14 +1,14 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ #include @@ -21,25 +21,25 @@ #include "xen-asm.h" ENTRY(xen_adjust_exception_frame) - mov 8+0(%rsp),%rcx - mov 8+8(%rsp),%r11 + mov 8+0(%rsp), %rcx + mov 8+8(%rsp), %r11 ret $16 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* - Xen64 iret frame: - - ss - rsp - rflags - cs - rip <-- standard iret frame - - flags - - rcx } - r11 }<-- pushed by hypercall page -rsp -> rax } + * Xen64 iret frame: + * + * ss + * rsp + * rflags + * cs + * rip <-- standard iret frame + * + * flags + * + * rcx } + * r11 }<-- pushed by hypercall page + * rsp->rax } */ ENTRY(xen_iret) pushq $0 @@ -48,8 +48,8 @@ ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) /* - sysexit is not used for 64-bit processes, so it's - only ever used to return to 32-bit compat userspace. + * sysexit is not used for 64-bit processes, so it's only ever used to + * return to 32-bit compat userspace. */ ENTRY(xen_sysexit) pushq $__USER32_DS @@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit) RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ movq %rsp, PER_CPU_VAR(old_rsp) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(old_rsp) @@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64) RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack), %rsp @@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) /* - Xen handles syscall callbacks much like ordinary exceptions, - which means we have: - - kernel gs - - kernel rsp - - an iret-like stack frame on the stack (including rcx and r11): - ss - rsp - rflags - cs - rip - r11 - rsp-> rcx - - In all the entrypoints, we undo all that to make it look - like a CPU-generated syscall/sysenter and jump to the normal - entrypoint. + * Xen handles syscall callbacks much like ordinary exceptions, which + * means we have: + * - kernel gs + * - kernel rsp + * - an iret-like stack frame on the stack (including rcx and r11): + * ss + * rsp + * rflags + * cs + * rip + * r11 + * rsp->rcx + * + * In all the entrypoints, we undo all that to make it look like a + * CPU-generated syscall/sysenter and jump to the normal entrypoint. */ .macro undo_xen_syscall - mov 0*8(%rsp),%rcx - mov 1*8(%rsp),%r11 - mov 5*8(%rsp),%rsp + mov 0*8(%rsp), %rcx + mov 1*8(%rsp), %r11 + mov 5*8(%rsp), %rsp .endm /* Normal 64-bit system call target */ @@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target) ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx,%r11 */ + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $VGCF_in_syscall jmp hypercall_iret From 56fc82c5360cdf0b250b5eb74f38657b0402faa5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Feb 2009 00:48:02 +0900 Subject: [PATCH 367/566] modpost: NOBITS sections may point beyond the end of the file Impact: fix link failure on certain toolchains with specific configs Recent percpu change made x86_64 split .data.init section into three separate segments - data.init, percpu and data.init2. data.init2 gets .data.nosave and .bss.* and is followed by .notes segment. Depending on configuration both segments might contain no data, in which case the tool chain makes the section header to contain offset beyond the end of the file. modpost isn't too happy about it and fails build - as reported by Pawel Dziekonski: Building modules, stage 2. MODPOST 416 modules FATAL: vmlinux is truncated. sechdrs[i].sh_offset=10354688 > sizeof(*hrd)=64 make[1]: *** [__modpost] Error 1 Teach modpost that NOBITS section may point beyond the end of the file and that .modinfo can't be NOBITS. Reported-by: Pawel Dziekonski Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- scripts/mod/modpost.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 88921611b22e..7e62303133dc 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -415,8 +415,9 @@ static int parse_elf(struct elf_info *info, const char *filename) const char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; const char *secname; + int nobits = sechdrs[i].sh_type == SHT_NOBITS; - if (sechdrs[i].sh_offset > info->size) { + if (!nobits && sechdrs[i].sh_offset > info->size) { fatal("%s is truncated. sechdrs[i].sh_offset=%lu > " "sizeof(*hrd)=%zu\n", filename, (unsigned long)sechdrs[i].sh_offset, @@ -425,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename) } secname = secstrings + sechdrs[i].sh_name; if (strcmp(secname, ".modinfo") == 0) { + if (nobits) + fatal("%s has NOBITS .modinfo\n", filename); info->modinfo = (void *)hdr + sechdrs[i].sh_offset; info->modinfo_len = sechdrs[i].sh_size; } else if (strcmp(secname, "__ksymtab") == 0) From 65a4e574d2382d83f71b30ea92f86d2e40a6ef8d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:36:17 +0100 Subject: [PATCH 368/566] smp, generic: introduce arch_disable_smp_support() instead of disable_ioapic_setup() Impact: cleanup disable_ioapic_setup() in init/main.c is ugly as the function is x86-specific. The #ifdef inline prototype there is ugly too. Replace it with a generic arch_disable_smp_support() function - which has a weak alias for non-x86 architectures and for non-ioapic x86 builds. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 9 --------- arch/x86/kernel/apic.c | 4 +--- arch/x86/kernel/io_apic.c | 11 ++++++++++- arch/x86/kernel/smpboot.c | 2 +- include/linux/smp.h | 6 ++++++ init/main.c | 12 ++++++------ 6 files changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 08ec793aa043..309d0e23193a 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -143,15 +143,6 @@ extern int noioapicreroute; /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; -static inline void disable_ioapic_setup(void) -{ -#ifdef CONFIG_PCI - noioapicquirk = 1; - noioapicreroute = -1; -#endif - skip_ioapic_setup = 1; -} - /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 85d8b50d1af7..a04a73a51d20 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1138,9 +1138,7 @@ void __cpuinit setup_local_APIC(void) int i, j; if (disable_apic) { -#ifdef CONFIG_X86_IO_APIC - disable_ioapic_setup(); -#endif + arch_disable_smp_support(); return; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 57d60c741e37..84bccac4619f 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -98,10 +98,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; +void arch_disable_smp_support(void) +{ +#ifdef CONFIG_PCI + noioapicquirk = 1; + noioapicreroute = -1; +#endif + skip_ioapic_setup = 1; +} + static int __init parse_noapic(char *str) { /* disable IO-APIC */ - disable_ioapic_setup(); + arch_disable_smp_support(); return 0; } early_param("noapic", parse_noapic); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f40f86fec2fe..96f7d304f5c9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1071,7 +1071,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); - disable_ioapic_setup(); + arch_disable_smp_support(); return -1; } diff --git a/include/linux/smp.h b/include/linux/smp.h index 715196b09d67..d41a3a865fe3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,6 +66,12 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + /* * Call a function on all other processors */ diff --git a/init/main.c b/init/main.c index bfe4fb0c9842..6441083f8273 100644 --- a/init/main.c +++ b/init/main.c @@ -136,14 +136,14 @@ unsigned int __initdata setup_max_cpus = NR_CPUS; * greater than 0, limits the maximum number of CPUs activated in * SMP mode to . */ -#ifndef CONFIG_X86_IO_APIC -static inline void disable_ioapic_setup(void) {}; -#endif + +void __weak arch_disable_smp_support(void) { } static int __init nosmp(char *str) { setup_max_cpus = 0; - disable_ioapic_setup(); + arch_disable_smp_support(); + return 0; } @@ -153,14 +153,14 @@ static int __init maxcpus(char *str) { get_option(&str, &setup_max_cpus); if (setup_max_cpus == 0) - disable_ioapic_setup(); + arch_disable_smp_support(); return 0; } early_param("maxcpus", maxcpus); #else -#define setup_max_cpus NR_CPUS +const unsigned int setup_max_cpus = NR_CPUS; #endif /* From fdbecd9fd14198853bec4cbae8bc7af93f2e3de3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:57:12 +0100 Subject: [PATCH 369/566] x86, apic: explain the purpose of max_physical_apicid Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index a04a73a51d20..5475e1c31800 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -50,13 +50,26 @@ #include unsigned int num_processors; + unsigned disabled_cpus __cpuinitdata; + /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); + +/* + * The highest APIC ID seen during enumeration. + * + * This determines the messaging protocol we can use: if all APIC IDs + * are in the 0 ... 7 range, then we can use logical addressing which + * has some performance advantages (better broadcasting). + * + * If there's an APIC ID above 8, we use physical addressing. + */ unsigned int max_physical_apicid; -/* Bitmask of physically existing CPUs */ +/* + * Bitmask of physically existing CPUs: + */ physid_mask_t phys_cpu_present_map; /* From c5e954820335ef5aed1662b70aaf5deb9de16735 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 30 Jan 2009 17:29:27 -0800 Subject: [PATCH 370/566] x86: move default_ipi_xx back to ipi.c Impact: cleanup only leave _default_ipi_xx etc in .h Beyond the cleanup factor, this saves a bit of code size as well: text data bss dec hex filename 7281931 1630144 1463304 10375379 9e50d3 vmlinux.before 7281753 1630144 1463304 10375201 9e5021 vmlinux.after Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 6 -- arch/x86/include/asm/ipi.h | 127 ++++------------------------------ arch/x86/kernel/ipi.c | 108 +++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 120 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 3ef2bded97ac..1a20e3d12006 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -71,12 +71,6 @@ extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); #endif -/* IPI functions */ -#ifdef CONFIG_X86_32 -extern void default_send_IPI_self(int vector); -#endif -extern void default_send_IPI(int dest, int vector); - /* Statistics */ extern atomic_t irq_err_count; extern atomic_t irq_mis_count; diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index aa79945445b5..5f2efc5d9927 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -119,112 +119,22 @@ static inline void native_apic_mem_write(APIC_ICR, cfg); } -static inline void -default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) -{ - unsigned long query_cpu; - unsigned long flags; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicast to each CPU instead. - * - mbligh - */ - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - -static inline void -default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int query_cpu; - unsigned long flags; - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - +extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + int vector); #include -static inline void -default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. This - * should be modified to do 1 message per cluster ID - mbligh - */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector, - apic->dest_logical); - local_irq_restore(flags); -} - -static inline void -default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector, - apic->dest_logical); - } - local_irq_restore(flags); -} +extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + int vector); /* Avoid include hell */ #define NMI_VECTOR 0x02 extern int no_broadcast; -#ifndef CONFIG_X86_64 -/* - * This is only used on smaller machines. - */ -static inline void default_send_IPI_mask_bitmask_logical(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - unsigned long flags; - - local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __default_send_IPI_dest_field(mask, vector, apic->dest_logical); - local_irq_restore(flags); -} - -static inline void default_send_IPI_mask_logical(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_bitmask_logical(mask, vector); -} -#endif - static inline void __default_local_send_IPI_allbutself(int vector) { if (no_broadcast || vector == NMI_VECTOR) @@ -242,22 +152,11 @@ static inline void __default_local_send_IPI_all(int vector) } #ifdef CONFIG_X86_32 -static inline void default_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. - */ - if (!(num_online_cpus() > 1)) - return; - - __default_local_send_IPI_allbutself(vector); -} - -static inline void default_send_IPI_all(int vector) -{ - __default_local_send_IPI_all(vector); -} +extern void default_send_IPI_mask_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_allbutself(int vector); +extern void default_send_IPI_all(int vector); +extern void default_send_IPI_self(int vector); #endif #endif diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 339f4f3feee5..dbf5445727a9 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -19,8 +19,116 @@ #include #include +void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) +{ + unsigned long query_cpu; + unsigned long flags; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + int vector) +{ + unsigned int this_cpu = smp_processor_id(); + unsigned int query_cpu; + unsigned long flags; + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicasts to each CPU instead. This + * should be modified to do 1 message per cluster ID - mbligh + */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + local_irq_restore(flags); +} + +void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + } + local_irq_restore(flags); +} + #ifdef CONFIG_X86_32 +/* + * This is only used on smaller machines. + */ +void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) +{ + unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long flags; + + local_irq_save(flags); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); + local_irq_restore(flags); +} + +void default_send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then we get an APIC send + * error if we try to broadcast, thus avoid sending IPIs in this case. + */ + if (!(num_online_cpus() > 1)) + return; + + __default_local_send_IPI_allbutself(vector); +} + +void default_send_IPI_all(int vector) +{ + __default_local_send_IPI_all(vector); +} + void default_send_IPI_self(int vector) { __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); From a146649bc19d5eba4f5bfac6720c5f252d517a71 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 14:09:06 +0100 Subject: [PATCH 371/566] smp, generic: introduce arch_disable_smp_support(), build fix This function should be provided on UP too. Signed-off-by: Ingo Molnar --- include/linux/smp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index d41a3a865fe3..bbacb7baa446 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,12 +66,6 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); -/* - * Callback to arch code if there's nosmp or maxcpus=0 on the - * boot command line: - */ -extern void arch_disable_smp_support(void); - /* * Call a function on all other processors */ @@ -182,6 +176,12 @@ static inline void init_call_single_data(void) #define put_cpu() preempt_enable() #define put_cpu_no_resched() preempt_enable_no_resched() +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + void smp_setup_processor_id(void); #endif /* __LINUX_SMP_H */ From 4f179d121885142fb907b64956228b369d495958 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 1 Feb 2009 11:25:57 +0100 Subject: [PATCH 372/566] x86, numaq: cleanups Also move xquad_portio over to where it's allocated. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numaq.h | 2 ++ arch/x86/kernel/numaq_32.c | 33 ++++++++++++++++++--------------- arch/x86/pci/numaq_32.c | 4 ---- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 1e8bd30b4c16..9f0a5f5d29ec 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h @@ -31,6 +31,8 @@ extern int found_numaq; extern int get_memcfg_numaq(void); +extern void *xquad_portio; + /* * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the */ diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 947748e17211..0cc41a1d2550 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, IBM Corp. * - * All rights reserved. + * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,17 +23,18 @@ * Send feedback to */ -#include +#include #include #include #include -#include -#include -#include +#include + #include +#include #include -#include +#include #include +#include #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -91,19 +92,20 @@ static int __init numaq_pre_time_init(void) } int found_numaq; + /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... can't see a less disgusting way of * doing this .... */ struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; }; /* x86_quirks member */ @@ -444,7 +446,8 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) return physid_mask_of_physid(cpu + 4*node); } -extern void *xquad_portio; +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio; static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) { @@ -502,7 +505,7 @@ static void numaq_setup_portio_remap(void) int num_quads = num_online_nodes(); if (num_quads <= 1) - return; + return; printk("Remapping cross-quad port I/O for %d quads\n", num_quads); xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 1b2d773612e7..5601e829c387 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -18,10 +18,6 @@ #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; -EXPORT_SYMBOL(xquad_portio); - #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ From 8f9ca475c994e4d32f405183d07e8c7eedbdbdb4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Feb 2009 16:21:53 +0100 Subject: [PATCH 373/566] x86: clean up arch/x86/Kconfig* - Consistent alignment of help text - Use the ---help--- keyword everywhere consistently as a visual separator - fix whitespace mismatches Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 398 ++++++++++++++++++++--------------------- arch/x86/Kconfig.cpu | 70 ++++---- arch/x86/Kconfig.debug | 47 +++-- 3 files changed, 257 insertions(+), 258 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 80291f749b66..270ecf90bdb4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -5,7 +5,7 @@ mainmenu "Linux Kernel Configuration for x86" config 64BIT bool "64-bit kernel" if ARCH = "x86" default ARCH = "x86_64" - help + ---help--- Say yes to build a 64-bit kernel - formerly known as x86_64 Say no to build a 32-bit kernel - formerly known as i386 @@ -235,7 +235,7 @@ config SMP config SPARSE_IRQ bool "Support sparse irq numbering" depends on PCI_MSI || HT_IRQ - help + ---help--- This enables support for sparse irqs. This is useful for distro kernels that want to define a high CONFIG_NR_CPUS value but still want to have low kernel memory footprint on smaller machines. @@ -249,7 +249,7 @@ config NUMA_MIGRATE_IRQ_DESC bool "Move irq desc when changing irq smp_affinity" depends on SPARSE_IRQ && NUMA default n - help + ---help--- This enables moving irq_desc to cpu/node that irq will use handled. If you don't know what to do here, say N. @@ -258,19 +258,19 @@ config X86_MPPARSE bool "Enable MPS table" if ACPI default y depends on X86_LOCAL_APIC - help + ---help--- For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" depends on X86_32 && SMP - help + ---help--- This option is needed for the systems that have more than 8 CPUs config X86_NON_STANDARD bool "Support for non-standard x86 platforms" - help + ---help--- If you disable this option then the kernel will only support standard PC platforms. (which covers the vast majority of systems out there.) @@ -285,7 +285,7 @@ config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT depends on X86_NON_STANDARD - help + ---help--- The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -300,7 +300,7 @@ config X86_RDC321X depends on X86_NON_STANDARD select M486 select X86_REBOOTFIXUPS - help + ---help--- This option is needed for RDC R-321x system-on-chip, also known as R-8610-(G). If you don't have one of these chips, you should say N here. @@ -309,7 +309,7 @@ config X86_UV bool "SGI Ultraviolet" depends on X86_64 depends on X86_NON_STANDARD - help + ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. @@ -318,7 +318,7 @@ config X86_VSMP select PARAVIRT depends on X86_64 && PCI depends on X86_NON_STANDARD - help + ---help--- Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option if you have one of these machines. @@ -327,7 +327,7 @@ config X86_ELAN bool "AMD Elan" depends on X86_32 depends on X86_NON_STANDARD - help + ---help--- Select this for an AMD Elan processor. Do not use this option for K6/Athlon/Opteron processors! @@ -338,8 +338,8 @@ config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP depends on X86_NON_STANDARD - help - This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + ---help--- + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default subarchitectures. It is intended for a generic binary kernel. if you select them all, kernel will probe it one by one. and will fallback to default. @@ -349,7 +349,7 @@ config X86_NUMAQ depends on X86_32_NON_STANDARD select NUMA select X86_MPPARSE - help + ---help--- This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) NUMA multiquad box. This changes the way that processors are bootstrapped, and uses Clustered Logical APIC addressing mode instead @@ -359,14 +359,14 @@ config X86_NUMAQ config X86_SUMMIT bool "Summit/EXA (IBM x440)" depends on X86_32_NON_STANDARD - help + ---help--- This option is needed for IBM systems that use the Summit/EXA chipset. In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" depends on X86_32_NON_STANDARD && X86_BIGSMP - help + ---help--- Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. @@ -374,7 +374,7 @@ config X86_VOYAGER bool "Voyager (NCR)" depends on SMP && !PCI && BROKEN depends on X86_32_NON_STANDARD - help + ---help--- Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. @@ -387,7 +387,7 @@ config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" depends on X86 - help + ---help--- Calculate simpler /proc//wchan values. If this option is disabled then wchan values will recurse back to the caller function. This provides more accurate wchan values, @@ -397,7 +397,7 @@ config SCHED_OMIT_FRAME_POINTER menuconfig PARAVIRT_GUEST bool "Paravirtualized guest support" - help + ---help--- Say Y here to get to see options related to running Linux under various hypervisors. This option alone does not add any kernel code. @@ -411,7 +411,7 @@ config VMI bool "VMI Guest support" select PARAVIRT depends on X86_32 - help + ---help--- VMI provides a paravirtualized interface to the VMware ESX server (it could be used by other hypervisors in theory too, but is not at the moment), by linking the kernel to a GPL-ed ROM module @@ -421,7 +421,7 @@ config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT select PARAVIRT_CLOCK - help + ---help--- Turning on this option will allow you to run a paravirtualized clock when running over the KVM hypervisor. Instead of relying on a PIT (or probably other) emulation by the underlying device model, the host @@ -431,15 +431,15 @@ config KVM_CLOCK config KVM_GUEST bool "KVM Guest support" select PARAVIRT - help - This option enables various optimizations for running under the KVM - hypervisor. + ---help--- + This option enables various optimizations for running under the KVM + hypervisor. source "arch/x86/lguest/Kconfig" config PARAVIRT bool "Enable paravirtualization code" - help + ---help--- This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly over full virtualization. However, when run without a hypervisor @@ -452,21 +452,21 @@ config PARAVIRT_CLOCK endif config PARAVIRT_DEBUG - bool "paravirt-ops debugging" - depends on PARAVIRT && DEBUG_KERNEL - help - Enable to debug paravirt_ops internals. Specifically, BUG if - a paravirt_op is missing when it is called. + bool "paravirt-ops debugging" + depends on PARAVIRT && DEBUG_KERNEL + ---help--- + Enable to debug paravirt_ops internals. Specifically, BUG if + a paravirt_op is missing when it is called. config MEMTEST bool "Memtest" - help + ---help--- This option adds a kernel parameter 'memtest', which allows memtest to be set. - memtest=0, mean disabled; -- default - memtest=1, mean do 1 test pattern; - ... - memtest=4, mean do 4 test patterns. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. If you are unsure how to answer this question, answer N. config X86_SUMMIT_NUMA @@ -482,21 +482,21 @@ source "arch/x86/Kconfig.cpu" config HPET_TIMER def_bool X86_64 prompt "HPET Timer Support" if X86_32 - help - Use the IA-PC HPET (High Precision Event Timer) to manage - time in preference to the PIT and RTC, if a HPET is - present. - HPET is the next generation timer replacing legacy 8254s. - The HPET provides a stable time base on SMP - systems, unlike the TSC, but it is more expensive to access, - as it is off-chip. You can find the HPET spec at - . + ---help--- + Use the IA-PC HPET (High Precision Event Timer) to manage + time in preference to the PIT and RTC, if a HPET is + present. + HPET is the next generation timer replacing legacy 8254s. + The HPET provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. You can find the HPET spec at + . - You can safely choose Y here. However, HPET will only be - activated if the platform and the BIOS support this feature. - Otherwise the 8254 will be used for timing services. + You can safely choose Y here. However, HPET will only be + activated if the platform and the BIOS support this feature. + Otherwise the 8254 will be used for timing services. - Choose N to continue using the legacy 8254 timer. + Choose N to continue using the legacy 8254 timer. config HPET_EMULATE_RTC def_bool y @@ -507,7 +507,7 @@ config HPET_EMULATE_RTC config DMI default y bool "Enable DMI scanning" if EMBEDDED - help + ---help--- Enabled scanning of DMI to identify machine quirks. Say Y here unless you have verified that your setup is not affected by entries in the DMI blacklist. Required by PNP @@ -519,7 +519,7 @@ config GART_IOMMU select SWIOTLB select AGP depends on X86_64 && PCI - help + ---help--- Support for full DMA access of devices with 32bit memory access only on systems with more than 3GB. This is usually needed for USB, sound, many IDE/SATA chipsets and some other devices. @@ -534,7 +534,7 @@ config CALGARY_IOMMU bool "IBM Calgary IOMMU support" select SWIOTLB depends on X86_64 && PCI && EXPERIMENTAL - help + ---help--- Support for hardware IOMMUs in IBM's xSeries x366 and x460 systems. Needed to run systems with more than 3GB of memory properly with 32-bit PCI devices that do not support DAC @@ -552,7 +552,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT def_bool y prompt "Should Calgary be enabled by default?" depends on CALGARY_IOMMU - help + ---help--- Should Calgary be enabled by default? if you choose 'y', Calgary will be used (if it exists). If you choose 'n', Calgary will not be used even if it exists. If you choose 'n' and would like to use @@ -564,7 +564,7 @@ config AMD_IOMMU select SWIOTLB select PCI_MSI depends on X86_64 && PCI && ACPI - help + ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides remapping of DMA memory accesses from devices. With an AMD IOMMU you @@ -579,7 +579,7 @@ config AMD_IOMMU_STATS bool "Export AMD IOMMU statistics to debugfs" depends on AMD_IOMMU select DEBUG_FS - help + ---help--- This option enables code in the AMD IOMMU driver to collect various statistics about whats happening in the driver and exports that information to userspace via debugfs. @@ -588,7 +588,7 @@ config AMD_IOMMU_STATS # need this always selected by IOMMU for the VIA workaround config SWIOTLB def_bool y if X86_64 - help + ---help--- Support for software bounce buffers used on x86-64 systems which don't have a hardware IOMMU (e.g. the current generation of Intel's x86-64 CPUs). Using this PCI devices which can only @@ -606,7 +606,7 @@ config MAXSMP depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL select CPUMASK_OFFSTACK default n - help + ---help--- Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. @@ -617,7 +617,7 @@ config NR_CPUS default "4096" if MAXSMP default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) default "8" if SMP - help + ---help--- This allows you to specify the maximum number of CPUs which this kernel will support. The maximum supported value is 512 and the minimum value which makes sense is 2. @@ -628,7 +628,7 @@ config NR_CPUS config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on X86_HT - help + ---help--- SMT scheduler support improves the CPU scheduler's decision making when dealing with Intel Pentium 4 chips with HyperThreading at a cost of slightly increased overhead in some places. If unsure say @@ -638,7 +638,7 @@ config SCHED_MC def_bool y prompt "Multi-core scheduler support" depends on X86_HT - help + ---help--- Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. @@ -648,7 +648,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on X86_32 && !SMP && !X86_32_NON_STANDARD - help + ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU system which has a processor with a local APIC, you can say Y here to @@ -661,7 +661,7 @@ config X86_UP_APIC config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC - help + ---help--- An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an SMP-capable replacement for PC-style interrupt controllers. Most SMP systems and many recent uniprocessor systems have one. @@ -686,7 +686,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS bool "Reroute for broken boot IRQs" default n depends on X86_IO_APIC - help + ---help--- This option enables a workaround that fixes a source of spurious interrupts. This is recommended when threaded interrupt handling is used on systems where the generation of @@ -726,7 +726,7 @@ config X86_MCE_INTEL def_bool y prompt "Intel MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - help + ---help--- Additional support for intel specific MCE features such as the thermal monitor. @@ -734,14 +734,14 @@ config X86_MCE_AMD def_bool y prompt "AMD MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - help + ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" depends on X86_32 && X86_MCE - help + ---help--- Enabling this feature starts a timer that triggers every 5 seconds which will look at the machine check registers to see if anything happened. Non-fatal problems automatically get corrected (but still logged). @@ -754,7 +754,7 @@ config X86_MCE_NONFATAL config X86_MCE_P4THERMAL bool "check for P4 thermal throttling interrupt." depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) - help + ---help--- Enabling this feature will cause a message to be printed when the P4 enters thermal throttling. @@ -762,11 +762,11 @@ config VM86 bool "Enable VM86 support" if EMBEDDED default y depends on X86_32 - help - This option is required by programs like DOSEMU to run 16-bit legacy + ---help--- + This option is required by programs like DOSEMU to run 16-bit legacy code on X86 processors. It also may be needed by software like - XFree86 to initialize some video cards via BIOS. Disabling this - option saves about 6k. + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. config TOSHIBA tristate "Toshiba Laptop support" @@ -840,33 +840,33 @@ config MICROCODE module will be called microcode. config MICROCODE_INTEL - bool "Intel microcode patch loading support" - depends on MICROCODE - default MICROCODE - select FW_LOADER - --help--- - This options enables microcode patch loading support for Intel - processors. + bool "Intel microcode patch loading support" + depends on MICROCODE + default MICROCODE + select FW_LOADER + ---help--- + This options enables microcode patch loading support for Intel + processors. - For latest news and information on obtaining all the required - Intel ingredients for this driver, check: - . + For latest news and information on obtaining all the required + Intel ingredients for this driver, check: + . config MICROCODE_AMD - bool "AMD microcode patch loading support" - depends on MICROCODE - select FW_LOADER - --help--- - If you select this option, microcode patch loading support for AMD - processors will be enabled. + bool "AMD microcode patch loading support" + depends on MICROCODE + select FW_LOADER + ---help--- + If you select this option, microcode patch loading support for AMD + processors will be enabled. - config MICROCODE_OLD_INTERFACE +config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" - help + ---help--- This device gives privileged processes access to the x86 Model-Specific Registers (MSRs). It is a character device with major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. @@ -875,7 +875,7 @@ config X86_MSR config X86_CPUID tristate "/dev/cpu/*/cpuid - CPU information support" - help + ---help--- This device gives processes access to the x86 CPUID instruction to be executed on a specific processor. It is a character device with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to @@ -927,7 +927,7 @@ config NOHIGHMEM config HIGHMEM4G bool "4GB" depends on !X86_NUMAQ - help + ---help--- Select this if you have a 32-bit processor and between 1 and 4 gigabytes of physical RAM. @@ -935,7 +935,7 @@ config HIGHMEM64G bool "64GB" depends on !M386 && !M486 select X86_PAE - help + ---help--- Select this if you have a 32-bit processor and more than 4 gigabytes of physical RAM. @@ -946,7 +946,7 @@ choice prompt "Memory split" if EMBEDDED default VMSPLIT_3G depends on X86_32 - help + ---help--- Select the desired split between kernel and user memory. If the address range available to the kernel is less than the @@ -992,20 +992,20 @@ config HIGHMEM config X86_PAE bool "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G - help + ---help--- PAE is required for NX support, and furthermore enables larger swapspace support for non-overcommit purposes. It has the cost of more pagetable lookup overhead, and also consumes more pagetable space per process. config ARCH_PHYS_ADDR_T_64BIT - def_bool X86_64 || X86_PAE + def_bool X86_64 || X86_PAE config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EMBEDDED default y depends on X86_64 - help + ---help--- Allow the kernel linear mapping to use 1GB pages on CPUs that support it. This can improve the kernel's performance a tiny bit by reducing TLB pressure. If in doubt, say "Y". @@ -1016,7 +1016,7 @@ config NUMA depends on SMP depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) - help + ---help--- Enable NUMA (Non Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the @@ -1039,19 +1039,19 @@ config K8_NUMA def_bool y prompt "Old style AMD Opteron NUMA detection" depends on X86_64 && NUMA && PCI - help - Enable K8 NUMA node topology detection. You should say Y here if - you have a multi processor AMD K8 system. This uses an old - method to read the NUMA configuration directly from the builtin - Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA - instead, which also takes priority if both are compiled in. + ---help--- + Enable K8 NUMA node topology detection. You should say Y here if + you have a multi processor AMD K8 system. This uses an old + method to read the NUMA configuration directly from the builtin + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA + instead, which also takes priority if both are compiled in. config X86_64_ACPI_NUMA def_bool y prompt "ACPI NUMA detection" depends on X86_64 && NUMA && ACPI && PCI select ACPI_NUMA - help + ---help--- Enable ACPI SRAT based node topology detection. # Some NUMA nodes have memory ranges that span @@ -1066,7 +1066,7 @@ config NODES_SPAN_OTHER_NODES config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA - help + ---help--- Enable NUMA emulation. A flat machine will be split into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. @@ -1079,7 +1079,7 @@ config NODES_SHIFT default "4" if X86_NUMAQ default "3" depends on NEED_MULTIPLE_NODES - help + ---help--- Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accomodate various tables. @@ -1134,61 +1134,61 @@ source "mm/Kconfig" config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) - help + ---help--- The VM uses one page table entry for each page of physical memory. For systems with a lot of RAM, this can be wasteful of precious low memory. Setting this option will put user-space page table entries in high memory. config X86_CHECK_BIOS_CORRUPTION - bool "Check for low memory corruption" - help - Periodically check for memory corruption in low memory, which - is suspected to be caused by BIOS. Even when enabled in the - configuration, it is disabled at runtime. Enable it by - setting "memory_corruption_check=1" on the kernel command - line. By default it scans the low 64k of memory every 60 - seconds; see the memory_corruption_check_size and - memory_corruption_check_period parameters in - Documentation/kernel-parameters.txt to adjust this. + bool "Check for low memory corruption" + ---help--- + Periodically check for memory corruption in low memory, which + is suspected to be caused by BIOS. Even when enabled in the + configuration, it is disabled at runtime. Enable it by + setting "memory_corruption_check=1" on the kernel command + line. By default it scans the low 64k of memory every 60 + seconds; see the memory_corruption_check_size and + memory_corruption_check_period parameters in + Documentation/kernel-parameters.txt to adjust this. - When enabled with the default parameters, this option has - almost no overhead, as it reserves a relatively small amount - of memory and scans it infrequently. It both detects corruption - and prevents it from affecting the running system. + When enabled with the default parameters, this option has + almost no overhead, as it reserves a relatively small amount + of memory and scans it infrequently. It both detects corruption + and prevents it from affecting the running system. - It is, however, intended as a diagnostic tool; if repeatable - BIOS-originated corruption always affects the same memory, - you can use memmap= to prevent the kernel from using that - memory. + It is, however, intended as a diagnostic tool; if repeatable + BIOS-originated corruption always affects the same memory, + you can use memmap= to prevent the kernel from using that + memory. config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK - bool "Set the default setting of memory_corruption_check" + bool "Set the default setting of memory_corruption_check" depends on X86_CHECK_BIOS_CORRUPTION default y - help - Set whether the default state of memory_corruption_check is - on or off. + ---help--- + Set whether the default state of memory_corruption_check is + on or off. config X86_RESERVE_LOW_64K - bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" + bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" default y - help - Reserve the first 64K of physical RAM on BIOSes that are known - to potentially corrupt that memory range. A numbers of BIOSes are - known to utilize this area during suspend/resume, so it must not - be used by the kernel. + ---help--- + Reserve the first 64K of physical RAM on BIOSes that are known + to potentially corrupt that memory range. A numbers of BIOSes are + known to utilize this area during suspend/resume, so it must not + be used by the kernel. - Set this to N if you are absolutely sure that you trust the BIOS - to get all its memory reservations and usages right. + Set this to N if you are absolutely sure that you trust the BIOS + to get all its memory reservations and usages right. - If you have doubts about the BIOS (e.g. suspend/resume does not - work or there's kernel crashes after certain hardware hotplug - events) and it's not AMI or Phoenix, then you might want to enable - X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical - corruption patterns. + If you have doubts about the BIOS (e.g. suspend/resume does not + work or there's kernel crashes after certain hardware hotplug + events) and it's not AMI or Phoenix, then you might want to enable + X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical + corruption patterns. - Say Y if unsure. + Say Y if unsure. config MATH_EMULATION bool @@ -1254,7 +1254,7 @@ config MTRR_SANITIZER def_bool y prompt "MTRR cleanup support" depends on MTRR - help + ---help--- Convert MTRR layout from continuous to discrete, so X drivers can add writeback entries. @@ -1269,7 +1269,7 @@ config MTRR_SANITIZER_ENABLE_DEFAULT range 0 1 default "0" depends on MTRR_SANITIZER - help + ---help--- Enable mtrr cleanup default value config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT @@ -1277,7 +1277,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT range 0 7 default "1" depends on MTRR_SANITIZER - help + ---help--- mtrr cleanup spare entries default, it can be changed via mtrr_spare_reg_nr=N on the kernel command line. @@ -1285,7 +1285,7 @@ config X86_PAT bool prompt "x86 PAT support" depends on MTRR - help + ---help--- Use PAT attributes to setup page level cache control. PATs are the modern equivalents of MTRRs and are much more @@ -1300,20 +1300,20 @@ config EFI bool "EFI runtime service support" depends on ACPI ---help--- - This enables the kernel to use EFI runtime services that are - available (such as the EFI variable services). + This enables the kernel to use EFI runtime services that are + available (such as the EFI variable services). - This option is only useful on systems that have EFI firmware. - In addition, you should use the latest ELILO loader available - at in order to take advantage - of EFI runtime services. However, even with this option, the - resultant kernel should continue to boot on existing non-EFI - platforms. + This option is only useful on systems that have EFI firmware. + In addition, you should use the latest ELILO loader available + at in order to take advantage + of EFI runtime services. However, even with this option, the + resultant kernel should continue to boot on existing non-EFI + platforms. config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" - help + ---help--- This kernel feature is useful for number crunching applications that may need to compute untrusted bytecode during their execution. By using pipes or other transports made available to @@ -1333,8 +1333,8 @@ config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" depends on X86_64 select CC_STACKPROTECTOR_ALL - help - This option turns on the -fstack-protector GCC feature. This + ---help--- + This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of functions, a canary value on the stack just before the return address, and validates the value just before actually returning. Stack based buffer @@ -1351,7 +1351,7 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" - help + ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot but it is independent of the system firmware. And like a reboot @@ -1368,7 +1368,7 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps" depends on X86_64 || (X86_32 && HIGHMEM) - help + ---help--- Generate crash dump after being started by kexec. This should be normally only set in special crash dump kernels which are loaded in the main kernel with kexec-tools into @@ -1383,7 +1383,7 @@ config KEXEC_JUMP bool "kexec jump (EXPERIMENTAL)" depends on EXPERIMENTAL depends on KEXEC && HIBERNATION && X86_32 - help + ---help--- Jump between original kernel and kexeced kernel and invoke code in physical address mode via KEXEC @@ -1392,7 +1392,7 @@ config PHYSICAL_START default "0x1000000" if X86_NUMAQ default "0x200000" if X86_64 default "0x100000" - help + ---help--- This gives the physical address where the kernel is loaded. If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then @@ -1433,7 +1433,7 @@ config PHYSICAL_START config RELOCATABLE bool "Build a relocatable kernel (EXPERIMENTAL)" depends on EXPERIMENTAL - help + ---help--- This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. The relocations tend to make the kernel binary about 10% larger, @@ -1453,7 +1453,7 @@ config PHYSICAL_ALIGN default "0x100000" if X86_32 default "0x200000" if X86_64 range 0x2000 0x400000 - help + ---help--- This value puts the alignment restrictions on physical address where kernel is loaded and run from. Kernel is compiled for an address which meets above alignment restriction. @@ -1486,7 +1486,7 @@ config COMPAT_VDSO def_bool y prompt "Compat VDSO support" depends on X86_32 || IA32_EMULATION - help + ---help--- Map the 32-bit VDSO to the predictable old-style address too. ---help--- Say N here if you are running a sufficiently recent glibc @@ -1498,7 +1498,7 @@ config COMPAT_VDSO config CMDLINE_BOOL bool "Built-in kernel command line" default n - help + ---help--- Allow for specifying boot arguments to the kernel at build time. On some systems (e.g. embedded ones), it is necessary or convenient to provide some or all of the @@ -1516,7 +1516,7 @@ config CMDLINE string "Built-in kernel command string" depends on CMDLINE_BOOL default "" - help + ---help--- Enter arguments here that should be compiled into the kernel image and used at boot time. If the boot loader provides a command line at boot time, it is appended to this string to @@ -1533,7 +1533,7 @@ config CMDLINE_OVERRIDE bool "Built-in command line overrides boot loader arguments" default n depends on CMDLINE_BOOL - help + ---help--- Set this option to 'Y' to have the kernel ignore the boot loader command line, and use ONLY the built-in command line. @@ -1632,7 +1632,7 @@ if APM config APM_IGNORE_USER_SUSPEND bool "Ignore USER SUSPEND" - help + ---help--- This option will ignore USER SUSPEND requests. On machines with a compliant APM BIOS, you want to say N. However, on the NEC Versa M series notebooks, it is necessary to say Y because of a BIOS bug. @@ -1656,7 +1656,7 @@ config APM_DO_ENABLE config APM_CPU_IDLE bool "Make CPU Idle calls when idle" - help + ---help--- Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. On some machines, this can activate improved power savings, such as a slowed CPU clock rate, when the machine is idle. These idle calls @@ -1667,7 +1667,7 @@ config APM_CPU_IDLE config APM_DISPLAY_BLANK bool "Enable console blanking using APM" - help + ---help--- Enable console blanking using the APM. Some laptops can use this to turn off the LCD backlight when the screen blanker of the Linux virtual console blanks the screen. Note that this is only used by @@ -1680,7 +1680,7 @@ config APM_DISPLAY_BLANK config APM_ALLOW_INTS bool "Allow interrupts during APM BIOS calls" - help + ---help--- Normally we disable external interrupts while we are making calls to the APM BIOS as a measure to lessen the effects of a badly behaving BIOS implementation. The BIOS should reenable interrupts if it @@ -1705,7 +1705,7 @@ config PCI bool "PCI support" default y select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) - help + ---help--- Find out whether you have a PCI motherboard. PCI is the name of a bus system, i.e. the way the CPU talks to the other stuff inside your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or @@ -1776,7 +1776,7 @@ config PCI_MMCONFIG config DMAR bool "Support for DMA Remapping Devices (EXPERIMENTAL)" depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL - help + ---help--- DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. These DMA remapping devices are reported via ACPI tables @@ -1798,29 +1798,29 @@ config DMAR_GFX_WA def_bool y prompt "Support for Graphics workaround" depends on DMAR - help - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA. + ---help--- + Current Graphics drivers tend to use physical address + for DMA and avoid using DMA APIs. Setting this config + option permits the IOMMU driver to set a unity map for + all the OS-visible memory. Hence the driver can continue + to use physical addresses for DMA. config DMAR_FLOPPY_WA def_bool y depends on DMAR - help - Floppy disk drivers are know to bypass DMA API calls - thereby failing to work when IOMMU is enabled. This - workaround will setup a 1:1 mapping for the first - 16M to make floppy (an ISA device) work. + ---help--- + Floppy disk drivers are know to bypass DMA API calls + thereby failing to work when IOMMU is enabled. This + workaround will setup a 1:1 mapping for the first + 16M to make floppy (an ISA device) work. config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL - help - Supports Interrupt remapping for IO-APIC and MSI devices. - To use x2apic mode in the CPU's which support x2APIC enhancements or - to support platforms with CPU's having > 8 bit APIC ID, say Y. + ---help--- + Supports Interrupt remapping for IO-APIC and MSI devices. + To use x2apic mode in the CPU's which support x2APIC enhancements or + to support platforms with CPU's having > 8 bit APIC ID, say Y. source "drivers/pci/pcie/Kconfig" @@ -1834,7 +1834,7 @@ if X86_32 config ISA bool "ISA support" - help + ---help--- Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. the way the CPU talks to the other stuff inside your box. Other bus systems are PCI, EISA, MicroChannel @@ -1861,7 +1861,7 @@ source "drivers/eisa/Kconfig" config MCA bool "MCA support" - help + ---help--- MicroChannel Architecture is found in some IBM PS/2 machines and laptops. It is a bus system similar to PCI or ISA. See (and especially the web page given @@ -1871,7 +1871,7 @@ source "drivers/mca/Kconfig" config SCx200 tristate "NatSemi SCx200 support" - help + ---help--- This provides basic support for National Semiconductor's (now AMD's) Geode processors. The driver probes for the PCI-IDs of several on-chip devices, so its a good dependency @@ -1883,7 +1883,7 @@ config SCx200HR_TIMER tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" depends on SCx200 && GENERIC_TIME default y - help + ---help--- This driver provides a clocksource built upon the on-chip 27MHz high-resolution timer. Its also a workaround for NSC Geode SC-1100's buggy TSC, which loses time when the @@ -1894,7 +1894,7 @@ config GEODE_MFGPT_TIMER def_bool y prompt "Geode Multi-Function General Purpose Timer (MFGPT) events" depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS - help + ---help--- This driver provides a clock event source based on the MFGPT timer(s) in the CS5535 and CS5536 companion chip for the geode. MFGPTs have a better resolution and max interval than the @@ -1903,7 +1903,7 @@ config GEODE_MFGPT_TIMER config OLPC bool "One Laptop Per Child support" default n - help + ---help--- Add support for detecting the unique features of the OLPC XO hardware. @@ -1928,16 +1928,16 @@ config IA32_EMULATION bool "IA32 Emulation" depends on X86_64 select COMPAT_BINFMT_ELF - help + ---help--- Include code to run 32-bit programs under a 64-bit kernel. You should likely turn this on, unless you're 100% sure that you don't have any 32-bit programs left. config IA32_AOUT - tristate "IA32 a.out support" - depends on IA32_EMULATION - help - Support old a.out binaries in the 32bit emulation. + tristate "IA32 a.out support" + depends on IA32_EMULATION + ---help--- + Support old a.out binaries in the 32bit emulation. config COMPAT def_bool y diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 085fef4d8660..a95eaf0e582a 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -50,7 +50,7 @@ config M386 config M486 bool "486" depends on X86_32 - help + ---help--- Select this for a 486 series processor, either Intel or one of the compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or @@ -59,7 +59,7 @@ config M486 config M586 bool "586/K5/5x86/6x86/6x86MX" depends on X86_32 - help + ---help--- Select this for an 586 or 686 series processor such as the AMD K5, the Cyrix 5x86, 6x86 and 6x86MX. This choice does not assume the RDTSC (Read Time Stamp Counter) instruction. @@ -67,21 +67,21 @@ config M586 config M586TSC bool "Pentium-Classic" depends on X86_32 - help + ---help--- Select this for a Pentium Classic processor with the RDTSC (Read Time Stamp Counter) instruction for benchmarking. config M586MMX bool "Pentium-MMX" depends on X86_32 - help + ---help--- Select this for a Pentium with the MMX graphics/multimedia extended instructions. config M686 bool "Pentium-Pro" depends on X86_32 - help + ---help--- Select this for Intel Pentium Pro chips. This enables the use of Pentium Pro extended instructions, and disables the init-time guard against the f00f bug found in earlier Pentiums. @@ -89,7 +89,7 @@ config M686 config MPENTIUMII bool "Pentium-II/Celeron(pre-Coppermine)" depends on X86_32 - help + ---help--- Select this for Intel chips based on the Pentium-II and pre-Coppermine Celeron core. This option enables an unaligned copy optimization, compiles the kernel with optimization flags @@ -99,7 +99,7 @@ config MPENTIUMII config MPENTIUMIII bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" depends on X86_32 - help + ---help--- Select this for Intel chips based on the Pentium-III and Celeron-Coppermine core. This option enables use of some extended prefetch instructions in addition to the Pentium II @@ -108,14 +108,14 @@ config MPENTIUMIII config MPENTIUMM bool "Pentium M" depends on X86_32 - help + ---help--- Select this for Intel Pentium M (not Pentium-4 M) notebook chips. config MPENTIUM4 bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" depends on X86_32 - help + ---help--- Select this for Intel Pentium 4 chips. This includes the Pentium 4, Pentium D, P4-based Celeron and Xeon, and Pentium-4 M (not Pentium M) chips. This option enables compile @@ -151,7 +151,7 @@ config MPENTIUM4 config MK6 bool "K6/K6-II/K6-III" depends on X86_32 - help + ---help--- Select this for an AMD K6-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. @@ -159,14 +159,14 @@ config MK6 config MK7 bool "Athlon/Duron/K7" depends on X86_32 - help + ---help--- Select this for an AMD Athlon K7-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. config MK8 bool "Opteron/Athlon64/Hammer/K8" - help + ---help--- Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. @@ -174,7 +174,7 @@ config MK8 config MCRUSOE bool "Crusoe" depends on X86_32 - help + ---help--- Select this for a Transmeta Crusoe processor. Treats the processor like a 586 with TSC, and sets some GCC optimization flags (like a Pentium Pro with no alignment requirements). @@ -182,13 +182,13 @@ config MCRUSOE config MEFFICEON bool "Efficeon" depends on X86_32 - help + ---help--- Select this for a Transmeta Efficeon processor. config MWINCHIPC6 bool "Winchip-C6" depends on X86_32 - help + ---help--- Select this for an IDT Winchip C6 chip. Linux and GCC treat this chip as a 586TSC with some extended instructions and alignment requirements. @@ -196,7 +196,7 @@ config MWINCHIPC6 config MWINCHIP3D bool "Winchip-2/Winchip-2A/Winchip-3" depends on X86_32 - help + ---help--- Select this for an IDT Winchip-2, 2A or 3. Linux and GCC treat this chip as a 586TSC with some extended instructions and alignment requirements. Also enable out of order memory @@ -206,19 +206,19 @@ config MWINCHIP3D config MGEODEGX1 bool "GeodeGX1" depends on X86_32 - help + ---help--- Select this for a Geode GX1 (Cyrix MediaGX) chip. config MGEODE_LX bool "Geode GX/LX" depends on X86_32 - help + ---help--- Select this for AMD Geode GX and LX processors. config MCYRIXIII bool "CyrixIII/VIA-C3" depends on X86_32 - help + ---help--- Select this for a Cyrix III or C3 chip. Presently Linux and GCC treat this chip as a generic 586. Whilst the CPU is 686 class, it lacks the cmov extension which gcc assumes is present when @@ -230,7 +230,7 @@ config MCYRIXIII config MVIAC3_2 bool "VIA C3-2 (Nehemiah)" depends on X86_32 - help + ---help--- Select this for a VIA C3 "Nehemiah". Selecting this enables usage of SSE and tells gcc to treat the CPU as a 686. Note, this kernel will not boot on older (pre model 9) C3s. @@ -238,14 +238,14 @@ config MVIAC3_2 config MVIAC7 bool "VIA C7" depends on X86_32 - help + ---help--- Select this for a VIA C7. Selecting this uses the correct cache shift and tells gcc to treat the CPU as a 686. config MPSC bool "Intel P4 / older Netburst based Xeon" depends on X86_64 - help + ---help--- Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey Xeon CPUs with Intel 64bit which is compatible with x86-64. Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the @@ -255,7 +255,7 @@ config MPSC config MCORE2 bool "Core 2/newer Xeon" - help + ---help--- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) CPUs. You can distinguish newer from older Xeons by the CPU @@ -265,7 +265,7 @@ config MCORE2 config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 - help + ---help--- Generic x86-64 CPU. Run equally well on all x86-64 CPUs. @@ -274,7 +274,7 @@ endchoice config X86_GENERIC bool "Generic x86 support" depends on X86_32 - help + ---help--- Instead of just including optimizations for the selected x86 variant (e.g. PII, Crusoe or Athlon), include some more generic optimizations as well. This will make the kernel @@ -319,7 +319,7 @@ config X86_XADD config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 - help + ---help--- Old PentiumPro multiprocessor systems had errata that could cause memory operations to violate the x86 ordering standard in rare cases. Enabling this option will attempt to work around some (but not all) @@ -412,14 +412,14 @@ config X86_DEBUGCTLMSR menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EMBEDDED - help + ---help--- This lets you choose what x86 vendor support code your kernel will include. config CPU_SUP_INTEL default y bool "Support Intel processors" if PROCESSOR_SELECT - help + ---help--- This enables detection, tunings and quirks for Intel processors You need this enabled if you want your kernel to run on an @@ -433,7 +433,7 @@ config CPU_SUP_CYRIX_32 default y bool "Support Cyrix processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for Cyrix processors You need this enabled if you want your kernel to run on a @@ -446,7 +446,7 @@ config CPU_SUP_CYRIX_32 config CPU_SUP_AMD default y bool "Support AMD processors" if PROCESSOR_SELECT - help + ---help--- This enables detection, tunings and quirks for AMD processors You need this enabled if you want your kernel to run on an @@ -460,7 +460,7 @@ config CPU_SUP_CENTAUR_32 default y bool "Support Centaur processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for Centaur processors You need this enabled if you want your kernel to run on a @@ -474,7 +474,7 @@ config CPU_SUP_CENTAUR_64 default y bool "Support Centaur processors" if PROCESSOR_SELECT depends on 64BIT - help + ---help--- This enables detection, tunings and quirks for Centaur processors You need this enabled if you want your kernel to run on a @@ -488,7 +488,7 @@ config CPU_SUP_TRANSMETA_32 default y bool "Support Transmeta processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for Transmeta processors You need this enabled if you want your kernel to run on a @@ -502,7 +502,7 @@ config CPU_SUP_UMC_32 default y bool "Support UMC processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for UMC processors You need this enabled if you want your kernel to run on a @@ -521,7 +521,7 @@ config X86_PTRACE_BTS bool "Branch Trace Store" default y depends on X86_DEBUGCTLMSR - help + ---help--- This adds a ptrace interface to the hardware's branch trace store. Debuggers may use it to collect an execution trace of the debugged diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index a38dd6064f10..ba4781b93890 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -7,7 +7,7 @@ source "lib/Kconfig.debug" config STRICT_DEVMEM bool "Filter access to /dev/mem" - help + ---help--- If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental access to this is obviously disastrous, but specific access can @@ -25,7 +25,7 @@ config STRICT_DEVMEM config X86_VERBOSE_BOOTUP bool "Enable verbose x86 bootup info messages" default y - help + ---help--- Enables the informational output from the decompression stage (e.g. bzImage) of the boot. If you disable this you will still see errors. Disable this if you want silent bootup. @@ -33,7 +33,7 @@ config X86_VERBOSE_BOOTUP config EARLY_PRINTK bool "Early printk" if EMBEDDED default y - help + ---help--- Write kernel log output directly into the VGA buffer or to a serial port. @@ -47,7 +47,7 @@ config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" default n depends on EARLY_PRINTK && PCI - help + ---help--- Write kernel log output directly into the EHCI debug port. This is useful for kernel debugging when your machine crashes very @@ -59,14 +59,14 @@ config EARLY_PRINTK_DBGP config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL - help + ---help--- This option will cause messages to be printed if free stack space drops below a certain limit. config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL - help + ---help--- Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. @@ -75,7 +75,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL - help + ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types of memory corruptions. @@ -85,7 +85,7 @@ config DEBUG_PER_CPU_MAPS depends on DEBUG_KERNEL depends on SMP default n - help + ---help--- Say Y to verify that the per_cpu map being accessed has been setup. Adds a fair amount of code to kernel memory and decreases performance. @@ -96,7 +96,7 @@ config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL select DEBUG_FS - help + ---help--- Say Y here if you want to show the kernel pagetable layout in a debugfs file. This information is only useful for kernel developers who are working in architecture specific areas of the kernel. @@ -108,7 +108,7 @@ config DEBUG_RODATA bool "Write protect kernel read-only data structures" default y depends on DEBUG_KERNEL - help + ---help--- Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const data. This is recommended so that we can catch kernel bugs sooner. @@ -118,7 +118,7 @@ config DEBUG_RODATA_TEST bool "Testcase for the DEBUG_RODATA feature" depends on DEBUG_RODATA default y - help + ---help--- This option enables a testcase for the DEBUG_RODATA feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" @@ -126,7 +126,7 @@ config DEBUG_RODATA_TEST config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m - help + ---help--- This option enables a testcase for the CPU NX capability and the software setup of this feature. If in doubt, say "N" @@ -134,7 +134,7 @@ config DEBUG_NX_TEST config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on X86_32 - help + ---help--- If you say Y here the kernel will use a 4Kb stacksize for the kernel stack attached to each process/thread. This facilitates running more threads on a system and also reduces the pressure @@ -145,7 +145,7 @@ config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED depends on X86_32 - help + ---help--- This option allows trapping of rare doublefault exceptions that would otherwise cause a system to silently reboot. Disabling this option saves about 4k and might cause you much additional grey @@ -155,7 +155,7 @@ config IOMMU_DEBUG bool "Enable IOMMU debugging" depends on GART_IOMMU && DEBUG_KERNEL depends on X86_64 - help + ---help--- Force the IOMMU to on even when you have less than 4GB of memory and add debugging code. On overflow always panic. And allow to enable IOMMU leak tracing. Can be disabled at boot @@ -171,7 +171,7 @@ config IOMMU_LEAK bool "IOMMU leak tracing" depends on DEBUG_KERNEL depends on IOMMU_DEBUG - help + ---help--- Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. @@ -224,25 +224,25 @@ choice config IO_DELAY_0X80 bool "port 0x80 based port-IO delay [recommended]" - help + ---help--- This is the traditional Linux IO delay used for in/out_p. It is the most tested hence safest selection here. config IO_DELAY_0XED bool "port 0xed based port-IO delay" - help + ---help--- Use port 0xed as the IO delay. This frees up port 0x80 which is often used as a hardware-debug port. config IO_DELAY_UDELAY bool "udelay based port-IO delay" - help + ---help--- Use udelay(2) as the IO delay method. This provides the delay while not having any side-effect on the IO port space. config IO_DELAY_NONE bool "no port-IO delay" - help + ---help--- No port-IO delay. Will break on old boxes that require port-IO delay for certain operations. Should work on most new machines. @@ -276,18 +276,18 @@ config DEBUG_BOOT_PARAMS bool "Debug boot parameters" depends on DEBUG_KERNEL depends on DEBUG_FS - help + ---help--- This option will cause struct boot_params to be exported via debugfs. config CPA_DEBUG bool "CPA self-test code" depends on DEBUG_KERNEL - help + ---help--- Do change_page_attr() self-tests every 30 seconds. config OPTIMIZE_INLINING bool "Allow gcc to uninline functions marked 'inline'" - help + ---help--- This option determines if the kernel forces gcc to inline the functions developers have marked 'inline'. Doing so takes away freedom from gcc to do what it thinks is best, which is desirable for the gcc 3.x series of @@ -300,4 +300,3 @@ config OPTIMIZE_INLINING If unsure, say N. endmenu - From 1c2f61d40b691789626489fa947a3e003c9a84be Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 5 Feb 2009 23:59:04 -0800 Subject: [PATCH 374/566] sparc64: Don't hook up pcr_ops on spitfire chips. They can't be used for profiling and NMI watchdog currently since they lack the counter overflow interrupt. Signed-off-by: David S. Miller --- arch/sparc/kernel/pcr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 92e0dda141a4..1ae8cdd7e703 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -133,11 +133,16 @@ int __init pcr_arch_init(void) case cheetah: case cheetah_plus: - case spitfire: pcr_ops = &direct_pcr_ops; pcr_enable = PCR_SUN4U_ENABLE; break; + case spitfire: + /* UltraSPARC-I/II and derivatives lack a profile + * counter overflow interrupt so we can't make use of + * their hardware currently. + */ + /* fallthrough */ default: err = -ENODEV; goto out_unregister; From 684de409acff8b1fe8bf188d75ff2f99c624387d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 6 Feb 2009 00:49:55 -0800 Subject: [PATCH 375/566] ipv6: Disallow rediculious flowlabel option sizes. Just like PKTINFO, limit the options area to 64K. Based upon report by Eric Sesterhenn and analysis by Roland Dreier. Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c62dd247774f..7712578bdc66 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -323,17 +323,21 @@ static struct ip6_flowlabel * fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p) { - struct ip6_flowlabel *fl; + struct ip6_flowlabel *fl = NULL; int olen; int addr_type; int err; + olen = optlen - CMSG_ALIGN(sizeof(*freq)); + err = -EINVAL; + if (olen > 64 * 1024) + goto done; + err = -ENOMEM; fl = kzalloc(sizeof(*fl), GFP_KERNEL); if (fl == NULL) goto done; - olen = optlen - CMSG_ALIGN(sizeof(*freq)); if (olen > 0) { struct msghdr msg; struct flowi flowi; From efc683fc2a692735029067b4f939af2a3625e31d Mon Sep 17 00:00:00 2001 From: Gautam Kachroo Date: Fri, 6 Feb 2009 00:52:04 -0800 Subject: [PATCH 376/566] neigh: some entries can be skipped during dumping neightbl_dump_info and neigh_dump_table can skip entries if the *fill*info functions return an error. This results in an incomplete dump ((invoked by netlink requests for RTM_GETNEIGHTBL or RTM_GETNEIGH) nidx and idx should not be incremented if the current entry was not placed in the output buffer Signed-off-by: Gautam Kachroo Signed-off-by: David S. Miller --- net/core/neighbour.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f66c58df8953..278a142d1047 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1994,8 +1994,8 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (!net_eq(neigh_parms_net(p), net)) continue; - if (nidx++ < neigh_skip) - continue; + if (nidx < neigh_skip) + goto next; if (neightbl_fill_param_info(skb, tbl, p, NETLINK_CB(cb->skb).pid, @@ -2003,6 +2003,8 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) goto out; + next: + nidx++; } neigh_skip = 0; @@ -2082,12 +2084,10 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { - int lidx; if (dev_net(n->dev) != net) continue; - lidx = idx++; - if (lidx < s_idx) - continue; + if (idx < s_idx) + goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, @@ -2096,6 +2096,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, rc = -1; goto out; } + next: + idx++; } } read_unlock_bh(&tbl->lock); From 2783ef23128ad0a4b34e4121c1f7ff664785712f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 6 Feb 2009 01:59:12 -0800 Subject: [PATCH 377/566] udp: Fix potential wrong ip_hdr(skb) pointers Like the UDP header fix, pskb_may_pull() can potentially alter the SKB buffer. Thus the saddr and daddr, pointers may point to the old skb->data buffer. I haven't seen corruptions, as its only seen if the old skb->data buffer were reallocated by another user and written into very quickly (or poison'd by SLAB debugging). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cc3a0a06c004..c47c989cb1fb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1234,8 +1234,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct udphdr *uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; + __be32 saddr, daddr; struct net *net = dev_net(skb->dev); /* @@ -1259,6 +1258,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; + saddr = ip_hdr(skb)->saddr; + daddr = ip_hdr(skb)->daddr; + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); From 7d8e23df69820e6be42bcc41d441f4860e8c76f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 6 Feb 2009 14:57:51 +0100 Subject: [PATCH 378/566] timers: split process wide cpu clocks/timers, remove spurious warning Mike Galbraith reported that the new warning in thread_group_cputimer() triggers en masse with Amarok running. Oleg Nesterov observed: Can't fastpath_timer_check()->thread_group_cputimer() have the false warning too? Suppose we had the timer, then posix_cpu_timer_del() removes this timer, but task_cputime_zero(&sig->cputime_expires) still not true. Remove the spurious debug warning. Reported-by: Mike Galbraith Explained-by: Oleg Nesterov Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 082d7619b3a1..79392916d6c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2208,8 +2208,6 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; unsigned long flags; - WARN_ON(!cputimer->running); - spin_lock_irqsave(&cputimer->lock, flags); *times = cputimer->cputime; spin_unlock_irqrestore(&cputimer->lock, flags); From ff08f76d738d0ec0f334b187f61e160caa321d54 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: [PATCH 379/566] x86: clean up hpet timer reinit Implement Linus's suggestion: introduce the hpet_cnt_ahead() helper function to compare hpet time values - like other wrapping counter comparisons are abstracted away elsewhere. (jiffies, ktime_t, etc.) Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c761f914430a..388254f69a2a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -897,13 +897,21 @@ static unsigned long hpet_rtc_flags; static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; -static unsigned long hpet_t1_cmp; +static u32 hpet_t1_cmp; static unsigned long hpet_default_delta; static unsigned long hpet_pie_delta; static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; +/* + * Check that the hpet counter c1 is ahead of the c2 + */ +static inline int hpet_cnt_ahead(u32 c1, u32 c2) +{ + return (s32)(c2 - c1) < 0; +} + /* * Registers a IRQ handler. */ @@ -1075,7 +1083,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) From e2a02ba6676cdc5ebec503b558838c08c7083c14 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 6 Feb 2009 11:10:27 +1100 Subject: [PATCH 380/566] powerpc/83xx: Build breakage for CONFIG_PM but no CONFIG_SUSPEND I noticed this doing some randconfig testing (.config below). I have CONFIG_PM but no CONFIG_SUSPEND. Bug is against mainline. arch/powerpc/sysdev/built-in.o: In function `ipic_suspend': ipic.c:(.text+0x6b34): undefined reference to `fsl_deep_sleep' make[1]: *** [.tmp_vmlinux1] Error 1 make: *** [sub-make] Error 2 Looks like #ifdef CONFIG_PM in arch/powerpc/sysdev/ipic.c should be CONFIG_SUSPEND. d49747bdfb2ddebea24d1580da55b79d093d48a9 introduced this. Fix build when we have CONFIG_PM but no CONFIG_SUSPEND. Signed-off-by: Michael Neuling Acked-by: Scott Wood Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/ipic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 88a983ece5c9..9a89cd3e80a2 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -890,7 +890,7 @@ unsigned int ipic_get_irq(void) return irq_linear_revmap(primary_ipic->irqhost, irq); } -#ifdef CONFIG_PM +#ifdef CONFIG_SUSPEND static struct { u32 sicfr; u32 siprr[2]; From 1f0d4d16d9d4ffa65438425d092f7061476b95b3 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 5 Feb 2009 23:10:32 +0300 Subject: [PATCH 381/566] powerpc/83xx: Fix missing #{address,size}-cells in mpc8313erdb.dts commit b31a1d8b41513b96e9c7ec2f68c5734cef0b26a4 ("gianfar: Convert gianfar to an of_platform_driver") introduced a child node for the ethernet@25000 controller, but no address and size cells specifiers were added, and that makes dtc unhappy: DTC: dts->dtb on file "arch/powerpc/boot/dts/mpc8313erdb.dts" Warning (reg_format): "reg" property in /soc8313@e0000000/ethernet@25000/mdio@25520 has invalid length (8 bytes) (#address-cells == 2, #size-cells == 1) Warning (avoid_default_addr_size): Relying on default #address-cells value for /soc8313@e0000000/ethernet@25000/mdio@25520 Warning (avoid_default_addr_size): Relying on default #size-cells value for /soc8313@e0000000/ethernet@25000/mdio@25520 Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/boot/dts/mpc8313erdb.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts index 909a89cab9ac..3f84cd0384ed 100644 --- a/arch/powerpc/boot/dts/mpc8313erdb.dts +++ b/arch/powerpc/boot/dts/mpc8313erdb.dts @@ -219,6 +219,8 @@ }; enet1: ethernet@25000 { + #address-cells = <1>; + #size-cells = <1>; cell-index = <1>; device_type = "network"; model = "eTSEC"; From e85477f516c2de7ed515fcf94ceab5282eba7fa4 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 5 Feb 2009 23:10:40 +0300 Subject: [PATCH 382/566] powerpc/83xx: Fix TSEC0 workability on MPC8313E-RDB boards TSEC0 is connected to Vitesse 7385 5-port switch. The switch isn't connected to any mdio bus, the link to the switch is fixed to Full-duplex 1000 Mb/s (no pause). This patch fixes following failure during bootup: mdio@24520:01 not found eth0: Could not attach to PHY IP-Config: Failed to open eth0 Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/boot/dts/mpc8313erdb.dts | 9 ++------- arch/powerpc/configs/83xx/mpc8313_rdb_defconfig | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts index 3f84cd0384ed..3ebf7ec0484c 100644 --- a/arch/powerpc/boot/dts/mpc8313erdb.dts +++ b/arch/powerpc/boot/dts/mpc8313erdb.dts @@ -191,7 +191,8 @@ interrupts = <37 0x8 36 0x8 35 0x8>; interrupt-parent = <&ipic>; tbi-handle = < &tbi0 >; - phy-handle = < &phy1 >; + /* Vitesse 7385 isn't on the MDIO bus */ + fixed-link = <1 1 1000 0 0>; fsl,magic-packet; mdio@24520 { @@ -199,12 +200,6 @@ #size-cells = <0>; compatible = "fsl,gianfar-mdio"; reg = <0x24520 0x20>; - phy1: ethernet-phy@1 { - interrupt-parent = <&ipic>; - interrupts = <19 0x8>; - reg = <0x1>; - device_type = "ethernet-phy"; - }; phy4: ethernet-phy@4 { interrupt-parent = <&ipic>; interrupts = <20 0x8>; diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index 9e47ae957e2e..409d017621a8 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -651,7 +651,7 @@ CONFIG_CICADA_PHY=y # CONFIG_NATIONAL_PHY is not set # CONFIG_STE10XP is not set # CONFIG_LSI_ET1011C_PHY is not set -# CONFIG_FIXED_PHY is not set +CONFIG_FIXED_PHY=y # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y CONFIG_MII=y From 7f3ea17f316577e31db868f720ac575c74d20163 Mon Sep 17 00:00:00 2001 From: paulfax Date: Tue, 27 Jan 2009 02:44:07 -0600 Subject: [PATCH 383/566] powerpc/cpm2: Fix set interrupt type This is a simple change to correct problems when using set_irq_type on platforms using CPM2. This code corrects the problem on most platform but may have issues on 8272 derived platforms for some interrupts. On 8272 PC2 & 3 are missing and PC 23 & 29 are added, which this patch does not address. Signed-off-by: Paul Bilke Reviewed-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/cpm2_pic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index b16ca3ed65d2..78f1f7cca0a0 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -165,7 +165,7 @@ static int cpm2_set_irq_type(unsigned int virq, unsigned int flow_type) edibit = (14 - (src - CPM2_IRQ_EXT1)); else if (src >= CPM2_IRQ_PORTC15 && src <= CPM2_IRQ_PORTC0) - edibit = (31 - (src - CPM2_IRQ_PORTC15)); + edibit = (31 - (CPM2_IRQ_PORTC0 - src)); else return (flow_type & IRQ_TYPE_LEVEL_LOW) ? 0 : -EINVAL; From 08c6e3a57877ceda5c2f4c99ae7d433b8eef779a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 4 Feb 2009 22:43:04 +0100 Subject: [PATCH 384/566] arch/powerpc: Eliminate double sizeof Taking sizeof the result of sizeof is quite strange and does not seem to be what is wanted here. This was fixed using the following semantic patch. (http://www.emn.fr/x-info/coccinelle/) // @@ expression E; @@ - sizeof ( sizeof (E) - ) @@ type T; @@ - sizeof ( sizeof (T) - ) // Signed-off-by: Julia Lawall Acked-by: Scott Wood Signed-off-by: Kumar Gala --- arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 9876d7e072f4..ddf0bdc0fc8b 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -186,7 +186,7 @@ out_unmap_regs: iounmap(priv->regs); out_free_bootmem: free_bootmem((unsigned long)priv, - sizeof(sizeof(struct pq2ads_pci_pic))); + sizeof(struct pq2ads_pci_pic)); of_node_put(np); out_unmap_irq: irq_dispose_mapping(irq); From 355423d0849f4506bc71ab2738d38cb74429aaef Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 6 Feb 2009 21:49:57 -0800 Subject: [PATCH 385/566] r8169: Don't update statistics counters when interface is down Some Realtek chips (RTL8169sb/8110sb in my case) are unable to retrieve ethtool statistics when the interface is down. The process stays in endless loop in rtl8169_get_ethtool_stats. This is because these chips need to have receiver enabled (CmdRxEnb bit in ChipCmd register) that is cleared when the interface is going down. It's better to update statistics only when the interface is up and otherwise return copy of statistics grabbed when the interface was up (in rtl8169_close). It is interesting that PCI-E NICs (like 8168b/8111b...) are not affected. Signed-off-by: Ivan Vecera Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/r8169.c | 93 ++++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 2c73ca606b35..0771eb6fc6eb 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -437,6 +437,22 @@ enum features { RTL_FEATURE_GMII = (1 << 2), }; +struct rtl8169_counters { + __le64 tx_packets; + __le64 rx_packets; + __le64 tx_errors; + __le32 rx_errors; + __le16 rx_missed; + __le16 align_errors; + __le32 tx_one_collision; + __le32 tx_multi_collision; + __le64 rx_unicast; + __le64 rx_broadcast; + __le32 rx_multicast; + __le16 tx_aborted; + __le16 tx_underun; +}; + struct rtl8169_private { void __iomem *mmio_addr; /* memory map physical address */ struct pci_dev *pci_dev; /* Index of PCI device */ @@ -480,6 +496,7 @@ struct rtl8169_private { unsigned features; struct mii_if_info mii; + struct rtl8169_counters counters; }; MODULE_AUTHOR("Realtek and the Linux r8169 crew "); @@ -1100,22 +1117,6 @@ static const char rtl8169_gstrings[][ETH_GSTRING_LEN] = { "tx_underrun", }; -struct rtl8169_counters { - __le64 tx_packets; - __le64 rx_packets; - __le64 tx_errors; - __le32 rx_errors; - __le16 rx_missed; - __le16 align_errors; - __le32 tx_one_collision; - __le32 tx_multi_collision; - __le64 rx_unicast; - __le64 rx_broadcast; - __le32 rx_multicast; - __le16 tx_aborted; - __le16 tx_underun; -}; - static int rtl8169_get_sset_count(struct net_device *dev, int sset) { switch (sset) { @@ -1126,16 +1127,21 @@ static int rtl8169_get_sset_count(struct net_device *dev, int sset) } } -static void rtl8169_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) +static void rtl8169_update_counters(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; struct rtl8169_counters *counters; dma_addr_t paddr; u32 cmd; + int wait = 1000; - ASSERT_RTNL(); + /* + * Some chips are unable to dump tally counters when the receiver + * is disabled. + */ + if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0) + return; counters = pci_alloc_consistent(tp->pci_dev, sizeof(*counters), &paddr); if (!counters) @@ -1146,31 +1152,45 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev, RTL_W32(CounterAddrLow, cmd); RTL_W32(CounterAddrLow, cmd | CounterDump); - while (RTL_R32(CounterAddrLow) & CounterDump) { - if (msleep_interruptible(1)) + while (wait--) { + if ((RTL_R32(CounterAddrLow) & CounterDump) == 0) { + /* copy updated counters */ + memcpy(&tp->counters, counters, sizeof(*counters)); break; + } + udelay(10); } RTL_W32(CounterAddrLow, 0); RTL_W32(CounterAddrHigh, 0); - data[0] = le64_to_cpu(counters->tx_packets); - data[1] = le64_to_cpu(counters->rx_packets); - data[2] = le64_to_cpu(counters->tx_errors); - data[3] = le32_to_cpu(counters->rx_errors); - data[4] = le16_to_cpu(counters->rx_missed); - data[5] = le16_to_cpu(counters->align_errors); - data[6] = le32_to_cpu(counters->tx_one_collision); - data[7] = le32_to_cpu(counters->tx_multi_collision); - data[8] = le64_to_cpu(counters->rx_unicast); - data[9] = le64_to_cpu(counters->rx_broadcast); - data[10] = le32_to_cpu(counters->rx_multicast); - data[11] = le16_to_cpu(counters->tx_aborted); - data[12] = le16_to_cpu(counters->tx_underun); - pci_free_consistent(tp->pci_dev, sizeof(*counters), counters, paddr); } +static void rtl8169_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct rtl8169_private *tp = netdev_priv(dev); + + ASSERT_RTNL(); + + rtl8169_update_counters(dev); + + data[0] = le64_to_cpu(tp->counters.tx_packets); + data[1] = le64_to_cpu(tp->counters.rx_packets); + data[2] = le64_to_cpu(tp->counters.tx_errors); + data[3] = le32_to_cpu(tp->counters.rx_errors); + data[4] = le16_to_cpu(tp->counters.rx_missed); + data[5] = le16_to_cpu(tp->counters.align_errors); + data[6] = le32_to_cpu(tp->counters.tx_one_collision); + data[7] = le32_to_cpu(tp->counters.tx_multi_collision); + data[8] = le64_to_cpu(tp->counters.rx_unicast); + data[9] = le64_to_cpu(tp->counters.rx_broadcast); + data[10] = le32_to_cpu(tp->counters.rx_multicast); + data[11] = le16_to_cpu(tp->counters.tx_aborted); + data[12] = le16_to_cpu(tp->counters.tx_underun); +} + static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data) { switch(stringset) { @@ -3682,6 +3702,9 @@ static int rtl8169_close(struct net_device *dev) struct rtl8169_private *tp = netdev_priv(dev); struct pci_dev *pdev = tp->pci_dev; + /* update counters before going down */ + rtl8169_update_counters(dev); + rtl8169_down(dev); free_irq(dev->irq, dev); From 15bde72738f373aa060ececeda8e064e4f924360 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Feb 2009 21:50:52 -0800 Subject: [PATCH 386/566] RxRPC: Fix a potential NULL dereference Fix a potential NULL dereference bug during error handling in rxrpc_kernel_begin_call(), whereby rxrpc_put_transport() may be handed a NULL pointer. This was found with a code checker (http://repo.or.cz/w/smatch.git/). Reported-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index d7d2bed7a699..eac5e7bb7365 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -284,13 +284,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, if (IS_ERR(trans)) { call = ERR_CAST(trans); trans = NULL; - goto out; + goto out_notrans; } } else { trans = rx->trans; if (!trans) { call = ERR_PTR(-ENOTCONN); - goto out; + goto out_notrans; } atomic_inc(&trans->usage); } @@ -315,6 +315,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, rxrpc_put_bundle(trans, bundle); out: rxrpc_put_transport(trans); +out_notrans: release_sock(&rx->sk); _leave(" = %p", call); return call; From 71822faa3bc0af5dbf5e333a2d085f1ed7cd809f Mon Sep 17 00:00:00 2001 From: Ilkka Virta Date: Fri, 6 Feb 2009 22:00:36 -0800 Subject: [PATCH 387/566] sungem: Soft lockup in sungem on Netra AC200 when switching interface up From: Ilkka Virta In the lockup situation the driver seems to go off in an eternal storm of interrupts right after calling request_irq(). It doesn't actually do anything interesting in the interrupt handler. Since connecting the link afterwards works, something later in initialization must fix this. Looking at gem_do_start() and gem_open(), it seems that the only thing done while opening the device after the request_irq(), is a call to napi_enable(). I don't know what the ordering requirements are for the initialization, but I boldly tried to move the napi_enable() call inside gem_do_start() before the link state is checked and interrupts subsequently enabled, and it seems to work for me. Doesn't even break anything too obvious... Signed-off-by: David S. Miller --- drivers/net/sungem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index b17efa9cc530..491876341068 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -2221,6 +2221,8 @@ static int gem_do_start(struct net_device *dev) gp->running = 1; + napi_enable(&gp->napi); + if (gp->lstate == link_up) { netif_carrier_on(gp->dev); gem_set_link_modes(gp); @@ -2238,6 +2240,8 @@ static int gem_do_start(struct net_device *dev) spin_lock_irqsave(&gp->lock, flags); spin_lock(&gp->tx_lock); + napi_disable(&gp->napi); + gp->running = 0; gem_reset(gp); gem_clean_rings(gp); @@ -2338,8 +2342,6 @@ static int gem_open(struct net_device *dev) if (!gp->asleep) rc = gem_do_start(dev); gp->opened = (rc == 0); - if (gp->opened) - napi_enable(&gp->napi); mutex_unlock(&gp->pm_mutex); @@ -2476,8 +2478,6 @@ static int gem_resume(struct pci_dev *pdev) /* Re-attach net device */ netif_device_attach(dev); - - napi_enable(&gp->napi); } spin_lock_irqsave(&gp->lock, flags); From 152abd139cca049c9b559a7cca762fa7fd9fd264 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Fri, 6 Feb 2009 22:04:08 -0800 Subject: [PATCH 388/566] 3c509: Fix resume from hibernation for PnP mode. From: Ondrej Zary last year, I posted a patch which fixed hibernation on 3c509 cards. That was back in 2.6.24. It worked fine in 2.6.25. But then I stopped using hibernation (as it did not work with my new IT8212 RAID controller). Now I fixed it and noticed that 3c509 does not wake up properly anymore (in 2.6.28) - neither in PnP nor in ISA modes. ifconfig down/up makes the card work again in PnP mode. However, in ISA mode, ifconfig up ends with "No such device" error. Comparing the 3c509 driver between 2.6.25 and 2.6.28, there's only some statistics-related change. So the cause of the problem must be somewhere else. This patch makes the resume work in PnP mode, but it's still not enough for ISA mode. Signed-off-by: David S. Miller --- drivers/net/3c509.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 535c234286ea..8c694213035b 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -1475,6 +1475,7 @@ el3_resume(struct device *pdev) spin_lock_irqsave(&lp->lock, flags); outw(PowerUp, ioaddr + EL3_CMD); + EL3WINDOW(0); el3_up(dev); if (netif_running(dev)) From b4bd07c20ba0c1fa7ad09ba257e0a5cfc2bf6bb3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 6 Feb 2009 22:06:43 -0800 Subject: [PATCH 389/566] net_dma: call dmaengine_get only if NET_DMA enabled Based upon a patch from Atsushi Nemoto -------------------- The commit 649274d993212e7c23c0cb734572c2311c200872 ("net_dma: acquire/release dma channels on ifup/ifdown") added unconditional call of dmaengine_get() to net_dma. The API should be called only if NET_DMA was enabled. -------------------- Signed-off-by: David S. Miller Acked-by: Dan Williams --- include/linux/dmaengine.h | 12 ++++++++++++ net/core/dev.c | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3e0f64c335c8..3e68469c1885 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -282,6 +282,18 @@ static inline void dmaengine_put(void) } #endif +#ifdef CONFIG_NET_DMA +#define net_dmaengine_get() dmaengine_get() +#define net_dmaengine_put() dmaengine_put() +#else +static inline void net_dmaengine_get(void) +{ +} +static inline void net_dmaengine_put(void) +{ +} +#endif + dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, diff --git a/net/core/dev.c b/net/core/dev.c index 5379b0c1190a..a17e00662363 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1090,7 +1090,7 @@ int dev_open(struct net_device *dev) /* * Enable NET_DMA */ - dmaengine_get(); + net_dmaengine_get(); /* * Initialize multicasting status @@ -1172,7 +1172,7 @@ int dev_close(struct net_device *dev) /* * Shutdown NET_DMA */ - dmaengine_put(); + net_dmaengine_put(); return 0; } From beeebc92ee04bff6a722ebf85e23131faedd4479 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 6 Feb 2009 22:07:41 -0800 Subject: [PATCH 390/566] 9p: fix endian issues [attempt 3] When the changes were done to the protocol last release, some endian bugs crept in. This patch fixes those endian problems and has been verified to run on 32/64 bit and x86/ppc architectures. This version of the patch incorporates the correct annotations for endian variables. Signed-off-by: Eric Van Hensbergen Signed-off-by: David S. Miller --- net/9p/protocol.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/9p/protocol.c b/net/9p/protocol.c index dcd7666824ba..fc70147c771e 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include "protocol.h" @@ -160,29 +161,32 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) break; case 'w':{ int16_t *val = va_arg(ap, int16_t *); - if (pdu_read(pdu, val, sizeof(*val))) { + __le16 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } - *val = cpu_to_le16(*val); + *val = le16_to_cpu(le_val); } break; case 'd':{ int32_t *val = va_arg(ap, int32_t *); - if (pdu_read(pdu, val, sizeof(*val))) { + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } - *val = cpu_to_le32(*val); + *val = le32_to_cpu(le_val); } break; case 'q':{ int64_t *val = va_arg(ap, int64_t *); - if (pdu_read(pdu, val, sizeof(*val))) { + __le64 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } - *val = cpu_to_le64(*val); + *val = le64_to_cpu(le_val); } break; case 's':{ @@ -362,19 +366,19 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case 'w':{ - int16_t val = va_arg(ap, int); + __le16 val = cpu_to_le16(va_arg(ap, int)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 'd':{ - int32_t val = va_arg(ap, int32_t); + __le32 val = cpu_to_le32(va_arg(ap, int32_t)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 'q':{ - int64_t val = va_arg(ap, int64_t); + __le64 val = cpu_to_le64(va_arg(ap, int64_t)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } From 0b492fce3d72d982a7981905f85484a1e1ba7fde Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 7 Feb 2009 02:20:25 -0800 Subject: [PATCH 391/566] sunhme: Don't match PCI devices in SBUS probe. Unfortunately, the OF device tree nodes for SBUS and PCI hme devices have the same device node name on some systems. So if the name of the parent node isn't 'sbus', skip it. Based upon an excellent report and detective work by Meelis Roos and Eric Brower. Signed-off-by: David S. Miller Tested-by: Meelis Roos --- drivers/net/sunhme.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index 7a72a3112f0a..cc4013be5e18 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -2629,6 +2629,14 @@ static int __devinit happy_meal_sbus_probe_one(struct of_device *op, int is_qfe) int i, qfe_slot = -1; int err = -ENODEV; + sbus_dp = to_of_device(op->dev.parent)->node; + if (is_qfe) + sbus_dp = to_of_device(op->dev.parent->parent)->node; + + /* We can match PCI devices too, do not accept those here. */ + if (strcmp(sbus_dp->name, "sbus")) + return err; + if (is_qfe) { qp = quattro_sbus_find(op); if (qp == NULL) @@ -2734,10 +2742,6 @@ static int __devinit happy_meal_sbus_probe_one(struct of_device *op, int is_qfe) if (qp != NULL) hp->happy_flags |= HFLAG_QUATTRO; - sbus_dp = to_of_device(op->dev.parent)->node; - if (is_qfe) - sbus_dp = to_of_device(op->dev.parent->parent)->node; - /* Get the supported DVMA burst sizes from our Happy SBUS. */ hp->happy_bursts = of_getintprop_default(sbus_dp, "burst-sizes", 0x00); From 9b8d5a124f133fe9a75397d20b874844a2e3d7e9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 7 Feb 2009 11:15:41 +1000 Subject: [PATCH 392/566] drm/radeon: fix ioremap conflict with AGP mappings this solves a regression from http://bugzilla.kernel.org/show_bug.cgi?id=12441 Reported-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_memory.c | 7 ++++++- drivers/gpu/drm/radeon/radeon_cp.c | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c index 803bc9e7ce3c..bcc869bc4092 100644 --- a/drivers/gpu/drm/drm_memory.c +++ b/drivers/gpu/drm/drm_memory.c @@ -171,9 +171,14 @@ EXPORT_SYMBOL(drm_core_ioremap); void drm_core_ioremap_wc(struct drm_map *map, struct drm_device *dev) { - map->handle = ioremap_wc(map->offset, map->size); + if (drm_core_has_AGP(dev) && + dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP) + map->handle = agp_remap(map->offset, map->size, dev); + else + map->handle = ioremap_wc(map->offset, map->size); } EXPORT_SYMBOL(drm_core_ioremap_wc); + void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev) { if (!map->handle || !map->size) diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index 63212d7bbc28..df4cf97e5d97 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -1039,9 +1039,9 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, #if __OS_HAS_AGP if (dev_priv->flags & RADEON_IS_AGP) { - drm_core_ioremap(dev_priv->cp_ring, dev); - drm_core_ioremap(dev_priv->ring_rptr, dev); - drm_core_ioremap(dev->agp_buffer_map, dev); + drm_core_ioremap_wc(dev_priv->cp_ring, dev); + drm_core_ioremap_wc(dev_priv->ring_rptr, dev); + drm_core_ioremap_wc(dev->agp_buffer_map, dev); if (!dev_priv->cp_ring->handle || !dev_priv->ring_rptr->handle || !dev->agp_buffer_map->handle) { From e806b4957412bf472d826bd8cc571da041248799 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 22 Jan 2009 09:56:58 -0800 Subject: [PATCH 393/566] drm/i915: Suppress GEM teardown on X Server exit in KMS mode. Fixes hangs when starting X for the second time. Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index debad5c04cc0..a590d61ff692 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3273,6 +3273,9 @@ i915_gem_lastclose(struct drm_device *dev) { int ret; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return; + ret = i915_gem_idle(dev); if (ret) DRM_ERROR("failed to idle hardware: %d\n", ret); From 725e30ad6601d7fe443d9215d6331758a9d7e0c8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 22 Jan 2009 13:01:02 -0800 Subject: [PATCH 394/566] drm/i915: Skip SDVO/HDMI init when the chipset tells us it's not present. This saves startup time from probing SDVO, and saves setting up HDMI outputs on G4X devices that don't have them. Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 31c3732b7a69..dce1abfa6496 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1452,6 +1452,7 @@ static int intel_connector_clones(struct drm_device *dev, int type_mask) static void intel_setup_outputs(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_connector *connector; intel_crt_init(dev); @@ -1463,13 +1464,16 @@ static void intel_setup_outputs(struct drm_device *dev) if (IS_I9XX(dev)) { int found; - found = intel_sdvo_init(dev, SDVOB); - if (!found && SUPPORTS_INTEGRATED_HDMI(dev)) - intel_hdmi_init(dev, SDVOB); - - found = intel_sdvo_init(dev, SDVOC); - if (!found && SUPPORTS_INTEGRATED_HDMI(dev)) - intel_hdmi_init(dev, SDVOC); + if (I915_READ(SDVOB) & SDVO_DETECTED) { + found = intel_sdvo_init(dev, SDVOB); + if (!found && SUPPORTS_INTEGRATED_HDMI(dev)) + intel_hdmi_init(dev, SDVOB); + } + if (!IS_G4X(dev) || (I915_READ(SDVOB) & SDVO_DETECTED)) { + found = intel_sdvo_init(dev, SDVOC); + if (!found && SUPPORTS_INTEGRATED_HDMI(dev)) + intel_hdmi_init(dev, SDVOC); + } } else intel_dvo_init(dev); From ab657db12d7020629f26f30d287558a8d0e32b41 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 23 Jan 2009 12:57:47 -0800 Subject: [PATCH 395/566] drm/i915: Set up an MTRR covering the GTT at driver load. We'd love to just be using PAT, but even on chips with PAT it gets disabled sometimes due to an errata. It would probably be better to have pat_enabled exported and only bother with this when !pat_enabled. Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 29 ++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 6 ------ 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index ee64b7301f67..1e01e7847155 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -966,10 +966,6 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto kfree_devname; - dev_priv->mm.gtt_mapping = - io_mapping_create_wc(dev->agp->base, - dev->agp->agp_info.aper_size * 1024*1024); - /* Allow hardware batchbuffers unless told otherwise. */ dev_priv->allow_batchbuffer = 1; @@ -1081,6 +1077,23 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto free_priv; } + dev_priv->mm.gtt_mapping = + io_mapping_create_wc(dev->agp->base, + dev->agp->agp_info.aper_size * 1024*1024); + /* Set up a WC MTRR for non-PAT systems. This is more common than + * one would think, because the kernel disables PAT on first + * generation Core chips because WC PAT gets overridden by a UC + * MTRR if present. Even if a UC MTRR isn't present. + */ + dev_priv->mm.gtt_mtrr = mtrr_add(dev->agp->base, + dev->agp->agp_info.aper_size * + 1024 * 1024, + MTRR_TYPE_WRCOMB, 1); + if (dev_priv->mm.gtt_mtrr < 0) { + DRM_INFO("MTRR allocation failed\n. Graphics " + "performance may suffer.\n"); + } + #ifdef CONFIG_HIGHMEM64G /* don't enable GEM on PAE - needs agp + set_memory_* interface fixes */ dev_priv->has_gem = 0; @@ -1145,8 +1158,14 @@ int i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + io_mapping_free(dev_priv->mm.gtt_mapping); + if (dev_priv->mm.gtt_mtrr >= 0) { + mtrr_del(dev_priv->mm.gtt_mtrr, dev->agp->base, + dev->agp->agp_info.aper_size * 1024 * 1024); + dev_priv->mm.gtt_mtrr = -1; + } + if (drm_core_check_feature(dev, DRIVER_MODESET)) { - io_mapping_free(dev_priv->mm.gtt_mapping); drm_irq_uninstall(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e13518252007..f471d218b89a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -284,6 +284,7 @@ typedef struct drm_i915_private { struct drm_mm gtt_space; struct io_mapping *gtt_mapping; + int gtt_mtrr; /** * List of objects currently involved in rendering from the diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a590d61ff692..af8034d52511 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3229,10 +3229,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, dev_priv->mm.wedged = 0; } - dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base, - dev->agp->agp_info.aper_size - * 1024 * 1024); - mutex_lock(&dev->struct_mutex); dev_priv->mm.suspended = 0; @@ -3255,7 +3251,6 @@ int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - drm_i915_private_t *dev_priv = dev->dev_private; int ret; if (drm_core_check_feature(dev, DRIVER_MODESET)) @@ -3264,7 +3259,6 @@ i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, ret = i915_gem_idle(dev); drm_irq_uninstall(dev); - io_mapping_free(dev_priv->mm.gtt_mapping); return ret; } From d9ddcb96e05cfbadf3dbf66859bcaf5eae25af0b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 27 Jan 2009 10:33:49 -0800 Subject: [PATCH 396/566] drm/i915: Return error from i915_gem_object_get_fence_reg() when failing. Previously, the caller would continue along without knowing that the function failed, resulting in potential mis-rendering. Right now vm_fault just returns SIGBUS in that case, and we may need to disable signal handling to avoid that happening. Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index af8034d52511..e1f831f166ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); -static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj); +static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); static int i915_gem_evict_something(struct drm_device *dev); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, @@ -585,8 +585,11 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Need a new fence register? */ if (obj_priv->fence_reg == I915_FENCE_REG_NONE && - obj_priv->tiling_mode != I915_TILING_NONE) - i915_gem_object_get_fence_reg(obj); + obj_priv->tiling_mode != I915_TILING_NONE) { + ret = i915_gem_object_get_fence_reg(obj); + if (ret != 0) + return VM_FAULT_SIGBUS; + } pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + page_offset; @@ -1513,7 +1516,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) * It then sets up the reg based on the object's properties: address, pitch * and tiling format. */ -static void +static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; @@ -1563,10 +1566,11 @@ try_again: * objects to finish before trying again. */ if (i == dev_priv->num_fence_regs) { - ret = i915_gem_object_wait_rendering(reg->obj); + ret = i915_gem_object_set_to_gtt_domain(reg->obj, 0); if (ret) { - WARN(ret, "wait_rendering failed: %d\n", ret); - return; + WARN(ret != -ERESTARTSYS, + "switch to GTT domain failed: %d\n", ret); + return ret; } goto try_again; } @@ -1591,6 +1595,8 @@ try_again: i915_write_fence_reg(reg); else i830_write_fence_reg(reg); + + return 0; } /** From 0f973f27888e4664b253ab2cf69c67c2eb80ab1b Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 26 Jan 2009 17:10:45 -0800 Subject: [PATCH 397/566] drm/i915: add fence register management to execbuf Adds code to set up fence registers at execbuf time on pre-965 chips as necessary. Also fixes up a few bugs in the pre-965 tile register support (get_order != ffs). The number of fences available to the kernel defaults to the hw limit minus 3 (for legacy X front/back/depth), but a new parameter allows userspace to override that as needed. Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 10 +++ drivers/gpu/drm/i915/i915_drv.h | 6 ++ drivers/gpu/drm/i915/i915_gem.c | 56 +++++++++++----- drivers/gpu/drm/i915/i915_gem_tiling.c | 88 +++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 4 +- include/drm/i915_drm.h | 2 + 6 files changed, 146 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1e01e7847155..cc0adb428cee 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -731,6 +731,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_GEM: value = dev_priv->has_gem; break; + case I915_PARAM_NUM_FENCES_AVAIL: + value = dev_priv->num_fence_regs - dev_priv->fence_reg_start; + break; default: DRM_ERROR("Unknown parameter %d\n", param->param); return -EINVAL; @@ -764,6 +767,13 @@ static int i915_setparam(struct drm_device *dev, void *data, case I915_SETPARAM_ALLOW_BATCHBUFFER: dev_priv->allow_batchbuffer = param->value; break; + case I915_SETPARAM_NUM_USED_FENCES: + if (param->value > dev_priv->num_fence_regs || + param->value < 0) + return -EINVAL; + /* Userspace can use first N regs */ + dev_priv->fence_reg_start = param->value; + break; default: DRM_ERROR("unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f471d218b89a..a70bf77290fc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -602,6 +602,7 @@ int i915_gem_init_object(struct drm_gem_object *obj); void i915_gem_free_object(struct drm_gem_object *obj); int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment); void i915_gem_object_unpin(struct drm_gem_object *obj); +int i915_gem_object_unbind(struct drm_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); uint32_t i915_get_gem_seqno(struct drm_device *dev); void i915_gem_retire_requests(struct drm_device *dev); @@ -785,6 +786,11 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev)) #define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev)) +/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte + * rows, which changed the alignment requirements and fence programming. + */ +#define HAS_128_BYTE_Y_TILING(dev) (IS_I9XX(dev) && !(IS_I915G(dev) || \ + IS_I915GM(dev))) #define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev)) #define PRIMARY_RINGBUFFER_SIZE (128*1024) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e1f831f166ca..6a9e3a875083 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); -static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); +static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); static int i915_gem_evict_something(struct drm_device *dev); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, @@ -567,6 +567,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) pgoff_t page_offset; unsigned long pfn; int ret = 0; + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); /* We don't use vmf->pgoff since that has the fake offset */ page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> @@ -586,7 +587,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Need a new fence register? */ if (obj_priv->fence_reg == I915_FENCE_REG_NONE && obj_priv->tiling_mode != I915_TILING_NONE) { - ret = i915_gem_object_get_fence_reg(obj); + ret = i915_gem_object_get_fence_reg(obj, write); if (ret != 0) return VM_FAULT_SIGBUS; } @@ -1214,7 +1215,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj) /** * Unbinds an object from the GTT aperture. */ -static int +int i915_gem_object_unbind(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; @@ -1448,21 +1449,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = obj->driver_private; int regnum = obj_priv->fence_reg; + int tile_width; uint32_t val; uint32_t pitch_val; if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || (obj_priv->gtt_offset & (obj->size - 1))) { - WARN(1, "%s: object not 1M or size aligned\n", __func__); + WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n", + __func__, obj_priv->gtt_offset, obj->size); return; } - if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) || - IS_I945GM(dev) || - IS_G33(dev))) - pitch_val = (obj_priv->stride / 128) - 1; + if (obj_priv->tiling_mode == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(dev)) + tile_width = 128; else - pitch_val = (obj_priv->stride / 512) - 1; + tile_width = 512; + + /* Note: pitch better be a power of two tile widths */ + pitch_val = obj_priv->stride / tile_width; + pitch_val = ffs(pitch_val) - 1; val = obj_priv->gtt_offset; if (obj_priv->tiling_mode == I915_TILING_Y) @@ -1486,7 +1492,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || (obj_priv->gtt_offset & (obj->size - 1))) { - WARN(1, "%s: object not 1M or size aligned\n", __func__); + WARN(1, "%s: object 0x%08x not 1M or size aligned\n", + __func__, obj_priv->gtt_offset); return; } @@ -1506,6 +1513,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) /** * i915_gem_object_get_fence_reg - set up a fence reg for an object * @obj: object to map through a fence reg + * @write: object is about to be written * * When mapping objects through the GTT, userspace wants to be able to write * to them without having to worry about swizzling if the object is tiled. @@ -1517,7 +1525,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) * and tiling format. */ static int -i915_gem_object_get_fence_reg(struct drm_gem_object *obj) +i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write) { struct drm_device *dev = obj->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1530,12 +1538,18 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) WARN(1, "allocating a fence for non-tiled object?\n"); break; case I915_TILING_X: - WARN(obj_priv->stride & (512 - 1), - "object is X tiled but has non-512B pitch\n"); + if (!obj_priv->stride) + return -EINVAL; + WARN((obj_priv->stride & (512 - 1)), + "object 0x%08x is X tiled but has non-512B pitch\n", + obj_priv->gtt_offset); break; case I915_TILING_Y: - WARN(obj_priv->stride & (128 - 1), - "object is Y tiled but has non-128B pitch\n"); + if (!obj_priv->stride) + return -EINVAL; + WARN((obj_priv->stride & (128 - 1)), + "object 0x%08x is Y tiled but has non-128B pitch\n", + obj_priv->gtt_offset); break; } @@ -1637,7 +1651,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) if (dev_priv->mm.suspended) return -EBUSY; if (alignment == 0) - alignment = PAGE_SIZE; + alignment = i915_gem_get_gtt_alignment(obj); if (alignment & (PAGE_SIZE - 1)) { DRM_ERROR("Invalid object alignment requested %u\n", alignment); return -EINVAL; @@ -2658,6 +2672,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) DRM_ERROR("Failure to bind: %d", ret); return ret; } + /* + * Pre-965 chips need a fence register set up in order to + * properly handle tiled surfaces. + */ + if (!IS_I965G(dev) && + obj_priv->fence_reg == I915_FENCE_REG_NONE && + obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_get_fence_reg(obj, true); } obj_priv->pin_count++; @@ -3297,7 +3319,7 @@ i915_gem_load(struct drm_device *dev) /* Old X drivers will take 0-2 for front, back, depth buffers */ dev_priv->fence_reg_start = 3; - if (IS_I965G(dev)) + if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) dev_priv->num_fence_regs = 16; else dev_priv->num_fence_regs = 8; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 241f39b7f460..2534c792808e 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -173,6 +173,73 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) dev_priv->mm.bit_6_swizzle_y = swizzle_y; } + +/** + * Returns the size of the fence for a tiled object of the given size. + */ +static int +i915_get_fence_size(struct drm_device *dev, int size) +{ + int i; + int start; + + if (IS_I965G(dev)) { + /* The 965 can have fences at any page boundary. */ + return ALIGN(size, 4096); + } else { + /* Align the size to a power of two greater than the smallest + * fence size. + */ + if (IS_I9XX(dev)) + start = 1024 * 1024; + else + start = 512 * 1024; + + for (i = start; i < size; i <<= 1) + ; + + return i; + } +} + +/* Check pitch constriants for all chips & tiling formats */ +static bool +i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) +{ + int tile_width; + + /* Linear is always fine */ + if (tiling_mode == I915_TILING_NONE) + return true; + + if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) + tile_width = 128; + else + tile_width = 512; + + /* 965+ just needs multiples of tile width */ + if (IS_I965G(dev)) { + if (stride & (tile_width - 1)) + return false; + return true; + } + + /* Pre-965 needs power of two tile widths */ + if (stride < tile_width) + return false; + + if (stride & (stride - 1)) + return false; + + /* We don't handle the aperture area covered by the fence being bigger + * than the object size. + */ + if (i915_get_fence_size(dev, size) != size) + return false; + + return true; +} + /** * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. @@ -191,6 +258,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, return -EINVAL; obj_priv = obj->driver_private; + if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode)) + return -EINVAL; + mutex_lock(&dev->struct_mutex); if (args->tiling_mode == I915_TILING_NONE) { @@ -207,7 +277,23 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; } } - obj_priv->tiling_mode = args->tiling_mode; + if (args->tiling_mode != obj_priv->tiling_mode) { + int ret; + + /* Unbind the object, as switching tiling means we're + * switching the cache organization due to fencing, probably. + */ + ret = i915_gem_object_unbind(obj); + if (ret != 0) { + WARN(ret != -ERESTARTSYS, + "failed to unbind object for tiling switch"); + args->tiling_mode = obj_priv->tiling_mode; + mutex_unlock(&dev->struct_mutex); + + return ret; + } + obj_priv->tiling_mode = args->tiling_mode; + } obj_priv->stride = args->stride; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 273162579e1b..928e00462570 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,12 +186,12 @@ #define FENCE_REG_830_0 0x2000 #define I830_FENCE_START_MASK 0x07f80000 #define I830_FENCE_TILING_Y_SHIFT 12 -#define I830_FENCE_SIZE_BITS(size) ((get_order(size >> 19) - 1) << 8) +#define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8) #define I830_FENCE_PITCH_SHIFT 4 #define I830_FENCE_REG_VALID (1<<0) #define I915_FENCE_START_MASK 0x0ff00000 -#define I915_FENCE_SIZE_BITS(size) ((get_order(size >> 20) - 1) << 8) +#define I915_FENCE_SIZE_BITS(size) ((ffs((size) >> 20) - 1) << 8) #define FENCE_REG_965_0 0x03000 #define I965_FENCE_PITCH_SHIFT 2 diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index b3bcf72dc656..912cd52db965 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -261,6 +261,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_LAST_DISPATCH 3 #define I915_PARAM_CHIPSET_ID 4 #define I915_PARAM_HAS_GEM 5 +#define I915_PARAM_NUM_FENCES_AVAIL 6 typedef struct drm_i915_getparam { int param; @@ -272,6 +273,7 @@ typedef struct drm_i915_getparam { #define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 #define I915_SETPARAM_ALLOW_BATCHBUFFER 3 +#define I915_SETPARAM_NUM_USED_FENCES 4 typedef struct drm_i915_setparam { int param; From 72daad40dc0be179e0dc85c17d5dc1e850b5e8e4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 30 Jan 2009 21:10:22 +0000 Subject: [PATCH 398/566] drm/i915: Unref the object after failing to set tiling mode. Cleanup the object reference on the error paths. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem_tiling.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 2534c792808e..fa1685cba840 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -258,8 +258,10 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, return -EINVAL; obj_priv = obj->driver_private; - if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode)) + if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode)) { + drm_gem_object_unreference(obj); return -EINVAL; + } mutex_lock(&dev->struct_mutex); @@ -289,6 +291,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, "failed to unbind object for tiling switch"); args->tiling_mode = obj_priv->tiling_mode; mutex_unlock(&dev->struct_mutex); + drm_gem_object_unreference(obj); return ret; } From e2f0ba97d60e59fe5c6237851933a9c38a8f9a24 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 2 Feb 2009 15:11:52 -0800 Subject: [PATCH 399/566] drm/i915: sync SDVO code with stable userland modesetting driver Pull in an update from the 2D driver (hopefully the last one, future work should be done here and pulled back into xf86-video-intel as needed). Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 2 + drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_sdvo.c | 870 ++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_sdvo_regs.h | 404 +++++++++++- 4 files changed, 1173 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dce1abfa6496..bbdd72909a11 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -755,6 +755,8 @@ static void intel_crtc_mode_set(struct drm_crtc *crtc, case INTEL_OUTPUT_SDVO: case INTEL_OUTPUT_HDMI: is_sdvo = true; + if (intel_output->needs_tv_clock) + is_tv = true; break; case INTEL_OUTPUT_DVO: is_dvo = true; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8a4cc50c5b4e..957daef8edff 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -82,6 +82,7 @@ struct intel_output { struct intel_i2c_chan *i2c_bus; /* for control functions */ struct intel_i2c_chan *ddc_bus; /* for DDC only stuff */ bool load_detect_temp; + bool needs_tv_clock; void *dev_priv; }; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 407215469102..a30508b639ba 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -40,13 +40,59 @@ struct intel_sdvo_priv { struct intel_i2c_chan *i2c_bus; int slaveaddr; + + /* Register for the SDVO device: SDVOB or SDVOC */ int output_device; - u16 active_outputs; + /* Active outputs controlled by this SDVO output */ + uint16_t controlled_output; + /* + * Capabilities of the SDVO device returned by + * i830_sdvo_get_capabilities() + */ struct intel_sdvo_caps caps; + + /* Pixel clock limitations reported by the SDVO device, in kHz */ int pixel_clock_min, pixel_clock_max; + /** + * This is set if we're going to treat the device as TV-out. + * + * While we have these nice friendly flags for output types that ought + * to decide this for us, the S-Video output on our HDMI+S-Video card + * shows up as RGB1 (VGA). + */ + bool is_tv; + + /** + * This is set if we treat the device as HDMI, instead of DVI. + */ + bool is_hdmi; + + /** + * Returned SDTV resolutions allowed for the current format, if the + * device reported it. + */ + struct intel_sdvo_sdtv_resolution_reply sdtv_resolutions; + + /** + * Current selected TV format. + * + * This is stored in the same structure that's passed to the device, for + * convenience. + */ + struct intel_sdvo_tv_format tv_format; + + /* + * supported encoding mode, used to determine whether HDMI is + * supported + */ + struct intel_sdvo_encode encode; + + /* DDC bus used by this SDVO output */ + uint8_t ddc_bus; + int save_sdvo_mult; u16 save_active_outputs; struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2; @@ -148,8 +194,8 @@ static bool intel_sdvo_write_byte(struct intel_output *intel_output, int addr, #define SDVO_CMD_NAME_ENTRY(cmd) {cmd, #cmd} /** Mapping of command numbers to names, for debug output */ const static struct _sdvo_cmd_name { - u8 cmd; - char *name; + u8 cmd; + char *name; } sdvo_cmd_names[] = { SDVO_CMD_NAME_ENTRY(SDVO_CMD_RESET), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DEVICE_CAPS), @@ -186,8 +232,35 @@ const static struct _sdvo_cmd_name { SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_TV_FORMATS), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_FORMAT), SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_FORMAT), - SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_RESOLUTION_SUPPORT), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_POWER_STATES), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POWER_STATE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODER_POWER_STATE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DISPLAY_POWER_STATE), SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTROL_BUS_SWITCH), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS), + /* HDMI op code */ + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_PIXEL_REPLI), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PIXEL_REPLI), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY_CAP), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_COLORIMETRY), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_AUDIO_STAT), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_STAT), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INDEX), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_INDEX), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INFO), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_AV_SPLIT), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_AV_SPLIT), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_TXRATE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_TXRATE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_DATA), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA), }; #define SDVO_NAME(dev_priv) ((dev_priv)->output_device == SDVOB ? "SDVOB" : "SDVOC") @@ -506,6 +579,50 @@ static bool intel_sdvo_set_output_timing(struct intel_output *intel_output, SDVO_CMD_SET_OUTPUT_TIMINGS_PART1, dtd); } +static bool +intel_sdvo_create_preferred_input_timing(struct intel_output *output, + uint16_t clock, + uint16_t width, + uint16_t height) +{ + struct intel_sdvo_preferred_input_timing_args args; + uint8_t status; + + args.clock = clock; + args.width = width; + args.height = height; + intel_sdvo_write_cmd(output, SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING, + &args, sizeof(args)); + status = intel_sdvo_read_response(output, NULL, 0); + if (status != SDVO_CMD_STATUS_SUCCESS) + return false; + + return true; +} + +static bool intel_sdvo_get_preferred_input_timing(struct intel_output *output, + struct intel_sdvo_dtd *dtd) +{ + bool status; + + intel_sdvo_write_cmd(output, SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1, + NULL, 0); + + status = intel_sdvo_read_response(output, &dtd->part1, + sizeof(dtd->part1)); + if (status != SDVO_CMD_STATUS_SUCCESS) + return false; + + intel_sdvo_write_cmd(output, SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2, + NULL, 0); + + status = intel_sdvo_read_response(output, &dtd->part2, + sizeof(dtd->part2)); + if (status != SDVO_CMD_STATUS_SUCCESS) + return false; + + return false; +} static int intel_sdvo_get_clock_rate_mult(struct intel_output *intel_output) { @@ -536,36 +653,12 @@ static bool intel_sdvo_set_clock_rate_mult(struct intel_output *intel_output, u8 return true; } -static bool intel_sdvo_mode_fixup(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) +static void intel_sdvo_get_dtd_from_mode(struct intel_sdvo_dtd *dtd, + struct drm_display_mode *mode) { - /* Make the CRTC code factor in the SDVO pixel multiplier. The SDVO - * device will be told of the multiplier during mode_set. - */ - adjusted_mode->clock *= intel_sdvo_get_pixel_multiplier(mode); - return true; -} - -static void intel_sdvo_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_output *intel_output = enc_to_intel_output(encoder); - struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; - u16 width, height; - u16 h_blank_len, h_sync_len, v_blank_len, v_sync_len; - u16 h_sync_offset, v_sync_offset; - u32 sdvox; - struct intel_sdvo_dtd output_dtd; - int sdvo_pixel_multiply; - - if (!mode) - return; + uint16_t width, height; + uint16_t h_blank_len, h_sync_len, v_blank_len, v_sync_len; + uint16_t h_sync_offset, v_sync_offset; width = mode->crtc_hdisplay; height = mode->crtc_vdisplay; @@ -580,93 +673,423 @@ static void intel_sdvo_mode_set(struct drm_encoder *encoder, h_sync_offset = mode->crtc_hsync_start - mode->crtc_hblank_start; v_sync_offset = mode->crtc_vsync_start - mode->crtc_vblank_start; - output_dtd.part1.clock = mode->clock / 10; - output_dtd.part1.h_active = width & 0xff; - output_dtd.part1.h_blank = h_blank_len & 0xff; - output_dtd.part1.h_high = (((width >> 8) & 0xf) << 4) | + dtd->part1.clock = mode->clock / 10; + dtd->part1.h_active = width & 0xff; + dtd->part1.h_blank = h_blank_len & 0xff; + dtd->part1.h_high = (((width >> 8) & 0xf) << 4) | ((h_blank_len >> 8) & 0xf); - output_dtd.part1.v_active = height & 0xff; - output_dtd.part1.v_blank = v_blank_len & 0xff; - output_dtd.part1.v_high = (((height >> 8) & 0xf) << 4) | + dtd->part1.v_active = height & 0xff; + dtd->part1.v_blank = v_blank_len & 0xff; + dtd->part1.v_high = (((height >> 8) & 0xf) << 4) | ((v_blank_len >> 8) & 0xf); - output_dtd.part2.h_sync_off = h_sync_offset; - output_dtd.part2.h_sync_width = h_sync_len & 0xff; - output_dtd.part2.v_sync_off_width = (v_sync_offset & 0xf) << 4 | + dtd->part2.h_sync_off = h_sync_offset; + dtd->part2.h_sync_width = h_sync_len & 0xff; + dtd->part2.v_sync_off_width = (v_sync_offset & 0xf) << 4 | (v_sync_len & 0xf); - output_dtd.part2.sync_off_width_high = ((h_sync_offset & 0x300) >> 2) | + dtd->part2.sync_off_width_high = ((h_sync_offset & 0x300) >> 2) | ((h_sync_len & 0x300) >> 4) | ((v_sync_offset & 0x30) >> 2) | ((v_sync_len & 0x30) >> 4); - output_dtd.part2.dtd_flags = 0x18; + dtd->part2.dtd_flags = 0x18; if (mode->flags & DRM_MODE_FLAG_PHSYNC) - output_dtd.part2.dtd_flags |= 0x2; + dtd->part2.dtd_flags |= 0x2; if (mode->flags & DRM_MODE_FLAG_PVSYNC) - output_dtd.part2.dtd_flags |= 0x4; + dtd->part2.dtd_flags |= 0x4; - output_dtd.part2.sdvo_flags = 0; - output_dtd.part2.v_sync_off_high = v_sync_offset & 0xc0; - output_dtd.part2.reserved = 0; + dtd->part2.sdvo_flags = 0; + dtd->part2.v_sync_off_high = v_sync_offset & 0xc0; + dtd->part2.reserved = 0; +} - /* Set the output timing to the screen */ - intel_sdvo_set_target_output(intel_output, sdvo_priv->active_outputs); - intel_sdvo_set_output_timing(intel_output, &output_dtd); +static void intel_sdvo_get_mode_from_dtd(struct drm_display_mode * mode, + struct intel_sdvo_dtd *dtd) +{ + uint16_t width, height; + uint16_t h_blank_len, h_sync_len, v_blank_len, v_sync_len; + uint16_t h_sync_offset, v_sync_offset; + + width = mode->crtc_hdisplay; + height = mode->crtc_vdisplay; + + /* do some mode translations */ + h_blank_len = mode->crtc_hblank_end - mode->crtc_hblank_start; + h_sync_len = mode->crtc_hsync_end - mode->crtc_hsync_start; + + v_blank_len = mode->crtc_vblank_end - mode->crtc_vblank_start; + v_sync_len = mode->crtc_vsync_end - mode->crtc_vsync_start; + + h_sync_offset = mode->crtc_hsync_start - mode->crtc_hblank_start; + v_sync_offset = mode->crtc_vsync_start - mode->crtc_vblank_start; + + mode->hdisplay = dtd->part1.h_active; + mode->hdisplay += ((dtd->part1.h_high >> 4) & 0x0f) << 8; + mode->hsync_start = mode->hdisplay + dtd->part2.h_sync_off; + mode->hsync_start += (dtd->part2.sync_off_width_high & 0xa0) << 2; + mode->hsync_end = mode->hsync_start + dtd->part2.h_sync_width; + mode->hsync_end += (dtd->part2.sync_off_width_high & 0x30) << 4; + mode->htotal = mode->hdisplay + dtd->part1.h_blank; + mode->htotal += (dtd->part1.h_high & 0xf) << 8; + + mode->vdisplay = dtd->part1.v_active; + mode->vdisplay += ((dtd->part1.v_high >> 4) & 0x0f) << 8; + mode->vsync_start = mode->vdisplay; + mode->vsync_start += (dtd->part2.v_sync_off_width >> 4) & 0xf; + mode->vsync_start += (dtd->part2.sync_off_width_high & 0x0a) << 2; + mode->vsync_start += dtd->part2.v_sync_off_high & 0xc0; + mode->vsync_end = mode->vsync_start + + (dtd->part2.v_sync_off_width & 0xf); + mode->vsync_end += (dtd->part2.sync_off_width_high & 0x3) << 4; + mode->vtotal = mode->vdisplay + dtd->part1.v_blank; + mode->vtotal += (dtd->part1.v_high & 0xf) << 8; + + mode->clock = dtd->part1.clock * 10; + + mode->flags &= (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC); + if (dtd->part2.dtd_flags & 0x2) + mode->flags |= DRM_MODE_FLAG_PHSYNC; + if (dtd->part2.dtd_flags & 0x4) + mode->flags |= DRM_MODE_FLAG_PVSYNC; +} + +static bool intel_sdvo_get_supp_encode(struct intel_output *output, + struct intel_sdvo_encode *encode) +{ + uint8_t status; + + intel_sdvo_write_cmd(output, SDVO_CMD_GET_SUPP_ENCODE, NULL, 0); + status = intel_sdvo_read_response(output, encode, sizeof(*encode)); + if (status != SDVO_CMD_STATUS_SUCCESS) { /* non-support means DVI */ + memset(encode, 0, sizeof(*encode)); + return false; + } + + return true; +} + +static bool intel_sdvo_set_encode(struct intel_output *output, uint8_t mode) +{ + uint8_t status; + + intel_sdvo_write_cmd(output, SDVO_CMD_SET_ENCODE, &mode, 1); + status = intel_sdvo_read_response(output, NULL, 0); + + return (status == SDVO_CMD_STATUS_SUCCESS); +} + +static bool intel_sdvo_set_colorimetry(struct intel_output *output, + uint8_t mode) +{ + uint8_t status; + + intel_sdvo_write_cmd(output, SDVO_CMD_SET_COLORIMETRY, &mode, 1); + status = intel_sdvo_read_response(output, NULL, 0); + + return (status == SDVO_CMD_STATUS_SUCCESS); +} + +#if 0 +static void intel_sdvo_dump_hdmi_buf(struct intel_output *output) +{ + int i, j; + uint8_t set_buf_index[2]; + uint8_t av_split; + uint8_t buf_size; + uint8_t buf[48]; + uint8_t *pos; + + intel_sdvo_write_cmd(output, SDVO_CMD_GET_HBUF_AV_SPLIT, NULL, 0); + intel_sdvo_read_response(output, &av_split, 1); + + for (i = 0; i <= av_split; i++) { + set_buf_index[0] = i; set_buf_index[1] = 0; + intel_sdvo_write_cmd(output, SDVO_CMD_SET_HBUF_INDEX, + set_buf_index, 2); + intel_sdvo_write_cmd(output, SDVO_CMD_GET_HBUF_INFO, NULL, 0); + intel_sdvo_read_response(output, &buf_size, 1); + + pos = buf; + for (j = 0; j <= buf_size; j += 8) { + intel_sdvo_write_cmd(output, SDVO_CMD_GET_HBUF_DATA, + NULL, 0); + intel_sdvo_read_response(output, pos, 8); + pos += 8; + } + } +} +#endif + +static void intel_sdvo_set_hdmi_buf(struct intel_output *output, int index, + uint8_t *data, int8_t size, uint8_t tx_rate) +{ + uint8_t set_buf_index[2]; + + set_buf_index[0] = index; + set_buf_index[1] = 0; + + intel_sdvo_write_cmd(output, SDVO_CMD_SET_HBUF_INDEX, set_buf_index, 2); + + for (; size > 0; size -= 8) { + intel_sdvo_write_cmd(output, SDVO_CMD_SET_HBUF_DATA, data, 8); + data += 8; + } + + intel_sdvo_write_cmd(output, SDVO_CMD_SET_HBUF_TXRATE, &tx_rate, 1); +} + +static uint8_t intel_sdvo_calc_hbuf_csum(uint8_t *data, uint8_t size) +{ + uint8_t csum = 0; + int i; + + for (i = 0; i < size; i++) + csum += data[i]; + + return 0x100 - csum; +} + +#define DIP_TYPE_AVI 0x82 +#define DIP_VERSION_AVI 0x2 +#define DIP_LEN_AVI 13 + +struct dip_infoframe { + uint8_t type; + uint8_t version; + uint8_t len; + uint8_t checksum; + union { + struct { + /* Packet Byte #1 */ + uint8_t S:2; + uint8_t B:2; + uint8_t A:1; + uint8_t Y:2; + uint8_t rsvd1:1; + /* Packet Byte #2 */ + uint8_t R:4; + uint8_t M:2; + uint8_t C:2; + /* Packet Byte #3 */ + uint8_t SC:2; + uint8_t Q:2; + uint8_t EC:3; + uint8_t ITC:1; + /* Packet Byte #4 */ + uint8_t VIC:7; + uint8_t rsvd2:1; + /* Packet Byte #5 */ + uint8_t PR:4; + uint8_t rsvd3:4; + /* Packet Byte #6~13 */ + uint16_t top_bar_end; + uint16_t bottom_bar_start; + uint16_t left_bar_end; + uint16_t right_bar_start; + } avi; + struct { + /* Packet Byte #1 */ + uint8_t channel_count:3; + uint8_t rsvd1:1; + uint8_t coding_type:4; + /* Packet Byte #2 */ + uint8_t sample_size:2; /* SS0, SS1 */ + uint8_t sample_frequency:3; + uint8_t rsvd2:3; + /* Packet Byte #3 */ + uint8_t coding_type_private:5; + uint8_t rsvd3:3; + /* Packet Byte #4 */ + uint8_t channel_allocation; + /* Packet Byte #5 */ + uint8_t rsvd4:3; + uint8_t level_shift:4; + uint8_t downmix_inhibit:1; + } audio; + uint8_t payload[28]; + } __attribute__ ((packed)) u; +} __attribute__((packed)); + +static void intel_sdvo_set_avi_infoframe(struct intel_output *output, + struct drm_display_mode * mode) +{ + struct dip_infoframe avi_if = { + .type = DIP_TYPE_AVI, + .version = DIP_VERSION_AVI, + .len = DIP_LEN_AVI, + }; + + avi_if.checksum = intel_sdvo_calc_hbuf_csum((uint8_t *)&avi_if, + 4 + avi_if.len); + intel_sdvo_set_hdmi_buf(output, 1, (uint8_t *)&avi_if, 4 + avi_if.len, + SDVO_HBUF_TX_VSYNC); +} + +static bool intel_sdvo_mode_fixup(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct intel_output *output = enc_to_intel_output(encoder); + struct intel_sdvo_priv *dev_priv = output->dev_priv; + + if (!dev_priv->is_tv) { + /* Make the CRTC code factor in the SDVO pixel multiplier. The + * SDVO device will be told of the multiplier during mode_set. + */ + adjusted_mode->clock *= intel_sdvo_get_pixel_multiplier(mode); + } else { + struct intel_sdvo_dtd output_dtd; + bool success; + + /* We need to construct preferred input timings based on our + * output timings. To do that, we have to set the output + * timings, even though this isn't really the right place in + * the sequence to do it. Oh well. + */ + + + /* Set output timings */ + intel_sdvo_get_dtd_from_mode(&output_dtd, mode); + intel_sdvo_set_target_output(output, + dev_priv->controlled_output); + intel_sdvo_set_output_timing(output, &output_dtd); + + /* Set the input timing to the screen. Assume always input 0. */ + intel_sdvo_set_target_input(output, true, false); + + + success = intel_sdvo_create_preferred_input_timing(output, + mode->clock / 10, + mode->hdisplay, + mode->vdisplay); + if (success) { + struct intel_sdvo_dtd input_dtd; + + intel_sdvo_get_preferred_input_timing(output, + &input_dtd); + intel_sdvo_get_mode_from_dtd(adjusted_mode, &input_dtd); + + } else { + return false; + } + } + return true; +} + +static void intel_sdvo_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct drm_device *dev = encoder->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_crtc *crtc = encoder->crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_output *output = enc_to_intel_output(encoder); + struct intel_sdvo_priv *sdvo_priv = output->dev_priv; + u32 sdvox = 0; + int sdvo_pixel_multiply; + struct intel_sdvo_in_out_map in_out; + struct intel_sdvo_dtd input_dtd; + u8 status; + + if (!mode) + return; + + /* First, set the input mapping for the first input to our controlled + * output. This is only correct if we're a single-input device, in + * which case the first input is the output from the appropriate SDVO + * channel on the motherboard. In a two-input device, the first input + * will be SDVOB and the second SDVOC. + */ + in_out.in0 = sdvo_priv->controlled_output; + in_out.in1 = 0; + + intel_sdvo_write_cmd(output, SDVO_CMD_SET_IN_OUT_MAP, + &in_out, sizeof(in_out)); + status = intel_sdvo_read_response(output, NULL, 0); + + if (sdvo_priv->is_hdmi) { + intel_sdvo_set_avi_infoframe(output, mode); + sdvox |= SDVO_AUDIO_ENABLE; + } + + intel_sdvo_get_dtd_from_mode(&input_dtd, mode); + + /* If it's a TV, we already set the output timing in mode_fixup. + * Otherwise, the output timing is equal to the input timing. + */ + if (!sdvo_priv->is_tv) { + /* Set the output timing to the screen */ + intel_sdvo_set_target_output(output, + sdvo_priv->controlled_output); + intel_sdvo_set_output_timing(output, &input_dtd); + } /* Set the input timing to the screen. Assume always input 0. */ - intel_sdvo_set_target_input(intel_output, true, false); + intel_sdvo_set_target_input(output, true, false); - /* We would like to use i830_sdvo_create_preferred_input_timing() to + /* We would like to use intel_sdvo_create_preferred_input_timing() to * provide the device with a timing it can support, if it supports that * feature. However, presumably we would need to adjust the CRTC to * output the preferred timing, and we don't support that currently. */ - intel_sdvo_set_input_timing(intel_output, &output_dtd); +#if 0 + success = intel_sdvo_create_preferred_input_timing(output, clock, + width, height); + if (success) { + struct intel_sdvo_dtd *input_dtd; + + intel_sdvo_get_preferred_input_timing(output, &input_dtd); + intel_sdvo_set_input_timing(output, &input_dtd); + } +#else + intel_sdvo_set_input_timing(output, &input_dtd); +#endif switch (intel_sdvo_get_pixel_multiplier(mode)) { case 1: - intel_sdvo_set_clock_rate_mult(intel_output, + intel_sdvo_set_clock_rate_mult(output, SDVO_CLOCK_RATE_MULT_1X); break; case 2: - intel_sdvo_set_clock_rate_mult(intel_output, + intel_sdvo_set_clock_rate_mult(output, SDVO_CLOCK_RATE_MULT_2X); break; case 4: - intel_sdvo_set_clock_rate_mult(intel_output, + intel_sdvo_set_clock_rate_mult(output, SDVO_CLOCK_RATE_MULT_4X); break; } /* Set the SDVO control regs. */ - if (0/*IS_I965GM(dev)*/) { - sdvox = SDVO_BORDER_ENABLE; - } else { - sdvox = I915_READ(sdvo_priv->output_device); - switch (sdvo_priv->output_device) { - case SDVOB: - sdvox &= SDVOB_PRESERVE_MASK; - break; - case SDVOC: - sdvox &= SDVOC_PRESERVE_MASK; - break; - } - sdvox |= (9 << 19) | SDVO_BORDER_ENABLE; - } + if (IS_I965G(dev)) { + sdvox |= SDVO_BORDER_ENABLE | + SDVO_VSYNC_ACTIVE_HIGH | + SDVO_HSYNC_ACTIVE_HIGH; + } else { + sdvox |= I915_READ(sdvo_priv->output_device); + switch (sdvo_priv->output_device) { + case SDVOB: + sdvox &= SDVOB_PRESERVE_MASK; + break; + case SDVOC: + sdvox &= SDVOC_PRESERVE_MASK; + break; + } + sdvox |= (9 << 19) | SDVO_BORDER_ENABLE; + } if (intel_crtc->pipe == 1) sdvox |= SDVO_PIPE_B_SELECT; sdvo_pixel_multiply = intel_sdvo_get_pixel_multiplier(mode); if (IS_I965G(dev)) { - /* done in crtc_mode_set as the dpll_md reg must be written - early */ - } else if (IS_I945G(dev) || IS_I945GM(dev)) { - /* done in crtc_mode_set as it lives inside the - dpll register */ + /* done in crtc_mode_set as the dpll_md reg must be written early */ + } else if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) { + /* done in crtc_mode_set as it lives inside the dpll register */ } else { sdvox |= (sdvo_pixel_multiply - 1) << SDVO_PORT_MULTIPLY_SHIFT; } - intel_sdvo_write_sdvox(intel_output, sdvox); + intel_sdvo_write_sdvox(output, sdvox); } static void intel_sdvo_dpms(struct drm_encoder *encoder, int mode) @@ -714,7 +1137,7 @@ static void intel_sdvo_dpms(struct drm_encoder *encoder, int mode) if (0) intel_sdvo_set_encoder_power_state(intel_output, mode); - intel_sdvo_set_active_outputs(intel_output, sdvo_priv->active_outputs); + intel_sdvo_set_active_outputs(intel_output, sdvo_priv->controlled_output); } return; } @@ -752,6 +1175,9 @@ static void intel_sdvo_save(struct drm_connector *connector) &sdvo_priv->save_output_dtd[o]); } } + if (sdvo_priv->is_tv) { + /* XXX: Save TV format/enhancements. */ + } sdvo_priv->save_SDVOX = I915_READ(sdvo_priv->output_device); } @@ -759,7 +1185,6 @@ static void intel_sdvo_save(struct drm_connector *connector) static void intel_sdvo_restore(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_output *intel_output = to_intel_output(connector); struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; int o; @@ -790,7 +1215,11 @@ static void intel_sdvo_restore(struct drm_connector *connector) intel_sdvo_set_clock_rate_mult(intel_output, sdvo_priv->save_sdvo_mult); - I915_WRITE(sdvo_priv->output_device, sdvo_priv->save_SDVOX); + if (sdvo_priv->is_tv) { + /* XXX: Restore TV format/enhancements. */ + } + + intel_sdvo_write_sdvox(intel_output, sdvo_priv->save_SDVOX); if (sdvo_priv->save_SDVOX & SDVO_ENABLE) { @@ -916,20 +1345,173 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect status = intel_sdvo_read_response(intel_output, &response, 2); DRM_DEBUG("SDVO response %d %d\n", response[0], response[1]); + + if (status != SDVO_CMD_STATUS_SUCCESS) + return connector_status_unknown; + if ((response[0] != 0) || (response[1] != 0)) return connector_status_connected; else return connector_status_disconnected; } -static int intel_sdvo_get_modes(struct drm_connector *connector) +static void intel_sdvo_get_ddc_modes(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; /* set the bus switch and get the modes */ - intel_sdvo_set_control_bus_switch(intel_output, SDVO_CONTROL_BUS_DDC2); + intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); intel_ddc_get_modes(intel_output); +#if 0 + struct drm_device *dev = encoder->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + /* Mac mini hack. On this device, I get DDC through the analog, which + * load-detects as disconnected. I fail to DDC through the SDVO DDC, + * but it does load-detect as connected. So, just steal the DDC bits + * from analog when we fail at finding it the right way. + */ + crt = xf86_config->output[0]; + intel_output = crt->driver_private; + if (intel_output->type == I830_OUTPUT_ANALOG && + crt->funcs->detect(crt) == XF86OutputStatusDisconnected) { + I830I2CInit(pScrn, &intel_output->pDDCBus, GPIOA, "CRTDDC_A"); + edid_mon = xf86OutputGetEDID(crt, intel_output->pDDCBus); + xf86DestroyI2CBusRec(intel_output->pDDCBus, true, true); + } + if (edid_mon) { + xf86OutputSetEDID(output, edid_mon); + modes = xf86OutputGetEDIDModes(output); + } +#endif +} + +/** + * This function checks the current TV format, and chooses a default if + * it hasn't been set. + */ +static void +intel_sdvo_check_tv_format(struct intel_output *output) +{ + struct intel_sdvo_priv *dev_priv = output->dev_priv; + struct intel_sdvo_tv_format format, unset; + uint8_t status; + + intel_sdvo_write_cmd(output, SDVO_CMD_GET_TV_FORMAT, NULL, 0); + status = intel_sdvo_read_response(output, &format, sizeof(format)); + if (status != SDVO_CMD_STATUS_SUCCESS) + return; + + memset(&unset, 0, sizeof(unset)); + if (memcmp(&format, &unset, sizeof(format))) { + DRM_DEBUG("%s: Choosing default TV format of NTSC-M\n", + SDVO_NAME(dev_priv)); + + format.ntsc_m = true; + intel_sdvo_write_cmd(output, SDVO_CMD_SET_TV_FORMAT, NULL, 0); + status = intel_sdvo_read_response(output, NULL, 0); + } +} + +/* + * Set of SDVO TV modes. + * Note! This is in reply order (see loop in get_tv_modes). + * XXX: all 60Hz refresh? + */ +struct drm_display_mode sdvo_tv_modes[] = { + { DRM_MODE("320x200", DRM_MODE_TYPE_DRIVER, 5815680, 321, 384, 416, + 200, 0, 232, 201, 233, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("320x240", DRM_MODE_TYPE_DRIVER, 6814080, 321, 384, 416, + 240, 0, 272, 241, 273, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("400x300", DRM_MODE_TYPE_DRIVER, 9910080, 401, 464, 496, + 300, 0, 332, 301, 333, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("640x350", DRM_MODE_TYPE_DRIVER, 16913280, 641, 704, 736, + 350, 0, 382, 351, 383, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("640x400", DRM_MODE_TYPE_DRIVER, 19121280, 641, 704, 736, + 400, 0, 432, 401, 433, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("640x400", DRM_MODE_TYPE_DRIVER, 19121280, 641, 704, 736, + 400, 0, 432, 401, 433, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("704x480", DRM_MODE_TYPE_DRIVER, 24624000, 705, 768, 800, + 480, 0, 512, 481, 513, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("704x576", DRM_MODE_TYPE_DRIVER, 29232000, 705, 768, 800, + 576, 0, 608, 577, 609, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("720x350", DRM_MODE_TYPE_DRIVER, 18751680, 721, 784, 816, + 350, 0, 382, 351, 383, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("720x400", DRM_MODE_TYPE_DRIVER, 21199680, 721, 784, 816, + 400, 0, 432, 401, 433, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 25116480, 721, 784, 816, + 480, 0, 512, 481, 513, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("720x540", DRM_MODE_TYPE_DRIVER, 28054080, 721, 784, 816, + 540, 0, 572, 541, 573, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("720x576", DRM_MODE_TYPE_DRIVER, 29816640, 721, 784, 816, + 576, 0, 608, 577, 609, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("768x576", DRM_MODE_TYPE_DRIVER, 31570560, 769, 832, 864, + 576, 0, 608, 577, 609, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 34030080, 801, 864, 896, + 600, 0, 632, 601, 633, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("832x624", DRM_MODE_TYPE_DRIVER, 36581760, 833, 896, 928, + 624, 0, 656, 625, 657, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("920x766", DRM_MODE_TYPE_DRIVER, 48707040, 921, 984, 1016, + 766, 0, 798, 767, 799, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 53827200, 1025, 1088, 1120, + 768, 0, 800, 769, 801, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, + { DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 87265920, 1281, 1344, 1376, + 1024, 0, 1056, 1025, 1057, 4196112, 0, + DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +}; + +static void intel_sdvo_get_tv_modes(struct drm_connector *connector) +{ + struct intel_output *output = to_intel_output(connector); + uint32_t reply = 0; + uint8_t status; + int i = 0; + + intel_sdvo_check_tv_format(output); + + /* Read the list of supported input resolutions for the selected TV + * format. + */ + intel_sdvo_write_cmd(output, SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT, + NULL, 0); + status = intel_sdvo_read_response(output, &reply, 3); + if (status != SDVO_CMD_STATUS_SUCCESS) + return; + + for (i = 0; i < ARRAY_SIZE(sdvo_tv_modes); i++) + if (reply & (1 << i)) + drm_mode_probed_add(connector, &sdvo_tv_modes[i]); +} + +static int intel_sdvo_get_modes(struct drm_connector *connector) +{ + struct intel_output *output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = output->dev_priv; + + if (sdvo_priv->is_tv) + intel_sdvo_get_tv_modes(connector); + else + intel_sdvo_get_ddc_modes(connector); + if (list_empty(&connector->probed_modes)) return 0; return 1; @@ -978,6 +1560,65 @@ static const struct drm_encoder_funcs intel_sdvo_enc_funcs = { }; +/** + * Choose the appropriate DDC bus for control bus switch command for this + * SDVO output based on the controlled output. + * + * DDC bus number assignment is in a priority order of RGB outputs, then TMDS + * outputs, then LVDS outputs. + */ +static void +intel_sdvo_select_ddc_bus(struct intel_sdvo_priv *dev_priv) +{ + uint16_t mask = 0; + unsigned int num_bits; + + /* Make a mask of outputs less than or equal to our own priority in the + * list. + */ + switch (dev_priv->controlled_output) { + case SDVO_OUTPUT_LVDS1: + mask |= SDVO_OUTPUT_LVDS1; + case SDVO_OUTPUT_LVDS0: + mask |= SDVO_OUTPUT_LVDS0; + case SDVO_OUTPUT_TMDS1: + mask |= SDVO_OUTPUT_TMDS1; + case SDVO_OUTPUT_TMDS0: + mask |= SDVO_OUTPUT_TMDS0; + case SDVO_OUTPUT_RGB1: + mask |= SDVO_OUTPUT_RGB1; + case SDVO_OUTPUT_RGB0: + mask |= SDVO_OUTPUT_RGB0; + break; + } + + /* Count bits to find what number we are in the priority list. */ + mask &= dev_priv->caps.output_flags; + num_bits = hweight16(mask); + if (num_bits > 3) { + /* if more than 3 outputs, default to DDC bus 3 for now */ + num_bits = 3; + } + + /* Corresponds to SDVO_CONTROL_BUS_DDCx */ + dev_priv->ddc_bus = 1 << num_bits; +} + +static bool +intel_sdvo_get_digital_encoding_mode(struct intel_output *output) +{ + struct intel_sdvo_priv *sdvo_priv = output->dev_priv; + uint8_t status; + + intel_sdvo_set_target_output(output, sdvo_priv->controlled_output); + + intel_sdvo_write_cmd(output, SDVO_CMD_GET_ENCODE, NULL, 0); + status = intel_sdvo_read_response(output, &sdvo_priv->is_hdmi, 1); + if (status != SDVO_CMD_STATUS_SUCCESS) + return false; + return true; +} + bool intel_sdvo_init(struct drm_device *dev, int output_device) { struct drm_connector *connector; @@ -1040,45 +1681,76 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); - memset(&sdvo_priv->active_outputs, 0, sizeof(sdvo_priv->active_outputs)); + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_TMDS1)) { + if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_TMDS0) + sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS0; + else + sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1; - /* TODO, CVBS, SVID, YPRPB & SCART outputs. */ - if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0) + connector->display_info.subpixel_order = SubPixelHorizontalRGB; + encoder_type = DRM_MODE_ENCODER_TMDS; + connector_type = DRM_MODE_CONNECTOR_DVID; + + if (intel_sdvo_get_supp_encode(intel_output, + &sdvo_priv->encode) && + intel_sdvo_get_digital_encoding_mode(intel_output) && + sdvo_priv->is_hdmi) { + /* enable hdmi encoding mode if supported */ + intel_sdvo_set_encode(intel_output, SDVO_ENCODE_HDMI); + intel_sdvo_set_colorimetry(intel_output, + SDVO_COLORIMETRY_RGB256); + connector_type = DRM_MODE_CONNECTOR_HDMIA; + } + } + else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_SVID0) { - sdvo_priv->active_outputs = SDVO_OUTPUT_RGB0; + sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0; + connector->display_info.subpixel_order = SubPixelHorizontalRGB; + encoder_type = DRM_MODE_ENCODER_TVDAC; + connector_type = DRM_MODE_CONNECTOR_SVIDEO; + sdvo_priv->is_tv = true; + intel_output->needs_tv_clock = true; + } + else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0) + { + sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0; connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_DAC; connector_type = DRM_MODE_CONNECTOR_VGA; } else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1) { - sdvo_priv->active_outputs = SDVO_OUTPUT_RGB1; + sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_DAC; connector_type = DRM_MODE_CONNECTOR_VGA; } - else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_TMDS0) + else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS0) { - sdvo_priv->active_outputs = SDVO_OUTPUT_TMDS0; + sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; connector->display_info.subpixel_order = SubPixelHorizontalRGB; - encoder_type = DRM_MODE_ENCODER_TMDS; - connector_type = DRM_MODE_CONNECTOR_DVID; + encoder_type = DRM_MODE_ENCODER_LVDS; + connector_type = DRM_MODE_CONNECTOR_LVDS; } - else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_TMDS1) + else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1) { - sdvo_priv->active_outputs = SDVO_OUTPUT_TMDS1; + sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1; connector->display_info.subpixel_order = SubPixelHorizontalRGB; - encoder_type = DRM_MODE_ENCODER_TMDS; - connector_type = DRM_MODE_CONNECTOR_DVID; + encoder_type = DRM_MODE_ENCODER_LVDS; + connector_type = DRM_MODE_CONNECTOR_LVDS; } else { unsigned char bytes[2]; + sdvo_priv->controlled_output = 0; memcpy (bytes, &sdvo_priv->caps.output_flags, 2); - DRM_DEBUG("%s: No active RGB or TMDS outputs (0x%02x%02x)\n", + DRM_DEBUG("%s: Unknown SDVO output type (0x%02x%02x)\n", SDVO_NAME(sdvo_priv), bytes[0], bytes[1]); + encoder_type = DRM_MODE_ENCODER_NONE; + connector_type = DRM_MODE_CONNECTOR_Unknown; goto err_i2c; } @@ -1089,6 +1761,8 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); drm_sysfs_connector_add(connector); + intel_sdvo_select_ddc_bus(sdvo_priv); + /* Set the input timing to the screen. Assume always input 0. */ intel_sdvo_set_target_input(intel_output, true, false); diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h index 861a43f8693c..1117b9c151a6 100644 --- a/drivers/gpu/drm/i915/intel_sdvo_regs.h +++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h @@ -173,6 +173,9 @@ struct intel_sdvo_get_trained_inputs_response { * Returns two struct intel_sdvo_output_flags structures. */ #define SDVO_CMD_GET_IN_OUT_MAP 0x06 +struct intel_sdvo_in_out_map { + u16 in0, in1; +}; /** * Sets the current mapping of SDVO inputs to outputs on the device. @@ -206,7 +209,8 @@ struct intel_sdvo_get_trained_inputs_response { struct intel_sdvo_get_interrupt_event_source_response { u16 interrupt_status; unsigned int ambient_light_interrupt:1; - unsigned int pad:7; + unsigned int hdmi_audio_encrypt_change:1; + unsigned int pad:6; } __attribute__((packed)); /** @@ -305,23 +309,411 @@ struct intel_sdvo_set_target_input_args { # define SDVO_CLOCK_RATE_MULT_4X (1 << 3) #define SDVO_CMD_GET_SUPPORTED_TV_FORMATS 0x27 +/** 5 bytes of bit flags for TV formats shared by all TV format functions */ +struct intel_sdvo_tv_format { + unsigned int ntsc_m:1; + unsigned int ntsc_j:1; + unsigned int ntsc_443:1; + unsigned int pal_b:1; + unsigned int pal_d:1; + unsigned int pal_g:1; + unsigned int pal_h:1; + unsigned int pal_i:1; + + unsigned int pal_m:1; + unsigned int pal_n:1; + unsigned int pal_nc:1; + unsigned int pal_60:1; + unsigned int secam_b:1; + unsigned int secam_d:1; + unsigned int secam_g:1; + unsigned int secam_k:1; + + unsigned int secam_k1:1; + unsigned int secam_l:1; + unsigned int secam_60:1; + unsigned int hdtv_std_smpte_240m_1080i_59:1; + unsigned int hdtv_std_smpte_240m_1080i_60:1; + unsigned int hdtv_std_smpte_260m_1080i_59:1; + unsigned int hdtv_std_smpte_260m_1080i_60:1; + unsigned int hdtv_std_smpte_274m_1080i_50:1; + + unsigned int hdtv_std_smpte_274m_1080i_59:1; + unsigned int hdtv_std_smpte_274m_1080i_60:1; + unsigned int hdtv_std_smpte_274m_1080p_23:1; + unsigned int hdtv_std_smpte_274m_1080p_24:1; + unsigned int hdtv_std_smpte_274m_1080p_25:1; + unsigned int hdtv_std_smpte_274m_1080p_29:1; + unsigned int hdtv_std_smpte_274m_1080p_30:1; + unsigned int hdtv_std_smpte_274m_1080p_50:1; + + unsigned int hdtv_std_smpte_274m_1080p_59:1; + unsigned int hdtv_std_smpte_274m_1080p_60:1; + unsigned int hdtv_std_smpte_295m_1080i_50:1; + unsigned int hdtv_std_smpte_295m_1080p_50:1; + unsigned int hdtv_std_smpte_296m_720p_59:1; + unsigned int hdtv_std_smpte_296m_720p_60:1; + unsigned int hdtv_std_smpte_296m_720p_50:1; + unsigned int hdtv_std_smpte_293m_480p_59:1; + + unsigned int hdtv_std_smpte_170m_480i_59:1; + unsigned int hdtv_std_iturbt601_576i_50:1; + unsigned int hdtv_std_iturbt601_576p_50:1; + unsigned int hdtv_std_eia_7702a_480i_60:1; + unsigned int hdtv_std_eia_7702a_480p_60:1; + unsigned int pad:3; +} __attribute__((packed)); #define SDVO_CMD_GET_TV_FORMAT 0x28 #define SDVO_CMD_SET_TV_FORMAT 0x29 +/** Returns the resolutiosn that can be used with the given TV format */ +#define SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT 0x83 +struct intel_sdvo_sdtv_resolution_request { + unsigned int ntsc_m:1; + unsigned int ntsc_j:1; + unsigned int ntsc_443:1; + unsigned int pal_b:1; + unsigned int pal_d:1; + unsigned int pal_g:1; + unsigned int pal_h:1; + unsigned int pal_i:1; + + unsigned int pal_m:1; + unsigned int pal_n:1; + unsigned int pal_nc:1; + unsigned int pal_60:1; + unsigned int secam_b:1; + unsigned int secam_d:1; + unsigned int secam_g:1; + unsigned int secam_k:1; + + unsigned int secam_k1:1; + unsigned int secam_l:1; + unsigned int secam_60:1; + unsigned int pad:5; +} __attribute__((packed)); + +struct intel_sdvo_sdtv_resolution_reply { + unsigned int res_320x200:1; + unsigned int res_320x240:1; + unsigned int res_400x300:1; + unsigned int res_640x350:1; + unsigned int res_640x400:1; + unsigned int res_640x480:1; + unsigned int res_704x480:1; + unsigned int res_704x576:1; + + unsigned int res_720x350:1; + unsigned int res_720x400:1; + unsigned int res_720x480:1; + unsigned int res_720x540:1; + unsigned int res_720x576:1; + unsigned int res_768x576:1; + unsigned int res_800x600:1; + unsigned int res_832x624:1; + + unsigned int res_920x766:1; + unsigned int res_1024x768:1; + unsigned int res_1280x1024:1; + unsigned int pad:5; +} __attribute__((packed)); + +/* Get supported resolution with squire pixel aspect ratio that can be + scaled for the requested HDTV format */ +#define SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT 0x85 + +struct intel_sdvo_hdtv_resolution_request { + unsigned int hdtv_std_smpte_240m_1080i_59:1; + unsigned int hdtv_std_smpte_240m_1080i_60:1; + unsigned int hdtv_std_smpte_260m_1080i_59:1; + unsigned int hdtv_std_smpte_260m_1080i_60:1; + unsigned int hdtv_std_smpte_274m_1080i_50:1; + unsigned int hdtv_std_smpte_274m_1080i_59:1; + unsigned int hdtv_std_smpte_274m_1080i_60:1; + unsigned int hdtv_std_smpte_274m_1080p_23:1; + + unsigned int hdtv_std_smpte_274m_1080p_24:1; + unsigned int hdtv_std_smpte_274m_1080p_25:1; + unsigned int hdtv_std_smpte_274m_1080p_29:1; + unsigned int hdtv_std_smpte_274m_1080p_30:1; + unsigned int hdtv_std_smpte_274m_1080p_50:1; + unsigned int hdtv_std_smpte_274m_1080p_59:1; + unsigned int hdtv_std_smpte_274m_1080p_60:1; + unsigned int hdtv_std_smpte_295m_1080i_50:1; + + unsigned int hdtv_std_smpte_295m_1080p_50:1; + unsigned int hdtv_std_smpte_296m_720p_59:1; + unsigned int hdtv_std_smpte_296m_720p_60:1; + unsigned int hdtv_std_smpte_296m_720p_50:1; + unsigned int hdtv_std_smpte_293m_480p_59:1; + unsigned int hdtv_std_smpte_170m_480i_59:1; + unsigned int hdtv_std_iturbt601_576i_50:1; + unsigned int hdtv_std_iturbt601_576p_50:1; + + unsigned int hdtv_std_eia_7702a_480i_60:1; + unsigned int hdtv_std_eia_7702a_480p_60:1; + unsigned int pad:6; +} __attribute__((packed)); + +struct intel_sdvo_hdtv_resolution_reply { + unsigned int res_640x480:1; + unsigned int res_800x600:1; + unsigned int res_1024x768:1; + unsigned int res_1280x960:1; + unsigned int res_1400x1050:1; + unsigned int res_1600x1200:1; + unsigned int res_1920x1440:1; + unsigned int res_2048x1536:1; + + unsigned int res_2560x1920:1; + unsigned int res_3200x2400:1; + unsigned int res_3840x2880:1; + unsigned int pad1:5; + + unsigned int res_848x480:1; + unsigned int res_1064x600:1; + unsigned int res_1280x720:1; + unsigned int res_1360x768:1; + unsigned int res_1704x960:1; + unsigned int res_1864x1050:1; + unsigned int res_1920x1080:1; + unsigned int res_2128x1200:1; + + unsigned int res_2560x1400:1; + unsigned int res_2728x1536:1; + unsigned int res_3408x1920:1; + unsigned int res_4264x2400:1; + unsigned int res_5120x2880:1; + unsigned int pad2:3; + + unsigned int res_768x480:1; + unsigned int res_960x600:1; + unsigned int res_1152x720:1; + unsigned int res_1124x768:1; + unsigned int res_1536x960:1; + unsigned int res_1680x1050:1; + unsigned int res_1728x1080:1; + unsigned int res_1920x1200:1; + + unsigned int res_2304x1440:1; + unsigned int res_2456x1536:1; + unsigned int res_3072x1920:1; + unsigned int res_3840x2400:1; + unsigned int res_4608x2880:1; + unsigned int pad3:3; + + unsigned int res_1280x1024:1; + unsigned int pad4:7; + + unsigned int res_1280x768:1; + unsigned int pad5:7; +} __attribute__((packed)); + +/* Get supported power state returns info for encoder and monitor, rely on + last SetTargetInput and SetTargetOutput calls */ #define SDVO_CMD_GET_SUPPORTED_POWER_STATES 0x2a +/* Get power state returns info for encoder and monitor, rely on last + SetTargetInput and SetTargetOutput calls */ +#define SDVO_CMD_GET_POWER_STATE 0x2b #define SDVO_CMD_GET_ENCODER_POWER_STATE 0x2b #define SDVO_CMD_SET_ENCODER_POWER_STATE 0x2c # define SDVO_ENCODER_STATE_ON (1 << 0) # define SDVO_ENCODER_STATE_STANDBY (1 << 1) # define SDVO_ENCODER_STATE_SUSPEND (1 << 2) # define SDVO_ENCODER_STATE_OFF (1 << 3) +# define SDVO_MONITOR_STATE_ON (1 << 4) +# define SDVO_MONITOR_STATE_STANDBY (1 << 5) +# define SDVO_MONITOR_STATE_SUSPEND (1 << 6) +# define SDVO_MONITOR_STATE_OFF (1 << 7) -#define SDVO_CMD_SET_TV_RESOLUTION_SUPPORT 0x93 +#define SDVO_CMD_GET_MAX_PANEL_POWER_SEQUENCING 0x2d +#define SDVO_CMD_GET_PANEL_POWER_SEQUENCING 0x2e +#define SDVO_CMD_SET_PANEL_POWER_SEQUENCING 0x2f +/** + * The panel power sequencing parameters are in units of milliseconds. + * The high fields are bits 8:9 of the 10-bit values. + */ +struct sdvo_panel_power_sequencing { + u8 t0; + u8 t1; + u8 t2; + u8 t3; + u8 t4; + + unsigned int t0_high:2; + unsigned int t1_high:2; + unsigned int t2_high:2; + unsigned int t3_high:2; + + unsigned int t4_high:2; + unsigned int pad:6; +} __attribute__((packed)); + +#define SDVO_CMD_GET_MAX_BACKLIGHT_LEVEL 0x30 +struct sdvo_max_backlight_reply { + u8 max_value; + u8 default_value; +} __attribute__((packed)); + +#define SDVO_CMD_GET_BACKLIGHT_LEVEL 0x31 +#define SDVO_CMD_SET_BACKLIGHT_LEVEL 0x32 + +#define SDVO_CMD_GET_AMBIENT_LIGHT 0x33 +struct sdvo_get_ambient_light_reply { + u16 trip_low; + u16 trip_high; + u16 value; +} __attribute__((packed)); +#define SDVO_CMD_SET_AMBIENT_LIGHT 0x34 +struct sdvo_set_ambient_light_reply { + u16 trip_low; + u16 trip_high; + unsigned int enable:1; + unsigned int pad:7; +} __attribute__((packed)); + +/* Set display power state */ +#define SDVO_CMD_SET_DISPLAY_POWER_STATE 0x7d +# define SDVO_DISPLAY_STATE_ON (1 << 0) +# define SDVO_DISPLAY_STATE_STANDBY (1 << 1) +# define SDVO_DISPLAY_STATE_SUSPEND (1 << 2) +# define SDVO_DISPLAY_STATE_OFF (1 << 3) + +#define SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS 0x84 +struct intel_sdvo_enhancements_reply { + unsigned int flicker_filter:1; + unsigned int flicker_filter_adaptive:1; + unsigned int flicker_filter_2d:1; + unsigned int saturation:1; + unsigned int hue:1; + unsigned int brightness:1; + unsigned int contrast:1; + unsigned int overscan_h:1; + + unsigned int overscan_v:1; + unsigned int position_h:1; + unsigned int position_v:1; + unsigned int sharpness:1; + unsigned int dot_crawl:1; + unsigned int dither:1; + unsigned int max_tv_chroma_filter:1; + unsigned int max_tv_luma_filter:1; +} __attribute__((packed)); + +/* Picture enhancement limits below are dependent on the current TV format, + * and thus need to be queried and set after it. + */ +#define SDVO_CMD_GET_MAX_FLICKER_FITER 0x4d +#define SDVO_CMD_GET_MAX_ADAPTIVE_FLICKER_FITER 0x7b +#define SDVO_CMD_GET_MAX_2D_FLICKER_FITER 0x52 +#define SDVO_CMD_GET_MAX_SATURATION 0x55 +#define SDVO_CMD_GET_MAX_HUE 0x58 +#define SDVO_CMD_GET_MAX_BRIGHTNESS 0x5b +#define SDVO_CMD_GET_MAX_CONTRAST 0x5e +#define SDVO_CMD_GET_MAX_OVERSCAN_H 0x61 +#define SDVO_CMD_GET_MAX_OVERSCAN_V 0x64 +#define SDVO_CMD_GET_MAX_POSITION_H 0x67 +#define SDVO_CMD_GET_MAX_POSITION_V 0x6a +#define SDVO_CMD_GET_MAX_SHARPNESS_V 0x6d +#define SDVO_CMD_GET_MAX_TV_CHROMA 0x74 +#define SDVO_CMD_GET_MAX_TV_LUMA 0x77 +struct intel_sdvo_enhancement_limits_reply { + u16 max_value; + u16 default_value; +} __attribute__((packed)); + +#define SDVO_CMD_GET_LVDS_PANEL_INFORMATION 0x7f +#define SDVO_CMD_SET_LVDS_PANEL_INFORMATION 0x80 +# define SDVO_LVDS_COLOR_DEPTH_18 (0 << 0) +# define SDVO_LVDS_COLOR_DEPTH_24 (1 << 0) +# define SDVO_LVDS_CONNECTOR_SPWG (0 << 2) +# define SDVO_LVDS_CONNECTOR_OPENLDI (1 << 2) +# define SDVO_LVDS_SINGLE_CHANNEL (0 << 4) +# define SDVO_LVDS_DUAL_CHANNEL (1 << 4) + +#define SDVO_CMD_GET_FLICKER_FILTER 0x4e +#define SDVO_CMD_SET_FLICKER_FILTER 0x4f +#define SDVO_CMD_GET_ADAPTIVE_FLICKER_FITER 0x50 +#define SDVO_CMD_SET_ADAPTIVE_FLICKER_FITER 0x51 +#define SDVO_CMD_GET_2D_FLICKER_FITER 0x53 +#define SDVO_CMD_SET_2D_FLICKER_FITER 0x54 +#define SDVO_CMD_GET_SATURATION 0x56 +#define SDVO_CMD_SET_SATURATION 0x57 +#define SDVO_CMD_GET_HUE 0x59 +#define SDVO_CMD_SET_HUE 0x5a +#define SDVO_CMD_GET_BRIGHTNESS 0x5c +#define SDVO_CMD_SET_BRIGHTNESS 0x5d +#define SDVO_CMD_GET_CONTRAST 0x5f +#define SDVO_CMD_SET_CONTRAST 0x60 +#define SDVO_CMD_GET_OVERSCAN_H 0x62 +#define SDVO_CMD_SET_OVERSCAN_H 0x63 +#define SDVO_CMD_GET_OVERSCAN_V 0x65 +#define SDVO_CMD_SET_OVERSCAN_V 0x66 +#define SDVO_CMD_GET_POSITION_H 0x68 +#define SDVO_CMD_SET_POSITION_H 0x69 +#define SDVO_CMD_GET_POSITION_V 0x6b +#define SDVO_CMD_SET_POSITION_V 0x6c +#define SDVO_CMD_GET_SHARPNESS 0x6e +#define SDVO_CMD_SET_SHARPNESS 0x6f +#define SDVO_CMD_GET_TV_CHROMA 0x75 +#define SDVO_CMD_SET_TV_CHROMA 0x76 +#define SDVO_CMD_GET_TV_LUMA 0x78 +#define SDVO_CMD_SET_TV_LUMA 0x79 +struct intel_sdvo_enhancements_arg { + u16 value; +}__attribute__((packed)); + +#define SDVO_CMD_GET_DOT_CRAWL 0x70 +#define SDVO_CMD_SET_DOT_CRAWL 0x71 +# define SDVO_DOT_CRAWL_ON (1 << 0) +# define SDVO_DOT_CRAWL_DEFAULT_ON (1 << 1) + +#define SDVO_CMD_GET_DITHER 0x72 +#define SDVO_CMD_SET_DITHER 0x73 +# define SDVO_DITHER_ON (1 << 0) +# define SDVO_DITHER_DEFAULT_ON (1 << 1) #define SDVO_CMD_SET_CONTROL_BUS_SWITCH 0x7a -# define SDVO_CONTROL_BUS_PROM 0x0 -# define SDVO_CONTROL_BUS_DDC1 0x1 -# define SDVO_CONTROL_BUS_DDC2 0x2 -# define SDVO_CONTROL_BUS_DDC3 0x3 +# define SDVO_CONTROL_BUS_PROM (1 << 0) +# define SDVO_CONTROL_BUS_DDC1 (1 << 1) +# define SDVO_CONTROL_BUS_DDC2 (1 << 2) +# define SDVO_CONTROL_BUS_DDC3 (1 << 3) + +/* HDMI op codes */ +#define SDVO_CMD_GET_SUPP_ENCODE 0x9d +#define SDVO_CMD_GET_ENCODE 0x9e +#define SDVO_CMD_SET_ENCODE 0x9f + #define SDVO_ENCODE_DVI 0x0 + #define SDVO_ENCODE_HDMI 0x1 +#define SDVO_CMD_SET_PIXEL_REPLI 0x8b +#define SDVO_CMD_GET_PIXEL_REPLI 0x8c +#define SDVO_CMD_GET_COLORIMETRY_CAP 0x8d +#define SDVO_CMD_SET_COLORIMETRY 0x8e + #define SDVO_COLORIMETRY_RGB256 0x0 + #define SDVO_COLORIMETRY_RGB220 0x1 + #define SDVO_COLORIMETRY_YCrCb422 0x3 + #define SDVO_COLORIMETRY_YCrCb444 0x4 +#define SDVO_CMD_GET_COLORIMETRY 0x8f +#define SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER 0x90 +#define SDVO_CMD_SET_AUDIO_STAT 0x91 +#define SDVO_CMD_GET_AUDIO_STAT 0x92 +#define SDVO_CMD_SET_HBUF_INDEX 0x93 +#define SDVO_CMD_GET_HBUF_INDEX 0x94 +#define SDVO_CMD_GET_HBUF_INFO 0x95 +#define SDVO_CMD_SET_HBUF_AV_SPLIT 0x96 +#define SDVO_CMD_GET_HBUF_AV_SPLIT 0x97 +#define SDVO_CMD_SET_HBUF_DATA 0x98 +#define SDVO_CMD_GET_HBUF_DATA 0x99 +#define SDVO_CMD_SET_HBUF_TXRATE 0x9a +#define SDVO_CMD_GET_HBUF_TXRATE 0x9b + #define SDVO_HBUF_TX_DISABLED (0 << 6) + #define SDVO_HBUF_TX_ONCE (2 << 6) + #define SDVO_HBUF_TX_VSYNC (3 << 6) +#define SDVO_CMD_GET_AUDIO_TX_INFO 0x9c + +struct intel_sdvo_encode{ + u8 dvi_rev; + u8 hdmi_rev; +} __attribute__ ((packed)); From 565dcd4635f4f8c0ac4dee38a5625bc325799b1e Mon Sep 17 00:00:00 2001 From: Paul Collins Date: Wed, 4 Feb 2009 23:05:41 +1300 Subject: [PATCH 400/566] drm/i915: skip LVDS initialization on Apple Mac Mini The Apple Mac Mini falsely reports LVDS. Use DMI to check whether we are running on a Mac Mini, and skip LVDS initialization if that proves to be the case. Signed-off-by: Paul Collins Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_lvds.c | 47 +++++++++---------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index cf8da644faf2..6d4f91265354 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -27,6 +27,7 @@ * Jesse Barnes */ +#include #include #include "drmP.h" #include "drm.h" @@ -403,6 +404,16 @@ void intel_lvds_init(struct drm_device *dev) u32 lvds; int pipe; + /* Blacklist machines that we know falsely report LVDS. */ + /* FIXME: add a check for the Aopen Mini PC */ + + /* Apple Mac Mini Core Duo and Mac Mini Core 2 Duo */ + if(dmi_match(DMI_PRODUCT_NAME, "Macmini1,1") || + dmi_match(DMI_PRODUCT_NAME, "Macmini2,1")) { + DRM_DEBUG("Skipping LVDS initialization for Apple Mac Mini\n"); + return; + } + intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL); if (!intel_output) { return; @@ -456,7 +467,7 @@ void intel_lvds_init(struct drm_device *dev) dev_priv->panel_fixed_mode = drm_mode_duplicate(dev, scan); mutex_unlock(&dev->mode_config.mutex); - goto out; /* FIXME: check for quirks */ + goto out; } mutex_unlock(&dev->mode_config.mutex); } @@ -490,7 +501,7 @@ void intel_lvds_init(struct drm_device *dev) if (dev_priv->panel_fixed_mode) { dev_priv->panel_fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; - goto out; /* FIXME: check for quirks */ + goto out; } } @@ -498,38 +509,6 @@ void intel_lvds_init(struct drm_device *dev) if (!dev_priv->panel_fixed_mode) goto failed; - /* FIXME: detect aopen & mac mini type stuff automatically? */ - /* - * Blacklist machines with BIOSes that list an LVDS panel without - * actually having one. - */ - if (IS_I945GM(dev)) { - /* aopen mini pc */ - if (dev->pdev->subsystem_vendor == 0xa0a0) - goto failed; - - if ((dev->pdev->subsystem_vendor == 0x8086) && - (dev->pdev->subsystem_device == 0x7270)) { - /* It's a Mac Mini or Macbook Pro. - * - * Apple hardware is out to get us. The macbook pro - * has a real LVDS panel, but the mac mini does not, - * and they have the same device IDs. We'll - * distinguish by panel size, on the assumption - * that Apple isn't about to make any machines with an - * 800x600 display. - */ - - if (dev_priv->panel_fixed_mode != NULL && - dev_priv->panel_fixed_mode->hdisplay == 800 && - dev_priv->panel_fixed_mode->vdisplay == 600) { - DRM_DEBUG("Suspected Mac Mini, ignoring the LVDS\n"); - goto failed; - } - } - } - - out: drm_sysfs_connector_add(connector); return; From 122ee2a63bc49d21f402f6b6d2208306cdcc98c1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 3 Feb 2009 12:10:21 -0800 Subject: [PATCH 401/566] drm/i915: Quiet the message on get/setparam ioctl with an unknown value. Getting an unknown get/setparam used to be more significant back when they didn't change much. However, now that we're in the git world we're using them instead of a monotonic version number to signal feature availability, so clients ask about unknown params on older kernels more often. Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index cc0adb428cee..0ded483d9ac7 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -735,7 +735,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = dev_priv->num_fence_regs - dev_priv->fence_reg_start; break; default: - DRM_ERROR("Unknown parameter %d\n", param->param); + DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; } @@ -775,7 +775,7 @@ static int i915_setparam(struct drm_device *dev, void *data, dev_priv->fence_reg_start = param->value; break; default: - DRM_ERROR("unknown parameter %d\n", param->param); + DRM_DEBUG("unknown parameter %d\n", param->param); return -EINVAL; } From 7d8d58b23fd01e60ed44d8d8c10b2df86e638faa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Feb 2009 14:15:10 +0000 Subject: [PATCH 402/566] drm/i915: Unlock mutex on i915_gem_fault() error path If we failed to allocate a new fence register we would return VM_FAULT_SIGBUS without relinquishing the lock. Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6a9e3a875083..1441831fa06e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -588,8 +588,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (obj_priv->fence_reg == I915_FENCE_REG_NONE && obj_priv->tiling_mode != I915_TILING_NONE) { ret = i915_gem_object_get_fence_reg(obj, write); - if (ret != 0) + if (ret) { + mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; + } } pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + From 14d200c5e5bd19219d930bbb9a5a22758c8f5bec Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 6 Feb 2009 13:04:49 -0800 Subject: [PATCH 403/566] drm/i915: capture last_vblank count at IRQ uninstall time too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In dc1336ff4fe08ae7cfe8301bfd7f0b2cfd31d20a (set vblank enable flag correctly across IRQ uninstall), we made sure drivers that uninstall their interrupt handler set the vblank enabled flag correctly, so that when interrupts are re-enabled, vblank interrupts & counts work as expected. However I missed the last_vblank field: it needs to be updated as well, otherwise, at the next drm_update_vblank_count we'll end up comparing a current count to a stale one (the last one captured by the disable function), which may trigger the wraparound handling, leading to a jumpy counter and hangs in drm_wait_vblank. The jumpy counter can prevent the DRM_WAIT_ON from returning success if the difference between the current count and the requested count is greater than 2^23, leading to timeouts or hangs, if the ioctl is restarted in a loop (as is the case in libdrm < 2.4.4). Signed-off-by: Jesse Barnes Acked-by: Michel Dänzer Tested-by: Timo Aaltonen Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 69aa0ab28403..3795dbc0f50c 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -276,6 +276,7 @@ int drm_irq_uninstall(struct drm_device * dev) for (i = 0; i < dev->num_crtcs; i++) { DRM_WAKEUP(&dev->vbl_queue[i]); dev->vblank_enabled[i] = 0; + dev->last_vblank[i] = dev->driver->get_vblank_counter(dev, i); } spin_unlock_irqrestore(&dev->vbl_lock, irqflags); From 9880b7a527ffbb52f65c2de0a8d4eea86e24775e Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 6 Feb 2009 10:22:41 -0800 Subject: [PATCH 404/566] drm/i915: add get_vblank_counter function for GM45 As discussed in the long thread about vblank related timeouts, it turns out GM45 has different frame count registers than previous chips. This patch adds support for them, which prevents us from waiting on really stale sequence values in drm_wait_vblank (which rather than returning immediately ends up timing out or getting interrupted). Signed-off-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 4 ++++ drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 13 +++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0ded483d9ac7..81f1cff56fd5 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1112,6 +1112,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->has_gem = 1; #endif + dev->driver->get_vblank_counter = i915_get_vblank_counter; + if (IS_GM45(dev)) + dev->driver->get_vblank_counter = gm45_get_vblank_counter; + i915_gem_load(dev); /* Init HWS */ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f8b3df0926c0..aac12ee31a46 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -112,7 +112,6 @@ static struct drm_driver driver = { .suspend = i915_suspend, .resume = i915_resume, .device_is_agp = i915_driver_device_is_agp, - .get_vblank_counter = i915_get_vblank_counter, .enable_vblank = i915_enable_vblank, .disable_vblank = i915_disable_vblank, .irq_preinstall = i915_driver_irq_preinstall, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a70bf77290fc..7325363164f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -535,6 +535,7 @@ extern int i915_vblank_pipe_get(struct drm_device *dev, void *data, extern int i915_enable_vblank(struct drm_device *dev, int crtc); extern void i915_disable_vblank(struct drm_device *dev, int crtc); extern u32 i915_get_vblank_counter(struct drm_device *dev, int crtc); +extern u32 gm45_get_vblank_counter(struct drm_device *dev, int crtc); extern int i915_vblank_swap(struct drm_device *dev, void *data, struct drm_file *file_priv); extern void i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6290219de6c8..548ff2c66431 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -174,6 +174,19 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe) return count; } +u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe) +{ + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45; + + if (!i915_pipe_enabled(dev, pipe)) { + DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); + return 0; + } + + return I915_READ(reg); +} + irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 928e00462570..9d6539a868b3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1371,6 +1371,9 @@ #define PIPE_FRAME_LOW_SHIFT 24 #define PIPE_PIXEL_MASK 0x00ffffff #define PIPE_PIXEL_SHIFT 0 +/* GM45+ just has to be different */ +#define PIPEA_FRMCOUNT_GM45 0x70040 +#define PIPEA_FLIPCOUNT_GM45 0x70044 /* Cursor A & B regs */ #define CURACNTR 0x70080 @@ -1439,6 +1442,9 @@ #define PIPEBSTAT 0x71024 #define PIPEBFRAMEHIGH 0x71040 #define PIPEBFRAMEPIXEL 0x71044 +#define PIPEB_FRMCOUNT_GM45 0x71040 +#define PIPEB_FLIPCOUNT_GM45 0x71044 + /* Display B control */ #define DSPBCNTR 0x71180 From d2f59357700487a8b944f4f7777d1e97cf5ea2ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Feb 2009 16:03:34 +0100 Subject: [PATCH 405/566] drm/i915: select framebuffer support automatically Migration helper. The i915 driver recently added a 'depends on FB' rule to its Kconfig entry - which silently turns off DRM_I915 if someone has a working config but no CONFIG_FB selected, and upgrades to the latest upstream kernel. Norbert Preining reported this problem: Bug-Entry : http://bugzilla.kernel.org/show_bug.cgi?id=12599 Subject : dri /dev node disappeared with 2.6.29-rc1 So change it to "select FB", which auto-selects framebuffer support. This way the driver keeps working, regardless of whether FB was enabled before or not. Kconfig select's of interactive options can be problematic to dependencies and can cause build breakages - but in this case it's safe because it's a leaf entry with no dependencies of its own. ( There is some minor circular dependency fallout as FB_I810 and FB_INTEL also used 'depends on FB' constructs - update those to "select FB" too. ) Reported-by: Norbert Preining Signed-off-by: Ingo Molnar Signed-off-by: Dave Airlie --- drivers/gpu/drm/Kconfig | 2 +- drivers/video/Kconfig | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5130b72d593c..4be3acbaaf9a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -70,7 +70,7 @@ config DRM_I915 select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT - depends on FB + select FB tristate "i915 driver" help Choose this option if you have a system that has Intel 830M, 845G, diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index f0267706cb45..bf0af660df8a 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1054,9 +1054,10 @@ config FB_RIVA_BACKLIGHT config FB_I810 tristate "Intel 810/815 support (EXPERIMENTAL)" - depends on FB && EXPERIMENTAL && PCI && X86_32 + depends on EXPERIMENTAL && PCI && X86_32 select AGP select AGP_INTEL + select FB select FB_MODE_HELPERS select FB_CFB_FILLRECT select FB_CFB_COPYAREA @@ -1119,7 +1120,8 @@ config FB_CARILLO_RANCH config FB_INTEL tristate "Intel 830M/845G/852GM/855GM/865G/915G/945G/945GM/965G/965GM support (EXPERIMENTAL)" - depends on FB && EXPERIMENTAL && PCI && X86 + depends on EXPERIMENTAL && PCI && X86 + select FB select AGP select AGP_INTEL select FB_MODE_HELPERS From bc111d570ba87cff48ec8dfa15a2a598e59c0f4b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Feb 2009 17:00:02 -0800 Subject: [PATCH 406/566] drivers/atm: introduce missing kfree Error handling code following a kmalloc should free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/atm/solos-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 72fc0f799a64..89d7a6e94c9c 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -685,6 +685,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) out_release_regions: pci_release_regions(dev); out: + kfree(card); return err; } From 23b904f35128f3c596831cc3320bab1f2db81f60 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Feb 2009 17:00:49 -0800 Subject: [PATCH 407/566] drivers/isdn: introduce missing kfree Error handling code following a kmalloc should free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcmulti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 595ba8eb4a07..0b28141e43bf 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -4599,6 +4599,7 @@ init_e1_port(struct hfc_multi *hc, struct hm_map *m) printk(KERN_ERR "%s: no memory for coeffs\n", __func__); ret = -ENOMEM; + kfree(bch); goto free_chan; } bch->nr = ch; @@ -4767,6 +4768,7 @@ init_multi_port(struct hfc_multi *hc, int pt) printk(KERN_ERR "%s: no memory for coeffs\n", __func__); ret = -ENOMEM; + kfree(bch); goto free_chan; } bch->nr = ch + 1; From cfbf84fcbcda98bb91ada683a8dc8e6901a83ebd Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Sun, 8 Feb 2009 17:49:17 -0800 Subject: [PATCH 408/566] tun: Fix unicast filter overflow Tap devices can make use of a small MAC filter set via the TUNSETTXFILTER ioctl. The filter has a set of exact matches plus a hash for imperfect filtering of additional multicast addresses. The current code is unbalanced, adding unicast addresses to the multicast hash, but only checking the hash against multicast addresses. This results in the filter dropping unicast addresses that overflow the exact filter. The fix is simply to disable the filter by leaving count set to zero if we find non-multicast addresses after the exact match table is filled. Signed-off-by: Alex Williamson Signed-off-by: David S. Miller --- drivers/net/tun.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d7b81e4fdd56..09fea31d3e36 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -157,10 +157,16 @@ static int update_filter(struct tap_filter *filter, void __user *arg) nexact = n; - /* The rest is hashed */ + /* Remaining multicast addresses are hashed, + * unicast will leave the filter disabled. */ memset(filter->mask, 0, sizeof(filter->mask)); - for (; n < uf.count; n++) + for (; n < uf.count; n++) { + if (!is_multicast_ether_addr(addr[n].u)) { + err = 0; /* no filter */ + goto done; + } addr_hash_set(filter->mask, addr[n].u); + } /* For ALLMULTI just set the mask to all ones. * This overrides the mask populated above. */ From b991d2bc4a6e1821555bdc2a682f9aed24650c98 Mon Sep 17 00:00:00 2001 From: Risto Suominen Date: Sun, 8 Feb 2009 17:50:34 -0800 Subject: [PATCH 409/566] de2104x: force correct order when writing to rx ring DescOwn should not be set, thus allowing the chip to use the descriptor, before everything else is set up correctly. Signed-off-by: Risto Suominen Signed-off-by: David S. Miller --- drivers/net/tulip/de2104x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 0bf2114738be..d4c5ecc51f77 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -464,13 +464,14 @@ static void de_rx (struct de_private *de) drop = 1; rx_next: - de->rx_ring[rx_tail].opts1 = cpu_to_le32(DescOwn); if (rx_tail == (DE_RX_RING_SIZE - 1)) de->rx_ring[rx_tail].opts2 = cpu_to_le32(RingEnd | de->rx_buf_sz); else de->rx_ring[rx_tail].opts2 = cpu_to_le32(de->rx_buf_sz); de->rx_ring[rx_tail].addr1 = cpu_to_le32(mapping); + wmb(); + de->rx_ring[rx_tail].opts1 = cpu_to_le32(DescOwn); rx_tail = NEXT_RX(rx_tail); } From b3df68f8f5a29888ae693fdb84ebabbc28ed9400 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Sun, 8 Feb 2009 19:20:19 -0800 Subject: [PATCH 410/566] netxen: fix msi-x interrupt handling o Cut down msi-x vectors from 8 to 1 since only one is used for now. o Use separate handler for msi-x, that doesn't unnecessarily scrub msi status register. Signed-off-by: Dhananjay Phadke Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic.h | 2 +- drivers/net/netxen/netxen_nic_main.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index 9c78c963b721..f4dd9acb6877 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -1203,7 +1203,7 @@ typedef struct { #define NETXEN_IS_MSI_FAMILY(adapter) \ ((adapter)->flags & (NETXEN_NIC_MSI_ENABLED | NETXEN_NIC_MSIX_ENABLED)) -#define MSIX_ENTRIES_PER_ADAPTER 8 +#define MSIX_ENTRIES_PER_ADAPTER 1 #define NETXEN_MSIX_TBL_SPACE 8192 #define NETXEN_PCI_REG_MSIX_TBL 0x44 diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 645d384fe87e..3b17a7936147 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -76,6 +76,7 @@ static void netxen_nic_poll_controller(struct net_device *netdev); #endif static irqreturn_t netxen_intr(int irq, void *data); static irqreturn_t netxen_msi_intr(int irq, void *data); +static irqreturn_t netxen_msix_intr(int irq, void *data); /* PCI Device ID Table */ #define ENTRY(device) \ @@ -1084,7 +1085,9 @@ static int netxen_nic_open(struct net_device *netdev) for (ring = 0; ring < adapter->max_rds_rings; ring++) netxen_post_rx_buffers(adapter, ctx, ring); } - if (NETXEN_IS_MSI_FAMILY(adapter)) + if (adapter->flags & NETXEN_NIC_MSIX_ENABLED) + handler = netxen_msix_intr; + else if (adapter->flags & NETXEN_NIC_MSI_ENABLED) handler = netxen_msi_intr; else { flags |= IRQF_SHARED; @@ -1612,6 +1615,14 @@ static irqreturn_t netxen_msi_intr(int irq, void *data) return IRQ_HANDLED; } +static irqreturn_t netxen_msix_intr(int irq, void *data) +{ + struct netxen_adapter *adapter = data; + + napi_schedule(&adapter->napi); + return IRQ_HANDLED; +} + static int netxen_nic_poll(struct napi_struct *napi, int budget) { struct netxen_adapter *adapter = container_of(napi, struct netxen_adapter, napi); From 4f3e797ad07d52d34983354a77b365dfcd48c1b4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 9 Feb 2009 14:22:14 +1100 Subject: [PATCH 411/566] crypto: scatterwalk - Avoid flush_dcache_page on slab pages It's illegal to call flush_dcache_page on slab pages on a number of architectures. So this patch avoids doing so if PageSlab is true. In future we can move the flush_dcache_page call to those page cache users that actually need it. Reported-by: David S. Miller Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 9aeeb52004a5..3de89a424401 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -54,7 +54,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, struct page *page; page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT); - flush_dcache_page(page); + if (!PageSlab(page)) + flush_dcache_page(page); } if (more) { From 40bdac7dbc161639a498697f34fbd1ee800e51f4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 8 Feb 2009 22:00:55 -0800 Subject: [PATCH 412/566] sparc64: Kill .fixup section bloat. This is an implementation of a suggestion made by Chris Torek: -------------------- Something else I noticed in passing: the EX and EX_LD/EX_ST macros scattered throughout the various .S files make a fair bit of .fixup code, all of which does the same thing. At the cost of one symbol in copy_in_user.S, you could just have one common two-instruction retl-and-mov-1 fixup that they all share. -------------------- The following is with a defconfig build: text data bss dec hex filename 3972767 344024 584449 4901240 4ac978 vmlinux.orig 3968887 344024 584449 4897360 4aba50 vmlinux Signed-off-by: David S. Miller --- arch/sparc/kernel/head_64.S | 31 +++++++++++++++++++++++++++--- arch/sparc/lib/GENbzero.S | 6 +----- arch/sparc/lib/GENcopy_from_user.S | 6 +----- arch/sparc/lib/GENcopy_to_user.S | 6 +----- arch/sparc/lib/NG2copy_from_user.S | 7 +------ arch/sparc/lib/NG2copy_to_user.S | 7 +------ arch/sparc/lib/NGbzero.S | 6 +----- arch/sparc/lib/NGcopy_from_user.S | 7 +------ arch/sparc/lib/NGcopy_to_user.S | 7 +------ arch/sparc/lib/U1copy_from_user.S | 6 +----- arch/sparc/lib/U1copy_to_user.S | 6 +----- arch/sparc/lib/U3copy_from_user.S | 6 +----- arch/sparc/lib/U3copy_to_user.S | 6 +----- arch/sparc/lib/bzero.S | 6 +----- arch/sparc/lib/copy_in_user.S | 6 +----- 15 files changed, 42 insertions(+), 77 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 8ffee714f932..a46c3a21e26d 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -891,10 +891,35 @@ prom_tba: .xword 0 tlb_type: .word 0 /* Must NOT end up in BSS */ .section ".fixup",#alloc,#execinstr - .globl __ret_efault, __retl_efault -__ret_efault: + .globl __ret_efault, __retl_efault, __ret_one, __retl_one +ENTRY(__ret_efault) ret restore %g0, -EFAULT, %o0 -__retl_efault: +ENDPROC(__ret_efault) + +ENTRY(__retl_efault) retl mov -EFAULT, %o0 +ENDPROC(__retl_efault) + +ENTRY(__retl_one) + retl + mov 1, %o0 +ENDPROC(__retl_one) + +ENTRY(__ret_one_asi) + wr %g0, ASI_AIUS, %asi + ret + restore %g0, 1, %o0 +ENDPROC(__ret_one_asi) + +ENTRY(__retl_one_asi) + wr %g0, ASI_AIUS, %asi + retl + mov 1, %o0 +ENDPROC(__retl_one_asi) + +ENTRY(__retl_o1) + retl + mov %o1, %o0 +ENDPROC(__retl_o1) diff --git a/arch/sparc/lib/GENbzero.S b/arch/sparc/lib/GENbzero.S index 6a4f956a2f7a..8e7a843ddd88 100644 --- a/arch/sparc/lib/GENbzero.S +++ b/arch/sparc/lib/GENbzero.S @@ -6,13 +6,9 @@ #define EX_ST(x,y) \ 98: x,y; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov %o1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_o1; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index 2b9df99e87f9..9b0e58f9f970 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -5,13 +5,9 @@ #define EX_LD(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index bb3f7084daf9..ce79b495c65d 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -5,13 +5,9 @@ #define EX_ST(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index c77ef5f22102..01ac546a86f4 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -5,14 +5,9 @@ #define EX_LD(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: wr %g0, ASI_AIUS, %asi;\ - retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 4bd4093acbbd..c477e1594a72 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -5,14 +5,9 @@ #define EX_ST(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: wr %g0, ASI_AIUS, %asi;\ - retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGbzero.S b/arch/sparc/lib/NGbzero.S index 814d5f7a45e1..beab29bf419b 100644 --- a/arch/sparc/lib/NGbzero.S +++ b/arch/sparc/lib/NGbzero.S @@ -6,13 +6,9 @@ #define EX_ST(x,y) \ 98: x,y; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov %o1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_o1; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index e7f433f71b42..39bb8912c6e7 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -5,14 +5,9 @@ #define EX_LD(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: wr %g0, ASI_AIUS, %asi;\ - ret; \ - restore %g0, 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __ret_one_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index 6ea01c5532a0..bf62fc1a26c7 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -5,14 +5,9 @@ #define EX_ST(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: wr %g0, ASI_AIUS, %asi;\ - ret; \ - restore %g0, 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __ret_one_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index 3192b0bf4fab..f14efdd6d4a9 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -5,13 +5,9 @@ #define EX_LD(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index d1210ffb0b82..59d531b4a1a5 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -5,13 +5,9 @@ #define EX_ST(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S index f5bfc8d9d216..b1acd1331c33 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -5,13 +5,9 @@ #define EX_LD(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index 2334f111bb0c..b4cbfe5a97ba 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -5,13 +5,9 @@ #define EX_ST(x) \ 98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one; \ .text; \ .align 4; diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S index c7bbae8c590f..b6557297440f 100644 --- a/arch/sparc/lib/bzero.S +++ b/arch/sparc/lib/bzero.S @@ -88,13 +88,9 @@ __bzero_done: #define EX_ST(x,y) \ 98: x,y; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov %o1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_o1; \ .text; \ .align 4; diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 650af3f21f78..884f46499d4e 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -9,13 +9,9 @@ #define EX(x,y) \ 98: x,y; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, 99b; \ + .word 98b, __retl_one; \ .text; \ .align 4; From aeb398768345c74a9e4c01aa3ebf839e858312ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 8 Feb 2009 22:32:31 -0800 Subject: [PATCH 413/566] sparc64: Fix probe_kernel_{read,write}(). This is based upon a report from Chris Torek and his initial patch. From Chris's report: -------------------- This came up in testing kgdb, using the built-in tests -- turn on CONFIG_KGDB_TESTS, then echo V1 > /sys/module/kgdbts/parameters/kgdbts -- but it would affect using kgdb if you were debugging and looking at bad pointers. -------------------- When we get a copy_{from,to}_user() request and the %asi is set to something other than ASI_AIUS (which is userspace) then we branch off to a routine called memcpy_user_stub(). It just does a straight memcpy since we are copying from kernel to kernel in this case. The logic was that since source and destination are both kernel pointers we don't need to have exception checks. But for what probe_kernel_{read,write}() is trying to do, we have to have the checks, otherwise things like kgdb bad kernel pointer accesses don't do the right thing. Signed-off-by: David S. Miller --- arch/sparc/lib/GENcopy_from_user.S | 2 +- arch/sparc/lib/GENcopy_to_user.S | 2 +- arch/sparc/lib/NG2copy_from_user.S | 2 +- arch/sparc/lib/NG2copy_to_user.S | 2 +- arch/sparc/lib/NGcopy_from_user.S | 2 +- arch/sparc/lib/NGcopy_to_user.S | 2 +- arch/sparc/lib/U1copy_from_user.S | 2 +- arch/sparc/lib/U1copy_to_user.S | 2 +- arch/sparc/lib/U3copy_to_user.S | 2 +- arch/sparc/lib/copy_in_user.S | 55 +++++++++--------------------- 10 files changed, 25 insertions(+), 48 deletions(-) diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index 9b0e58f9f970..b7d0bd6b1406 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -23,7 +23,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index ce79b495c65d..780550e1afc7 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -27,7 +27,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index 01ac546a86f4..119ccb9a54f4 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -28,7 +28,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index c477e1594a72..7fe1ccefd9d0 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -37,7 +37,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index 39bb8912c6e7..5d1e4d1ac21e 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -25,7 +25,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index bf62fc1a26c7..ff630dcb273c 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -28,7 +28,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index f14efdd6d4a9..a6ae2ea04bf5 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -23,7 +23,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop; \ #include "U1memcpy.S" diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index 59d531b4a1a5..f4b970eeb485 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -23,7 +23,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop; \ #include "U1memcpy.S" diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index b4cbfe5a97ba..ef1e493afdfa 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -23,7 +23,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ + bne,pn %icc, ___copy_in_user; \ nop; \ #include "U3memcpy.S" diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 884f46499d4e..302c0e60dc2c 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -3,6 +3,7 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ +#include #include #define XCC xcc @@ -27,18 +28,7 @@ * to copy register windows around during thread cloning. */ - .globl ___copy_in_user - .type ___copy_in_user,#function -___copy_in_user: /* %o0=dst, %o1=src, %o2=len */ - /* Writing to %asi is _expensive_ so we hardcode it. - * Reading %asi to check for KERNEL_DS is comparatively - * cheap. - */ - rd %asi, %g1 - cmp %g1, ASI_AIUS - bne,pn %icc, memcpy_user_stub - nop - +ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ cmp %o2, 0 be,pn %XCC, 85f or %o0, %o1, %o3 @@ -49,22 +39,24 @@ ___copy_in_user: /* %o0=dst, %o1=src, %o2=len */ /* 16 < len <= 64 */ andcc %o3, 0x7, %g0 bne,pn %XCC, 90f - sub %o0, %o1, %o3 + nop andn %o2, 0x7, %o4 and %o2, 0x7, %o2 1: subcc %o4, 0x8, %o4 EX(ldxa [%o1] %asi, %o5) - EX(stxa %o5, [%o1 + %o3] ASI_AIUS) + EX(stxa %o5, [%o0] %asi) + add %o1, 0x8, %o1 bgu,pt %XCC, 1b - add %o1, 0x8, %o1 + add %o0, 0x8, %o0 andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 EX(lduwa [%o1] %asi, %o5) - EX(stwa %o5, [%o1 + %o3] ASI_AIUS) + EX(stwa %o5, [%o0] %asi) add %o1, 0x4, %o1 + add %o0, 0x4, %o0 1: cmp %o2, 0 be,pt %XCC, 85f nop @@ -74,14 +66,15 @@ ___copy_in_user: /* %o0=dst, %o1=src, %o2=len */ 80: /* 0 < len <= 16 */ andcc %o3, 0x3, %g0 bne,pn %XCC, 90f - sub %o0, %o1, %o3 + nop 82: subcc %o2, 4, %o2 EX(lduwa [%o1] %asi, %g1) - EX(stwa %g1, [%o1 + %o3] ASI_AIUS) + EX(stwa %g1, [%o0] %asi) + add %o1, 4, %o1 bgu,pt %XCC, 82b - add %o1, 4, %o1 + add %o0, 4, %o0 85: retl clr %o0 @@ -90,26 +83,10 @@ ___copy_in_user: /* %o0=dst, %o1=src, %o2=len */ 90: subcc %o2, 1, %o2 EX(lduba [%o1] %asi, %g1) - EX(stba %g1, [%o1 + %o3] ASI_AIUS) + EX(stba %g1, [%o0] %asi) + add %o1, 1, %o1 bgu,pt %XCC, 90b - add %o1, 1, %o1 + add %o0, 1, %o0 retl clr %o0 - - .size ___copy_in_user, .-___copy_in_user - - /* Act like copy_{to,in}_user(), ie. return zero instead - * of original destination pointer. This is invoked when - * copy_{to,in}_user() finds that %asi is kernel space. - */ - .globl memcpy_user_stub - .type memcpy_user_stub,#function -memcpy_user_stub: - save %sp, -192, %sp - mov %i0, %o0 - mov %i1, %o1 - call memcpy - mov %i2, %o2 - ret - restore %g0, %g0, %o0 - .size memcpy_user_stub, .-memcpy_user_stub +ENDPROC(___copy_in_user) From f1ee5548a6507d2b4293635c61ddbf12e2c00e94 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 414/566] x86/irq: optimize nr_irqs Impact: make nr_irqs depend more on cards used in a system depend on nr_irq_gsi more, and have a ratio for MSI. v2: make nr_irqs less than NR_VECTORS * nr_cpu_ids aka if only one cpu, we only can support nr_irqs = NR_VECTORS Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9578d33f20a0..43f95d7b13af 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3479,9 +3479,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want); - irq_want++; if (irq == 0) return -1; + irq_want = irq + 1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; @@ -3825,11 +3825,17 @@ int __init arch_probe_nr_irqs(void) { int nr; - nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? - (NR_VECTORS + (8 * nr_cpu_ids)) : - (NR_VECTORS + (32 * nr_ioapics))); + if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) + nr_irqs = NR_VECTORS * nr_cpu_ids; - if (nr < nr_irqs && nr > nr_irqs_gsi) + nr = nr_irqs_gsi + 8 * nr_cpu_ids; +#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) + /* + * for MSI and HT dyn irq + */ + nr += nr_irqs_gsi * 16; +#endif + if (nr < nr_irqs) nr_irqs = nr; return 0; From abcaa2b8319a7673e76c2391cb5de3045ab1b401 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 415/566] x86: use NR_IRQS_LEGACY to replace 16 Impact: cleanup also could kill platform_legacy_irq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 4 +--- arch/x86/kernel/io_apic.c | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1a20e3d12006..1b82781b898d 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -25,8 +25,6 @@ #include #include -#define platform_legacy_irq(irq) ((irq) < 16) - /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void error_interrupt(void); @@ -58,7 +56,7 @@ extern void make_8259A_irq(unsigned int irq); extern void init_8259A(int aeoi); /* IOAPIC */ -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 43f95d7b13af..e5be9f35ea54 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3165,6 +3165,7 @@ static int __init ioapic_init_sysfs(void) device_initcall(ioapic_init_sysfs); +static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ @@ -3179,11 +3180,11 @@ unsigned int create_irq_nr(unsigned int irq_want) struct irq_desc *desc_new = NULL; irq = 0; + if (irq_want < nr_irqs_gsi) + irq_want = nr_irqs_gsi; + spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - if (platform_legacy_irq(new)) - continue; - desc_new = irq_to_desc_alloc_cpu(new, cpu); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); @@ -3208,7 +3209,6 @@ unsigned int create_irq_nr(unsigned int irq_want) return irq; } -static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { unsigned int irq_want; From f72dccace737df74d04a96461785a3ad61724b9f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 416/566] x86: check_timer cleanup Impact: make check-timer more robust potentially solve boot fragility For edge trigger io-apic routing, we already unmasked the pin via setup_IO_APIC_irq(), so don't unmask it again. Also call local_irq_disable() between timer_irq_works(), because it calls local_irq_enable() inside. Also remove not needed apic version reading for 64-bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e5be9f35ea54..855209a1b172 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1657,7 +1657,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * to the first CPU. */ entry.dest_mode = apic->irq_dest_mode; - entry.mask = 1; /* mask IRQ now */ + entry.mask = 0; /* don't mask IRQ for edge */ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; @@ -2863,14 +2863,10 @@ static inline void __init check_timer(void) int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; - unsigned int ver; int no_pin1 = 0; local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - /* * get/set the timer IRQ vector: */ @@ -2889,7 +2885,13 @@ static inline void __init check_timer(void) apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); #ifdef CONFIG_X86_32 - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + { + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + } #endif pin1 = find_isa_irq_pin(0, mp_INT); @@ -2928,8 +2930,17 @@ static inline void __init check_timer(void) if (no_pin1) { add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } else { + /* for edge trigger, setup_IO_APIC_irq already + * leave it unmasked. + * so only need to unmask if it is level-trigger + * do we really have level trigger timer? + */ + int idx; + idx = find_irq_entry(apic1, pin1, mp_INT); + if (idx != -1 && irq_trigger(idx)) + unmask_IO_APIC_irq_desc(desc); } - unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -2943,6 +2954,7 @@ static inline void __init check_timer(void) if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); #endif + local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2957,7 +2969,6 @@ static inline void __init check_timer(void) */ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2972,6 +2983,7 @@ static inline void __init check_timer(void) /* * Cleanup, just in case ... */ + local_irq_disable(); disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); @@ -2997,6 +3009,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } + local_irq_disable(); disable_8259A_irq(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); @@ -3014,6 +3027,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } + local_irq_disable(); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. Then try booting with the 'noapic' option.\n"); From cc6c50066ec1ac98bef97117e2f078bb89bbccc7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 417/566] x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 6 ++++++ arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 20 +++++++++++++++----- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index d22f732eab8f..8c5620147c40 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -73,6 +73,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); @@ -84,6 +85,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, return 0; } #endif +#else /* !CONFIG_ACPI: */ +static inline int acpi_probe_gsi(void) +{ + return 0; +} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3efa996b036c..c334fe75dcd6 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -961,6 +961,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mpc_intsrc *m, struct mpc_intsrc *mp_irq) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 855209a1b172..56e51eb551a5 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3824,14 +3824,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } #ifdef CONFIG_SPARSE_IRQ From 2c344e9d6e1938fdf15e93c56d6fe42f8410e9d3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 7 Feb 2009 12:23:37 -0800 Subject: [PATCH 418/566] x86: don't pretend that non-framepointer stack traces are reliable Without frame pointers enabled, the x86 stack traces should not pretend to be reliable; instead they should just be what they are: unreliable. The effect of this is that they have a '?' printed in the stacktrace, to warn the reader that these entries are guesses rather than known based on more reliable information. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f8661..87d103ded1c3 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo, frame = frame->next_frame; bp = (unsigned long) frame; } else { - ops->address(data, addr, bp == 0); + ops->address(data, addr, 0); } print_ftrace_graph_addr(addr, data, ops, tinfo, graph); } From d3770449d3cb058b94ca1d050d5ced4a66c75ce4 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:38 -0500 Subject: [PATCH 419/566] percpu: make PER_CPU_BASE_SECTION overridable by arches Impact: bug fix IA-64 needs to put percpu data in the seperate section even on UP. Fixes regression caused by "percpu: refactor percpu.h" Signed-off-by: Brian Gerst Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/percpu.h | 4 ++-- include/linux/percpu.h | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 77f30b664b4e..30cf46534dd2 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -27,12 +27,12 @@ extern void *per_cpu_init(void); #else /* ! SMP */ -#define PER_CPU_ATTRIBUTES __attribute__((__section__(".data.percpu"))) - #define per_cpu_init() (__phys_per_cpu_start) #endif /* SMP */ +#define PER_CPU_BASE_SECTION ".data.percpu" + /* * Be extremely careful when taking the address of this variable! Due to virtual * remapping, it is different from the canonical address returned by __get_cpu_var(var)! diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0e24202b5a4e..3577ffd90d45 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,8 +8,15 @@ #include +#ifndef PER_CPU_BASE_SECTION #ifdef CONFIG_SMP #define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP #ifdef MODULE #define PER_CPU_SHARED_ALIGNED_SECTION "" @@ -20,7 +27,6 @@ #else -#define PER_CPU_BASE_SECTION ".data" #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_FIRST_SECTION "" From 2add8e235cbe0dcd672c33fc322754e15500238c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:39 -0500 Subject: [PATCH 420/566] x86: use linker to offset symbols by __per_cpu_load Impact: cleanup and bug fix Use the linker to create symbols for certain per-cpu variables that are offset by __per_cpu_load. This allows the removal of the runtime fixup of the GDT pointer, which fixes a bug with resume reported by Jiri Slaby. Reported-by: Jiri Slaby Signed-off-by: Brian Gerst Acked-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 22 ++++++++++++++++++++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/cpu/common.c | 6 +----- arch/x86/kernel/head_64.S | 21 ++------------------- arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++++ 5 files changed, 35 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0b64af4f13ac..aee103b26d01 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -34,6 +34,12 @@ #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ +#ifdef CONFIG_X86_64_SMP +#define INIT_PER_CPU_VAR(var) init_per_cpu__##var +#else +#define INIT_PER_CPU_VAR(var) per_cpu__##var +#endif + #else /* ...!ASSEMBLY */ #include @@ -45,6 +51,22 @@ #define __percpu_arg(x) "%" #x #endif +/* + * Initialized pointers to per-cpu variables needed for the boot + * processor need to use these macros to get the proper address + * offset from __per_cpu_load on SMP. + * + * There also must be an entry in vmlinux_64.lds.S + */ +#define DECLARE_INIT_PER_CPU(var) \ + extern typeof(per_cpu_var(var)) init_per_cpu_var(var) + +#ifdef CONFIG_X86_64_SMP +#define init_per_cpu_var(var) init_per_cpu__##var +#else +#define init_per_cpu_var(var) per_cpu_var(var) +#endif + /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ extern void __bad_percpu_size(void); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 656d02ea509b..373d3f628d24 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -393,6 +393,8 @@ union irq_stack_union { }; DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_INIT_PER_CPU(irq_stack_union); + DECLARE_PER_CPU(char *, irq_stack_ptr); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f73ea423089..41b0de6df873 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -902,12 +902,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); -#ifdef CONFIG_SMP -DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ -#else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; -#endif + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a0a2b5ca9b7d..2e648e3a5ea4 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -205,19 +205,6 @@ ENTRY(secondary_startup_64) pushq $0 popfq -#ifdef CONFIG_SMP - /* - * Fix up static pointers that need __per_cpu_load added. The assembler - * is unable to do this directly. This is only needed for the boot cpu. - * These values are set up with the correct base addresses by C code for - * secondary cpus. - */ - movq initial_gs(%rip), %rax - cmpl $0, per_cpu__cpu_number(%rax) - jne 1f - addq %rax, early_gdt_descr_base(%rip) -1: -#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -275,11 +262,7 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) -#ifdef CONFIG_SMP - .quad __per_cpu_load -#else - .quad PER_CPU_VAR(irq_stack_union) -#endif + .quad INIT_PER_CPU_VAR(irq_stack_union) __FINITDATA ENTRY(stack_start) @@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt) early_gdt_descr: .word GDT_ENTRIES*8-1 early_gdt_descr_base: - .quad per_cpu__gdt_page + .quad INIT_PER_CPU_VAR(gdt_page) ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 07f62d287ff0..087a7f2c639b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -257,6 +257,14 @@ SECTIONS DWARF_DEBUG } + /* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + /* * Build-time check on the image size: */ From 44581a28e805a31661469c4b466b9cd14b36e7b6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:40 -0500 Subject: [PATCH 421/566] x86: fix abuse of per_cpu_offset Impact: bug fix Don't use per_cpu_offset() to determine if it valid to access a per-cpu variable for a given cpu number. It is not a valid assumption on x86-64 anymore. Use cpu_possible() instead. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 08d140fbc31b..deb1c1ab7868 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node) } #ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); dump_stack(); return; @@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu) if (early_per_cpu_ptr(x86_cpu_to_node_map)) return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - if (!per_cpu_offset(cpu)) { + if (!cpu_possible(cpu)) { printk(KERN_WARNING "early_cpu_to_node(%d): no per_cpu area!\n", cpu); dump_stack(); From e736ad548db152776de61d7a26805cfae77ce5ce Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Fri, 6 Feb 2009 16:52:05 -0800 Subject: [PATCH 422/566] x86: add clflush before monitor for Intel 7400 series For Intel 7400 series CPUs, the recommendation is to use a clflush on the monitored address just before monitor and mwait pair [1]. This clflush makes sure that there are no false wakeups from mwait when the monitored address was recently written to. [1] "MONITOR/MWAIT Recommendations for Intel Xeon Processor 7400 series" section in specification update document of 7400 series http://download.intel.com/design/xeon/specupdt/32033601.pdf Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/kernel/cpu/intel.c | 3 +++ arch/x86/kernel/process.c | 6 ++++++ 3 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea408dcba513..7301e60dc4a8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -93,6 +93,7 @@ #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 430e5c38a544..24ff26a38ade 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } + if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) + set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); + #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e30864..6d12f7e37f8c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) @@ -194,6 +197,9 @@ static void mwait_idle(void) struct power_trace it; if (!need_resched()) { trace_power_start(&it, POWER_CSTATE, 1); + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) From 9b2b76a3344146c4d8d300874e73af8161204f87 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:40 -0800 Subject: [PATCH 423/566] x86: add handle_irq() to allow interrupt injection Xen uses a different interrupt path, so introduce handle_irq() to allow interrupts to be inserted into the normal interrupt path. This is handled slightly differently on 32 and 64-bit. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq.h | 1 + arch/x86/kernel/irq_32.c | 34 +++++++++++++++++++++------------- arch/x86/kernel/irq_64.c | 23 ++++++++++++++++------- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 592688ed04d3..d0f6f7d1771c 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -39,6 +39,7 @@ extern void fixup_irqs(void); extern unsigned int do_IRQ(struct pt_regs *regs); extern void init_IRQ(void); extern void native_init_IRQ(void); +extern bool handle_irq(unsigned irq, struct pt_regs *regs); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d802c844991e..61f09fb969ee 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -191,6 +191,26 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } #endif +bool handle_irq(unsigned irq, struct pt_regs *regs) +{ + struct irq_desc *desc; + int overflow; + + overflow = check_stack_overflow(); + + desc = irq_to_desc(irq); + if (unlikely(!desc)) + return false; + + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) + print_stack_overflow(); + desc->handle_irq(irq, desc); + } + + return true; +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -200,31 +220,19 @@ unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int overflow; unsigned vector = ~regs->orig_ax; - struct irq_desc *desc; unsigned irq; - old_regs = set_irq_regs(regs); irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; - overflow = check_stack_overflow(); - - desc = irq_to_desc(irq); - if (unlikely(!desc)) { + if (!handle_irq(irq, regs)) { printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", __func__, irq, vector, smp_processor_id()); BUG(); } - if (!execute_on_irq_stack(overflow, desc, irq)) { - if (unlikely(overflow)) - print_stack_overflow(); - desc->handle_irq(irq, desc); - } - irq_exit(); set_irq_regs(old_regs); return 1; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 018963aa6ee3..a93f3b0dc7f2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -48,6 +48,20 @@ static inline void stack_overflow_check(struct pt_regs *regs) #endif } +bool handle_irq(unsigned irq, struct pt_regs *regs) +{ + struct irq_desc *desc; + + stack_overflow_check(regs); + + desc = irq_to_desc(irq); + if (unlikely(!desc)) + return false; + + generic_handle_irq_desc(irq, desc); + return true; +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -56,7 +70,6 @@ static inline void stack_overflow_check(struct pt_regs *regs) asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct irq_desc *desc; /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; @@ -64,14 +77,10 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) exit_idle(); irq_enter(); + irq = __get_cpu_var(vector_irq)[vector]; - stack_overflow_check(regs); - - desc = irq_to_desc(irq); - if (likely(desc)) - generic_handle_irq_desc(irq, desc); - else { + if (!handle_irq(irq, regs)) { if (!disable_apic) ack_APIC_irq(); From 7c1d7cdcef1b54f4a78892b6b99d19f12c4f398e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:41 -0800 Subject: [PATCH 424/566] x86: unify do_IRQ() With the differences in interrupt handling hoisted into handle_irq(), do_IRQ is more or less identical between 32 and 64 bit, so unify it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq.h | 3 ++- arch/x86/kernel/irq.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 27 --------------------------- arch/x86/kernel/irq_64.c | 33 --------------------------------- 4 files changed, 40 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index d0f6f7d1771c..107eb2196691 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -36,11 +36,12 @@ static inline int irq_canonicalize(int irq) extern void fixup_irqs(void); #endif -extern unsigned int do_IRQ(struct pt_regs *regs); extern void init_IRQ(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); +extern unsigned int do_IRQ(struct pt_regs *regs); + /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); extern int vector_used_by_percpu_irq(unsigned int vector); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 8b30d0c2512c..f13ca1650aaf 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -6,10 +6,12 @@ #include #include #include +#include #include #include #include +#include atomic_t irq_err_count; @@ -188,4 +190,40 @@ u64 arch_irq_stat(void) return sum; } + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +unsigned int __irq_entry do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* high bit used in ret_from_ code */ + unsigned vector = ~regs->orig_ax; + unsigned irq; + + exit_idle(); + irq_enter(); + + irq = __get_cpu_var(vector_irq)[vector]; + + if (!handle_irq(irq, regs)) { +#ifdef CONFIG_X86_64 + if (!disable_apic) + ack_APIC_irq(); +#endif + + if (printk_ratelimit()) + printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", + __func__, smp_processor_id(), vector, irq); + } + + irq_exit(); + + set_irq_regs(old_regs); + return 1; +} + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 61f09fb969ee..4beb9a13873d 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,33 +211,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -unsigned int do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; - unsigned irq; - - old_regs = set_irq_regs(regs); - irq_enter(); - irq = __get_cpu_var(vector_irq)[vector]; - - if (!handle_irq(irq, regs)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", - __func__, irq, vector, smp_processor_id()); - BUG(); - } - - irq_exit(); - set_irq_regs(old_regs); - return 1; -} - #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index a93f3b0dc7f2..977d8b43a0dd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,39 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; - unsigned irq; - - exit_idle(); - irq_enter(); - - irq = __get_cpu_var(vector_irq)[vector]; - - if (!handle_irq(irq, regs)) { - if (!disable_apic) - ack_APIC_irq(); - - if (printk_ratelimit()) - printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", - __func__, smp_processor_id(), vector); - } - - irq_exit(); - - set_irq_regs(old_regs); - return 1; -} - #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) From 54a353a0f845c1dad5fc8183872e750d667838ac Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:42 -0800 Subject: [PATCH 425/566] xen: set irq_chip disable By default, the irq_chip.disable operation is a no-op. Explicitly set it to disable the Xen event channel. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 3141e149d595..7c3705479ea1 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -820,8 +820,11 @@ void xen_irq_resume(void) static struct irq_chip xen_dynamic_chip __read_mostly = { .name = "xen-dyn", + + .disable = disable_dynirq, .mask = disable_dynirq, .unmask = enable_dynirq, + .ack = ack_dynirq, .set_affinity = set_affinity_irq, .retrigger = retrigger_dynirq, From 792dc4f6cdacf50d3f2b93756d282fc04ee34bd5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:43 -0800 Subject: [PATCH 426/566] xen: use our own eventchannel->irq path Rather than overloading vectors for event channels, take full responsibility for mapping an event channel to irq directly. With this patch Xen has its own irq allocator. When the kernel gets an event channel upcall, it maps the event channel number to an irq and injects it into the normal interrupt path. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/events.h | 6 ------ arch/x86/xen/irq.c | 17 +---------------- drivers/xen/events.c | 22 ++++++++++++++++++++-- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 19144184983a..1df35417c412 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -15,10 +15,4 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->flags); } -static inline void xen_do_IRQ(int irq, struct pt_regs *regs) -{ - regs->orig_ax = ~irq; - do_IRQ(regs); -} - #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 5a070900ad35..cfd17799bd6d 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -19,21 +19,6 @@ void xen_force_evtchn_callback(void) (void)HYPERVISOR_xen_version(0, NULL); } -static void __init __xen_init_IRQ(void) -{ - int i; - - /* Create identity vector->irq map */ - for(i = 0; i < NR_VECTORS; i++) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(vector_irq, cpu)[i] = i; - } - - xen_init_IRQ(); -} - static unsigned long xen_save_fl(void) { struct vcpu_info *vcpu; @@ -127,7 +112,7 @@ static void xen_halt(void) } static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = __xen_init_IRQ, + .init_IRQ = xen_init_IRQ, .save_fl = PV_CALLEE_SAVE(xen_save_fl), .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7c3705479ea1..2c8d710713f5 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -517,6 +518,24 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) } +static void xen_do_irq(unsigned irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + if (WARN_ON(irq == -1)) + return; + + exit_idle(); + irq_enter(); + + //printk("cpu %d handling irq %d\n", smp_processor_id(), info->irq); + handle_irq(irq, regs); + + irq_exit(); + + set_irq_regs(old_regs); +} + /* * Search the CPUs pending events bitmasks. For each one found, map * the event number to an irq, and feed it into do_IRQ() for @@ -557,8 +576,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) int port = (word_idx * BITS_PER_LONG) + bit_idx; int irq = evtchn_to_irq[port]; - if (irq != -1) - xen_do_IRQ(irq, regs); + xen_do_irq(irq, regs); } } From ced40d0f3e8833bb8d7d8e2cbfac7da0bf7008c4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:44 -0800 Subject: [PATCH 427/566] xen: pack all irq-related info together Put all irq info into one struct. Also, use a union to keep event channel type-specific information, rather than overloading the index field. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 184 +++++++++++++++++++++++++++++++------------ 1 file changed, 135 insertions(+), 49 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 2c8d710713f5..0541e07d4f67 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -52,18 +52,8 @@ static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; -/* Packed IRQ information: binding type, sub-type index, and event channel. */ -struct packed_irq -{ - unsigned short evtchn; - unsigned char index; - unsigned char type; -}; - -static struct packed_irq irq_info[NR_IRQS]; - -/* Binding types. */ -enum { +/* Interrupt types. */ +enum xen_irq_type { IRQT_UNBOUND, IRQT_PIRQ, IRQT_VIRQ, @@ -71,8 +61,34 @@ enum { IRQT_EVTCHN }; -/* Convenient shorthand for packed representation of an unbound IRQ. */ -#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) +/* + * Packed IRQ information: + * type - enum xen_irq_type + * event channel - irq->event channel mapping + * cpu - cpu this event channel is bound to + * index - type-specific information: + * PIRQ - vector, with MSB being "needs EIO" + * VIRQ - virq number + * IPI - IPI vector + * EVTCHN - + */ +struct irq_info +{ + enum xen_irq_type type; /* type */ + unsigned short evtchn; /* event channel */ + unsigned short cpu; /* cpu bound */ + + union { + unsigned short virq; + enum ipi_vector ipi; + struct { + unsigned short gsi; + unsigned short vector; + } pirq; + } u; +}; + +static struct irq_info irq_info[NR_IRQS]; static int evtchn_to_irq[NR_EVENT_CHANNELS] = { [0 ... NR_EVENT_CHANNELS-1] = -1 @@ -85,7 +101,6 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) { return cpu_evtchn_mask_p[cpu].bits; } -static u8 cpu_evtchn[NR_EVENT_CHANNELS]; /* Reference counts for bindings to IRQs. */ static int irq_bindcount[NR_IRQS]; @@ -96,27 +111,107 @@ static int irq_bindcount[NR_IRQS]; static struct irq_chip xen_dynamic_chip; /* Constructor for packed IRQ information. */ -static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) +static struct irq_info mk_unbound_info(void) { - return (struct packed_irq) { evtchn, index, type }; + return (struct irq_info) { .type = IRQT_UNBOUND }; +} + +static struct irq_info mk_evtchn_info(unsigned short evtchn) +{ + return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn }; +} + +static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) +{ + return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, + .u.ipi = ipi }; +} + +static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) +{ + return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, + .u.virq = virq }; +} + +static struct irq_info mk_pirq_info(unsigned short evtchn, + unsigned short gsi, unsigned short vector) +{ + return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, + .u.pirq = { .gsi = gsi, .vector = vector } }; } /* * Accessors for packed IRQ information. */ -static inline unsigned int evtchn_from_irq(int irq) +static struct irq_info *info_for_irq(unsigned irq) { - return irq_info[irq].evtchn; + return &irq_info[irq]; } -static inline unsigned int index_from_irq(int irq) +static unsigned int evtchn_from_irq(unsigned irq) { - return irq_info[irq].index; + return info_for_irq(irq)->evtchn; } -static inline unsigned int type_from_irq(int irq) +static enum ipi_vector ipi_from_irq(unsigned irq) { - return irq_info[irq].type; + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_IPI); + + return info->u.ipi; +} + +static unsigned virq_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_VIRQ); + + return info->u.virq; +} + +static unsigned gsi_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.gsi; +} + +static unsigned vector_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.vector; +} + +static enum xen_irq_type type_from_irq(unsigned irq) +{ + return info_for_irq(irq)->type; +} + +static unsigned cpu_from_irq(unsigned irq) +{ + return info_for_irq(irq)->cpu; +} + +static unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + int irq = evtchn_to_irq[evtchn]; + unsigned ret = 0; + + if (irq != -1) + ret = cpu_from_irq(irq); + + return ret; } static inline unsigned long active_evtchns(unsigned int cpu, @@ -137,10 +232,10 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif - __clear_bit(chn, cpu_evtchn_mask(cpu_evtchn[chn])); + __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); __set_bit(chn, cpu_evtchn_mask(cpu)); - cpu_evtchn[chn] = cpu; + irq_info[irq].cpu = cpu; } static void init_evtchn_cpu_bindings(void) @@ -155,15 +250,9 @@ static void init_evtchn_cpu_bindings(void) } #endif - memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0))); } -static inline unsigned int cpu_from_evtchn(unsigned int evtchn) -{ - return cpu_evtchn[evtchn]; -} - static inline void clear_evtchn(int port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -253,6 +342,8 @@ static int find_unbound_irq(void) if (WARN_ON(desc == NULL)) return -1; + dynamic_irq_init(irq); + return irq; } @@ -267,12 +358,11 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq == -1) { irq = find_unbound_irq(); - dynamic_irq_init(irq); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_level_irq, "event"); evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + irq_info[irq] = mk_evtchn_info(evtchn); } irq_bindcount[irq]++; @@ -296,7 +386,6 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) if (irq < 0) goto out; - dynamic_irq_init(irq); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_level_irq, "ipi"); @@ -307,7 +396,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) evtchn = bind_ipi.port; evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + irq_info[irq] = mk_ipi_info(evtchn, ipi); per_cpu(ipi_to_irq, cpu)[ipi] = irq; @@ -341,12 +430,11 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) irq = find_unbound_irq(); - dynamic_irq_init(irq); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_level_irq, "virq"); evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + irq_info[irq] = mk_virq_info(evtchn, virq); per_cpu(virq_to_irq, cpu)[virq] = irq; @@ -375,11 +463,11 @@ static void unbind_from_irq(unsigned int irq) switch (type_from_irq(irq)) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) - [index_from_irq(irq)] = -1; + [virq_from_irq(irq)] = -1; break; case IRQT_IPI: per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) - [index_from_irq(irq)] = -1; + [ipi_from_irq(irq)] = -1; break; default: break; @@ -389,7 +477,7 @@ static void unbind_from_irq(unsigned int irq) bind_evtchn_to_cpu(evtchn, 0); evtchn_to_irq[evtchn] = -1; - irq_info[irq] = IRQ_UNBOUND; + irq_info[irq] = mk_unbound_info(); dynamic_irq_cleanup(irq); } @@ -507,8 +595,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) for(i = 0; i < NR_EVENT_CHANNELS; i++) { if (sync_test_bit(i, sh->evtchn_pending)) { printk(" %d: event %d -> irq %d\n", - cpu_evtchn[i], i, - evtchn_to_irq[i]); + cpu_from_evtchn(i), i, + evtchn_to_irq[i]); } } @@ -606,7 +694,7 @@ void rebind_evtchn_irq(int evtchn, int irq) BUG_ON(irq_bindcount[irq] == 0); evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + irq_info[irq] = mk_evtchn_info(evtchn); spin_unlock(&irq_mapping_update_lock); @@ -716,8 +804,7 @@ static void restore_cpu_virqs(unsigned int cpu) if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) continue; - BUG_ON(irq_info[irq].type != IRQT_VIRQ); - BUG_ON(irq_info[irq].index != virq); + BUG_ON(virq_from_irq(irq) != virq); /* Get a new binding from Xen. */ bind_virq.virq = virq; @@ -729,7 +816,7 @@ static void restore_cpu_virqs(unsigned int cpu) /* Record the new mapping. */ evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + irq_info[irq] = mk_virq_info(evtchn, virq); bind_evtchn_to_cpu(evtchn, cpu); /* Ready for use. */ @@ -746,8 +833,7 @@ static void restore_cpu_ipis(unsigned int cpu) if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) continue; - BUG_ON(irq_info[irq].type != IRQT_IPI); - BUG_ON(irq_info[irq].index != ipi); + BUG_ON(ipi_from_irq(irq) != ipi); /* Get a new binding from Xen. */ bind_ipi.vcpu = cpu; @@ -758,7 +844,7 @@ static void restore_cpu_ipis(unsigned int cpu) /* Record the new mapping. */ evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + irq_info[irq] = mk_ipi_info(evtchn, ipi); bind_evtchn_to_cpu(evtchn, cpu); /* Ready for use. */ From d77bbd4db475e2edc78edb7f94a258159c140b54 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:45 -0800 Subject: [PATCH 428/566] xen: remove irq bindcount There should be no need for us to maintain our own bind count for irqs, since the surrounding irq system should keep track of shared irqs for us. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0541e07d4f67..459121c53251 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -54,7 +54,7 @@ static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = - /* Interrupt types. */ enum xen_irq_type { - IRQT_UNBOUND, + IRQT_UNBOUND = 0, IRQT_PIRQ, IRQT_VIRQ, IRQT_IPI, @@ -102,9 +102,6 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) return cpu_evtchn_mask_p[cpu].bits; } -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) @@ -330,9 +327,8 @@ static int find_unbound_irq(void) int irq; struct irq_desc *desc; - /* Only allocate from dynirq range */ for (irq = 0; irq < nr_irqs; irq++) - if (irq_bindcount[irq] == 0) + if (irq_info[irq].type == IRQT_UNBOUND) break; if (irq == nr_irqs) @@ -365,8 +361,6 @@ int bind_evtchn_to_irq(unsigned int evtchn) irq_info[irq] = mk_evtchn_info(evtchn); } - irq_bindcount[irq]++; - spin_unlock(&irq_mapping_update_lock); return irq; @@ -403,8 +397,6 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) bind_evtchn_to_cpu(evtchn, cpu); } - irq_bindcount[irq]++; - out: spin_unlock(&irq_mapping_update_lock); return irq; @@ -441,8 +433,6 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) bind_evtchn_to_cpu(evtchn, cpu); } - irq_bindcount[irq]++; - spin_unlock(&irq_mapping_update_lock); return irq; @@ -455,7 +445,7 @@ static void unbind_from_irq(unsigned int irq) spin_lock(&irq_mapping_update_lock); - if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { + if (VALID_EVTCHN(evtchn)) { close.port = evtchn; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) BUG(); @@ -681,6 +671,8 @@ out: /* Rebind a new event channel to an existing irq. */ void rebind_evtchn_irq(int evtchn, int irq) { + struct irq_info *info = info_for_irq(irq); + /* Make sure the irq is masked, since the new event channel will also be masked. */ disable_irq(irq); @@ -690,8 +682,8 @@ void rebind_evtchn_irq(int evtchn, int irq) /* After resume the irq<->evtchn mappings are all cleared out */ BUG_ON(evtchn_to_irq[evtchn] != -1); /* Expect irq to have been bound before, - so the bindcount should be non-0 */ - BUG_ON(irq_bindcount[irq] == 0); + so there should be a proper type */ + BUG_ON(info->type == IRQT_UNBOUND); evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_evtchn_info(evtchn); @@ -948,9 +940,5 @@ void __init xen_init_IRQ(void) for (i = 0; i < NR_EVENT_CHANNELS; i++) mask_evtchn(i); - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for (i = 0; i < nr_irqs; i++) - irq_bindcount[i] = 0; - irq_ctx_init(smp_processor_id()); } From 3445a8fd7c6868bd9db0d1bea7d6e89004552122 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:46 -0800 Subject: [PATCH 429/566] xen: make sure that softirqs get handled at the end of event processing Make sure that irq_enter()/irq_exit() wrap the entire event processing loop, rather than each individual event invokation. This makes sure that softirq processing is deferred until the end of event processing, rather than in the middle with interrupts disabled. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 459121c53251..e53fd60fe4b8 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -595,25 +595,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } - -static void xen_do_irq(unsigned irq, struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - if (WARN_ON(irq == -1)) - return; - - exit_idle(); - irq_enter(); - - //printk("cpu %d handling irq %d\n", smp_processor_id(), info->irq); - handle_irq(irq, regs); - - irq_exit(); - - set_irq_regs(old_regs); -} - /* * Search the CPUs pending events bitmasks. For each one found, map * the event number to an irq, and feed it into do_IRQ() for @@ -626,11 +607,15 @@ static void xen_do_irq(unsigned irq, struct pt_regs *regs) void xen_evtchn_do_upcall(struct pt_regs *regs) { int cpu = get_cpu(); + struct pt_regs *old_regs = set_irq_regs(regs); struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); static DEFINE_PER_CPU(unsigned, nesting_count); unsigned count; + exit_idle(); + irq_enter(); + do { unsigned long pending_words; @@ -654,7 +639,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) int port = (word_idx * BITS_PER_LONG) + bit_idx; int irq = evtchn_to_irq[port]; - xen_do_irq(irq, regs); + if (irq != -1) + handle_irq(irq, regs); } } @@ -665,6 +651,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) } while(count != 1); out: + irq_exit(); + set_irq_regs(old_regs); + put_cpu(); } From 90af9514ac99f51e81682c7bec8f9fb88a17a95c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 16:55:58 -0800 Subject: [PATCH 430/566] xen: explicitly initialise the cpu field of irq_info I was seeing a very odd crash on 64 bit in bind_evtchn_to_cpu because cpu_from_irq(irq) was coming out as -1. I found this was coming direct from the mk_ipi_info call. It's not clear to me that this isn't a compiler bug (implicit initialisation to zero of unsigned shorts in a struct not handled correctly?). On the other hand is it true that all event channels start of bound to CPU 0? If not then -1 might be correct and the various other functions should cope with this. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index e53fd60fe4b8..30963af5dba0 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -115,26 +115,27 @@ static struct irq_info mk_unbound_info(void) static struct irq_info mk_evtchn_info(unsigned short evtchn) { - return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn }; + return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn, + .cpu = 0 }; } static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) { return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, - .u.ipi = ipi }; + .cpu = 0, .u.ipi = ipi }; } static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) { return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, - .u.virq = virq }; + .cpu = 0, .u.virq = virq }; } static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short gsi, unsigned short vector) { return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, - .u.pirq = { .gsi = gsi, .vector = vector } }; + .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } }; } /* @@ -375,6 +376,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) spin_lock(&irq_mapping_update_lock); irq = per_cpu(ipi_to_irq, cpu)[ipi]; + if (irq == -1) { irq = find_unbound_irq(); if (irq < 0) @@ -391,7 +393,6 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_ipi_info(evtchn, ipi); - per_cpu(ipi_to_irq, cpu)[ipi] = irq; bind_evtchn_to_cpu(evtchn, cpu); From 3f4a739c6accd651a11fcf3c7a20ec8147c42660 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 431/566] x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 6 ++++++ arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 20 +++++++++++++++----- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 62d14ce3cd00..bd22f2a3713f 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -60,6 +60,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); @@ -71,6 +72,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, return 0; } #endif +#else /* !CONFIG_ACPI: */ +static inline int acpi_probe_gsi(void) +{ + return 0; +} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f438..7678f10c4568 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9b0c480c383b..bc7ac4da90d7 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } /* -------------------------------------------------------------------------- From 55a8ba4b7f76bebd7e8ce3f74c04b140627a1bad Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 6 Feb 2009 10:29:35 -0800 Subject: [PATCH 432/566] x86, vmi: put a missing paravirt_release_pmd in pgd_dtor Commit 6194ba6ff6ccf8d5c54c857600843c67aa82c407 ("x86: don't special-case pmd allocations as much") made changes to the way we handle pmd allocations, and while doing that it dropped a call to paravirt_release_pd on the pgd page from the pgd_dtor code path. As a result of this missing release, the hypervisor is now unaware of the pgd page being freed, and as a result it ends up tracking this page as a page table page. After this the guest may start using the same page for other purposes, and depending on what use the page is put to, it may result in various performance and/or functional issues ( hangs, reboots). Since this release is only required for VMI, I now release the pgd page from the (vmi)_pgd_free hook. Signed-off-by: Alok N Kataria Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: --- arch/x86/kernel/vmi_32.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302cc2ddf..bef58b4982db 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -320,6 +320,16 @@ static void vmi_release_pmd(unsigned long pfn) vmi_ops.release_page(pfn, VMI_PAGE_L2); } +/* + * We use the pgd_free hook for releasing the pgd page: + */ +static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; + + vmi_ops.release_page(pfn, VMI_PAGE_L2); +} + /* * Helper macros for MMU update flags. We can defer updates until a flush * or page invalidation only if the update is to the current address space @@ -762,6 +772,7 @@ static inline int __init activate_vmi(void) if (vmi_ops.release_page) { pv_mmu_ops.release_pte = vmi_release_pte; pv_mmu_ops.release_pmd = vmi_release_pmd; + pv_mmu_ops.pgd_free = vmi_pgd_free; } /* Set linear is needed in all cases */ From 1c14fa4937eb73509e07ac12bf8db1fdf4c42a59 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Feb 2009 15:39:38 -0800 Subject: [PATCH 433/566] x86: use early_ioremap in __acpi_map_table __acpi_map_table() effectively reimplements early_ioremap(). Rather than have that duplication, just implement it in terms of early_ioremap(). However, unlike early_ioremap(), __acpi_map_table() just maintains a single mapping which gets replaced each call, and has no corresponding unmap function. Implement this by just removing the previous mapping each time its called. Unfortunately, this will leave a stray mapping at the end. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 3 --- arch/x86/include/asm/fixmap_32.h | 4 ---- arch/x86/include/asm/fixmap_64.h | 4 ---- arch/x86/kernel/acpi/boot.c | 27 +++++++-------------------- 4 files changed, 7 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 9830681446ad..4518dc500903 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -102,9 +102,6 @@ static inline void disable_acpi(void) acpi_noirq = 1; } -/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ -#define FIX_ACPI_PAGES 4 - extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); static inline void acpi_noirq_set(void) { acpi_noirq = 1; } diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index c7115c1d7217..047d9bab2b31 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -95,10 +95,6 @@ enum fixed_addresses { (__end_of_permanent_fixed_addresses & 255), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, FIX_WP_TEST, -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 00a30ab9b1a5..298d9ba3faeb 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -50,10 +50,6 @@ enum fixed_addresses { FIX_PARAVIRT_BOOTMAP, #endif __end_of_permanent_fixed_addresses, -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f438..c518599e4264 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -121,8 +121,8 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; */ char *__init __acpi_map_table(unsigned long phys, unsigned long size) { - unsigned long base, offset, mapped_size; - int idx; + static char *prev_map; + static unsigned long prev_size; if (!phys || !size) return NULL; @@ -130,26 +130,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) return __va(phys); - offset = phys & (PAGE_SIZE - 1); - mapped_size = PAGE_SIZE - offset; - clear_fixmap(FIX_ACPI_END); - set_fixmap(FIX_ACPI_END, phys); - base = fix_to_virt(FIX_ACPI_END); + if (prev_map) + early_iounmap(prev_map, prev_size); - /* - * Most cases can be covered by the below. - */ - idx = FIX_ACPI_END; - while (mapped_size < size) { - if (--idx < FIX_ACPI_BEGIN) - return NULL; /* cannot handle this */ - phys += PAGE_SIZE; - clear_fixmap(idx); - set_fixmap(idx, phys); - mapped_size += PAGE_SIZE; - } + prev_size = size; + prev_map = early_ioremap(phys, size); - return ((unsigned char *)base + offset); + return prev_map; } #ifdef CONFIG_PCI_MMCONFIG From eecb9a697f0b790e5840dae8a8b866bea49a86ee Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Feb 2009 15:39:38 -0800 Subject: [PATCH 434/566] x86: always explicitly map acpi memory Always map acpi tables, rather than assuming we can use the normal linear mapping to access the acpi tables. This is necessary in a virtual environment where the linear mappings are to pseudo-physical memory, but the acpi tables exist at a real physical address. It doesn't hurt to map in the normal non-virtual case, so just do it unconditionally. Signed-off-by: Jeremy Fitzhardinge Acked-by: Len Brown Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c518599e4264..5424a18f2e4e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -127,9 +127,6 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) if (!phys || !size) return NULL; - if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) - return __va(phys); - if (prev_map) early_iounmap(prev_map, prev_size); From 05876f88ed9a66b26af613e222795ae790616252 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Feb 2009 15:39:40 -0800 Subject: [PATCH 435/566] acpi: remove final __acpi_map_table mapping before setting acpi_gbl_permanent_mmap On x86, __acpi_map_table uses early_ioremap() to create the mapping, replacing the previous mapping with a new one. Once enough of the kernel is up an running it switches to using normal ioremap(). At that point, we need to clean up the final mapping to avoid a warning from the early_ioremap subsystem. This can be removed after all the instances in the ACPI code are fixed that rely on early-ioremap's implicit overmapping of previously mapped tables. Signed-off-by: Jeremy Fitzhardinge Acked-by: Len Brown Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 8 +++++--- drivers/acpi/bus.c | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5424a18f2e4e..7217834f6b1d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -124,12 +124,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) static char *prev_map; static unsigned long prev_size; + if (prev_map) { + early_iounmap(prev_map, prev_size); + prev_map = NULL; + } + if (!phys || !size) return NULL; - if (prev_map) - early_iounmap(prev_map, prev_size); - prev_size = size; prev_map = early_ioremap(phys, size); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 765fd1c56cd6..fb1be7b5dbc1 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -694,6 +694,12 @@ void __init acpi_early_init(void) if (!acpi_strict) acpi_gbl_enable_interpreter_slack = TRUE; + /* + * Doing a zero-sized mapping will clear out the previous + * __acpi_map_table() mapping, if any. + */ + __acpi_map_table(0, 0); + acpi_gbl_permanent_mmap = 1; status = acpi_reallocate_root_table(); From 7d97277b754d3ee098a5ec69b6aaafb00c94e2f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 7 Feb 2009 15:39:41 -0800 Subject: [PATCH 436/566] acpi/x86: introduce __apci_map_table, v4 to prevent wrongly overwriting fixmap that still want to use. ACPI used to rely on low mappings being all linearly mapped and grew a habit: it never really unmapped certain kinds of tables after use. This can cause problems - for example the hypothetical case when some spurious access still references it. v2: remove prev_map and prev_size in __apci_map_table v3: let acpi_os_unmap_memory() call early_iounmap too, so remove extral calling to early_acpi_os_unmap_memory v4: fix typo in one acpi_get_table_with_size calling Signed-off-by: Yinghai Lu Acked-by: Len Brown Signed-off-by: Ingo Molnar --- arch/ia64/kernel/acpi.c | 4 ++++ arch/x86/kernel/acpi/boot.c | 17 +++++++---------- drivers/acpi/acpica/tbxface.c | 17 ++++++++++++++--- drivers/acpi/bus.c | 6 ------ drivers/acpi/osl.c | 11 +++++++++-- drivers/acpi/tables.c | 20 ++++++++++++++------ include/acpi/acpiosxf.h | 1 + include/acpi/acpixf.h | 4 ++++ include/linux/acpi.h | 1 + 9 files changed, 54 insertions(+), 27 deletions(-) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index d541671caf4a..2363ed173198 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -199,6 +199,10 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) return __va(phys_addr); } +char *__init __acpi_unmap_table(unsigned long virt_addr, unsigned long size) +{ +} + /* -------------------------------------------------------------------------- Boot-time Table Parsing -------------------------------------------------------------------------- */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7217834f6b1d..4c2aaea42930 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -121,21 +121,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; */ char *__init __acpi_map_table(unsigned long phys, unsigned long size) { - static char *prev_map; - static unsigned long prev_size; - - if (prev_map) { - early_iounmap(prev_map, prev_size); - prev_map = NULL; - } if (!phys || !size) return NULL; - prev_size = size; - prev_map = early_ioremap(phys, size); + return early_ioremap(phys, size); +} +void __init __acpi_unmap_table(char *map, unsigned long size) +{ + if (!map || !size) + return; - return prev_map; + early_iounmap(map, size); } #ifdef CONFIG_PCI_MMCONFIG diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c index c3e841f3cde9..ab0aff3c7d6a 100644 --- a/drivers/acpi/acpica/tbxface.c +++ b/drivers/acpi/acpica/tbxface.c @@ -365,7 +365,7 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) /******************************************************************************* * - * FUNCTION: acpi_get_table + * FUNCTION: acpi_get_table_with_size * * PARAMETERS: Signature - ACPI signature of needed table * Instance - Which instance (for SSDTs) @@ -377,8 +377,9 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) * *****************************************************************************/ acpi_status -acpi_get_table(char *signature, - u32 instance, struct acpi_table_header **out_table) +acpi_get_table_with_size(char *signature, + u32 instance, struct acpi_table_header **out_table, + acpi_size *tbl_size) { u32 i; u32 j; @@ -408,6 +409,7 @@ acpi_get_table(char *signature, acpi_tb_verify_table(&acpi_gbl_root_table_list.tables[i]); if (ACPI_SUCCESS(status)) { *out_table = acpi_gbl_root_table_list.tables[i].pointer; + *tbl_size = acpi_gbl_root_table_list.tables[i].length; } if (!acpi_gbl_permanent_mmap) { @@ -420,6 +422,15 @@ acpi_get_table(char *signature, return (AE_NOT_FOUND); } +acpi_status +acpi_get_table(char *signature, + u32 instance, struct acpi_table_header **out_table) +{ + acpi_size tbl_size; + + return acpi_get_table_with_size(signature, + instance, out_table, &tbl_size); +} ACPI_EXPORT_SYMBOL(acpi_get_table) /******************************************************************************* diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index fb1be7b5dbc1..765fd1c56cd6 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -694,12 +694,6 @@ void __init acpi_early_init(void) if (!acpi_strict) acpi_gbl_enable_interpreter_slack = TRUE; - /* - * Doing a zero-sized mapping will clear out the previous - * __acpi_map_table() mapping, if any. - */ - __acpi_map_table(0, 0); - acpi_gbl_permanent_mmap = 1; status = acpi_reallocate_root_table(); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index b3193ec0a2ef..d1dd5160daa9 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -274,12 +274,19 @@ EXPORT_SYMBOL_GPL(acpi_os_map_memory); void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) { - if (acpi_gbl_permanent_mmap) { + if (acpi_gbl_permanent_mmap) iounmap(virt); - } + else + __acpi_unmap_table(virt, size); } EXPORT_SYMBOL_GPL(acpi_os_unmap_memory); +void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size) +{ + if (!acpi_gbl_permanent_mmap) + __acpi_unmap_table(virt, size); +} + #ifdef ACPI_FUTURE_USAGE acpi_status acpi_os_get_physical_address(void *virt, acpi_physical_address * phys) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index a8852952fac4..fec1ae36d431 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -181,14 +181,15 @@ acpi_table_parse_entries(char *id, struct acpi_subtable_header *entry; unsigned int count = 0; unsigned long table_end; + acpi_size tbl_size; if (!handler) return -EINVAL; if (strncmp(id, ACPI_SIG_MADT, 4) == 0) - acpi_get_table(id, acpi_apic_instance, &table_header); + acpi_get_table_with_size(id, acpi_apic_instance, &table_header, &tbl_size); else - acpi_get_table(id, 0, &table_header); + acpi_get_table_with_size(id, 0, &table_header, &tbl_size); if (!table_header) { printk(KERN_WARNING PREFIX "%4.4s not present\n", id); @@ -206,8 +207,10 @@ acpi_table_parse_entries(char *id, table_end) { if (entry->type == entry_id && (!max_entries || count++ < max_entries)) - if (handler(entry, table_end)) + if (handler(entry, table_end)) { + early_acpi_os_unmap_memory((char *)table_header, tbl_size); return -EINVAL; + } entry = (struct acpi_subtable_header *) ((unsigned long)entry + entry->length); @@ -217,6 +220,7 @@ acpi_table_parse_entries(char *id, "%i found\n", id, entry_id, count - max_entries, count); } + early_acpi_os_unmap_memory((char *)table_header, tbl_size); return count; } @@ -241,17 +245,19 @@ acpi_table_parse_madt(enum acpi_madt_type id, int __init acpi_table_parse(char *id, acpi_table_handler handler) { struct acpi_table_header *table = NULL; + acpi_size tbl_size; if (!handler) return -EINVAL; if (strncmp(id, ACPI_SIG_MADT, 4) == 0) - acpi_get_table(id, acpi_apic_instance, &table); + acpi_get_table_with_size(id, acpi_apic_instance, &table, &tbl_size); else - acpi_get_table(id, 0, &table); + acpi_get_table_with_size(id, 0, &table, &tbl_size); if (table) { handler(table); + early_acpi_os_unmap_memory(table, tbl_size); return 0; } else return 1; @@ -265,8 +271,9 @@ int __init acpi_table_parse(char *id, acpi_table_handler handler) static void __init check_multiple_madt(void) { struct acpi_table_header *table = NULL; + acpi_size tbl_size; - acpi_get_table(ACPI_SIG_MADT, 2, &table); + acpi_get_table_with_size(ACPI_SIG_MADT, 2, &table, &tbl_size); if (table) { printk(KERN_WARNING PREFIX "BIOS bug: multiple APIC/MADT found," @@ -275,6 +282,7 @@ static void __init check_multiple_madt(void) "If \"acpi_apic_instance=%d\" works better, " "notify linux-acpi@vger.kernel.org\n", acpi_apic_instance ? 0 : 2); + early_acpi_os_unmap_memory(table, tbl_size); } else acpi_apic_instance = 0; diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a62720a7edc0..ab0b85cf21f3 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -144,6 +144,7 @@ void __iomem *acpi_os_map_memory(acpi_physical_address where, acpi_size length); void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size); +void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size); #ifdef ACPI_FUTURE_USAGE acpi_status diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c8e8cf45830f..cc40102fe2f3 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -130,6 +130,10 @@ acpi_get_table_header(acpi_string signature, struct acpi_table_header *out_table_header); acpi_status +acpi_get_table_with_size(acpi_string signature, + u32 instance, struct acpi_table_header **out_table, + acpi_size *tbl_size); +acpi_status acpi_get_table(acpi_string signature, u32 instance, struct acpi_table_header **out_table); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6fce2fc2d124..d59f0fa4d772 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); +void __init __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); From b825e6cc7b1401862531df497a4a4daff8102ed5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 7 Feb 2009 15:39:41 -0800 Subject: [PATCH 437/566] x86, es7000: fix ACPI table mappings Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 53699c931ad4..71d7be624d46 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -292,24 +292,31 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; int i = 0; + acpi_size tbl_size; - while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { struct oem_table *t = (struct oem_table *)header; oem_addrX = t->OEMTableAddr; oem_size = t->OEMTableSize; + early_acpi_os_unmap_memory(header, tbl_size); *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); return 0; } + early_acpi_os_unmap_memory(header, tbl_size); } return -1; } void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { + if (!oem_addr) + return; + + __acpi_unmap_table((char *)oem_addr, oem_size); } #endif From 914c3d630b29b07d04908eab1b246812dadd5bd6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 438/566] x86: include correct %gs in a.out core dump Impact: dump the correct %gs into a.out core dump aout_dump_thread() read %gs but didn't include it in core dump. Fix it. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/a.out-core.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 37822206083e..3c601f8224be 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -23,8 +23,6 @@ */ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) { - u16 gs; - /* changed the size calculations - should hopefully work better. lbt */ dump->magic = CMAGIC; dump->start_code = 0; @@ -57,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->regs.ds = (u16)regs->ds; dump->regs.es = (u16)regs->es; dump->regs.fs = (u16)regs->fs; - savesegment(gs, gs); + savesegment(gs, dump->regs.gs); dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = (u16)regs->cs; From ae6af41f5a4841f06eb92bc86ad020ad44ae2a30 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 439/566] x86: math_emu info cleanup Impact: cleanup * Come on, struct info? s/struct info/struct math_emu_info/ * Use struct pt_regs and kernel_vm86_regs instead of defining its own register frame structure. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/math_emu.h | 29 ++++----------- arch/x86/include/asm/processor.h | 2 +- arch/x86/math-emu/fpu_entry.c | 2 +- arch/x86/math-emu/fpu_proto.h | 2 +- arch/x86/math-emu/fpu_system.h | 14 +++---- arch/x86/math-emu/get_address.c | 63 +++++++++++++++----------------- 6 files changed, 48 insertions(+), 64 deletions(-) diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 5a65b107ad58..302492c77956 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -1,31 +1,18 @@ #ifndef _ASM_X86_MATH_EMU_H #define _ASM_X86_MATH_EMU_H +#include +#include + /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved automatically by the 80386/80486. */ -struct info { +struct math_emu_info { long ___orig_eip; - long ___ebx; - long ___ecx; - long ___edx; - long ___esi; - long ___edi; - long ___ebp; - long ___eax; - long ___ds; - long ___es; - long ___fs; - long ___orig_eax; - long ___eip; - long ___cs; - long ___eflags; - long ___esp; - long ___ss; - long ___vm86_es; /* This and the following only in vm86 mode */ - long ___vm86_ds; - long ___vm86_fs; - long ___vm86_gs; + union { + struct pt_regs regs; + struct kernel_vm86_regs vm86; + }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2e..3bfd5235a9eb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -353,7 +353,7 @@ struct i387_soft_struct { u8 no_update; u8 rm; u8 alimit; - struct info *info; + struct math_emu_info *info; u32 entry_eip; }; diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index c7b06feb139b..c268abe72253 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -659,7 +659,7 @@ static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, } } -void math_abort(struct info *info, unsigned int signal) +void math_abort(struct math_emu_info *info, unsigned int signal) { FPU_EIP = FPU_ORIG_EIP; current->thread.trap_no = 16; diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index aa49b6a0d850..51bfbb61c5b1 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h @@ -52,7 +52,7 @@ extern void fst_i_(void); extern void fstp_i(void); /* fpu_entry.c */ asmlinkage extern void math_emulate(long arg); -extern void math_abort(struct info *info, unsigned int signal); +extern void math_abort(struct math_emu_info *info, unsigned int signal); /* fpu_etc.c */ extern void FPU_etc(void); /* fpu_tags.c */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 13488fa153e0..6729c6a31348 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -18,7 +18,7 @@ /* This sets the pointer FPU_info to point to the argument part of the stack frame of math_emulate() */ -#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg +#define SETUP_DATA_AREA(arg) FPU_info = (struct math_emu_info *) &arg /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ @@ -38,12 +38,12 @@ #define I387 (current->thread.xstate) #define FPU_info (I387->soft.info) -#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) -#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) -#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) -#define FPU_EAX (FPU_info->___eax) -#define FPU_EFLAGS (FPU_info->___eflags) -#define FPU_EIP (FPU_info->___eip) +#define FPU_CS (*(unsigned short *) &(FPU_info->regs.cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->regs.ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->regs.ds)) +#define FPU_EAX (FPU_info->regs.ax) +#define FPU_EFLAGS (FPU_info->regs.flags) +#define FPU_EIP (FPU_info->regs.ip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) #define FPU_lookahead (I387->soft.lookahead) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index d701e2b39e44..62daa7fcc44c 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -29,42 +29,39 @@ #define FPU_WRITE_BIT 0x10 static int reg_offset[] = { - offsetof(struct info, ___eax), - offsetof(struct info, ___ecx), - offsetof(struct info, ___edx), - offsetof(struct info, ___ebx), - offsetof(struct info, ___esp), - offsetof(struct info, ___ebp), - offsetof(struct info, ___esi), - offsetof(struct info, ___edi) + offsetof(struct math_emu_info, regs.ax), + offsetof(struct math_emu_info, regs.cx), + offsetof(struct math_emu_info, regs.dx), + offsetof(struct math_emu_info, regs.bx), + offsetof(struct math_emu_info, regs.sp), + offsetof(struct math_emu_info, regs.bp), + offsetof(struct math_emu_info, regs.si), + offsetof(struct math_emu_info, regs.di) }; #define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) static int reg_offset_vm86[] = { - offsetof(struct info, ___cs), - offsetof(struct info, ___vm86_ds), - offsetof(struct info, ___vm86_es), - offsetof(struct info, ___vm86_fs), - offsetof(struct info, ___vm86_gs), - offsetof(struct info, ___ss), - offsetof(struct info, ___vm86_ds) + offsetof(struct math_emu_info, regs.cs), + offsetof(struct math_emu_info, vm86.ds), + offsetof(struct math_emu_info, vm86.es), + offsetof(struct math_emu_info, vm86.fs), + offsetof(struct math_emu_info, vm86.gs), + offsetof(struct math_emu_info, regs.ss), + offsetof(struct math_emu_info, vm86.ds) }; #define VM86_REG_(x) (*(unsigned short *) \ (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) -/* This dummy, gs is not saved on the stack. */ -#define ___GS ___ds - static int reg_offset_pm[] = { - offsetof(struct info, ___cs), - offsetof(struct info, ___ds), - offsetof(struct info, ___es), - offsetof(struct info, ___fs), - offsetof(struct info, ___GS), - offsetof(struct info, ___ss), - offsetof(struct info, ___ds) + offsetof(struct math_emu_info, regs.cs), + offsetof(struct math_emu_info, regs.ds), + offsetof(struct math_emu_info, regs.es), + offsetof(struct math_emu_info, regs.fs), + offsetof(struct math_emu_info, regs.ds), /* dummy, not saved on stack */ + offsetof(struct math_emu_info, regs.ss), + offsetof(struct math_emu_info, regs.ds) }; #define PM_REG_(x) (*(unsigned short *) \ @@ -349,34 +346,34 @@ void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, } switch (rm) { case 0: - address += FPU_info->___ebx + FPU_info->___esi; + address += FPU_info->regs.bx + FPU_info->regs.si; break; case 1: - address += FPU_info->___ebx + FPU_info->___edi; + address += FPU_info->regs.bx + FPU_info->regs.di; break; case 2: - address += FPU_info->___ebp + FPU_info->___esi; + address += FPU_info->regs.bp + FPU_info->regs.si; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 3: - address += FPU_info->___ebp + FPU_info->___edi; + address += FPU_info->regs.bp + FPU_info->regs.di; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 4: - address += FPU_info->___esi; + address += FPU_info->regs.si; break; case 5: - address += FPU_info->___edi; + address += FPU_info->regs.di; break; case 6: - address += FPU_info->___ebp; + address += FPU_info->regs.bp; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 7: - address += FPU_info->___ebx; + address += FPU_info->regs.bx; break; } From a5ef7ca0e2636bad0ccd07b996d775348ae2b65e Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sun, 8 Feb 2009 17:39:58 -0500 Subject: [PATCH 440/566] x86: spinlocks: define dummy __raw_spin_is_contended Architectures other than mips and x86 are not using ticket spinlocks. Therefore, the contention on the lock is meaningless, since there is nobody known to be waiting on it (arguably /fairly/ unfair locks). Dummy it out to return 0 on other architectures. Signed-off-by: Kyle McMartin Acked-by: Ralf Baechle Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/mips/include/asm/spinlock.h | 1 + arch/x86/include/asm/paravirt.h | 1 + arch/x86/include/asm/spinlock.h | 1 + include/linux/spinlock.h | 5 +++++ 4 files changed, 8 insertions(+) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 1a1f320c30d8..0884947ebe27 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -51,6 +51,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock) return (((counters >> 14) - counters) & 0x1fff) > 1; } +#define __raw_spin_is_contended __raw_spin_is_contended static inline void __raw_spin_lock(raw_spinlock_t *lock) { diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aedc..c09a14127584 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1402,6 +1402,7 @@ static inline int __raw_spin_is_contended(struct raw_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) { diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da2..8247e94ac6b1 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -245,6 +245,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index e0c0fccced46..a0c66a2e00ad 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -124,7 +124,12 @@ do { \ #ifdef CONFIG_GENERIC_LOCKBREAK #define spin_is_contended(lock) ((lock)->break_lock) #else + +#ifdef __raw_spin_is_contended #define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#else +#define spin_is_contended(lock) (((void)(lock), 0)) +#endif /*__raw_spin_is_contended*/ #endif /** From a8e807f7607ab633de7be4e2f4c350923cc2cb63 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 9 Feb 2009 16:47:36 +0000 Subject: [PATCH 441/566] FRV: in_interrupt() requires #inclusion of linux/hardirq.h not asm/hardirq.h now in_interrupt() requires #inclusion of linux/hardirq.h not asm/hardirq.h now. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/frv/mm/dma-alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c index dc6522c464d4..44840e73e907 100644 --- a/arch/frv/mm/dma-alloc.c +++ b/arch/frv/mm/dma-alloc.c @@ -36,10 +36,10 @@ #include #include #include +#include #include #include -#include #include #include #include From f06da264cfb0f9444d41ca247213e419f90aa72a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Feb 2009 08:57:29 -0800 Subject: [PATCH 442/566] i915: Fix more size_t format string warnings The DRI people seem to have a hard time getting these right (see also commit aeb565dfc3ac4c8b47c5049085b4c7bfb2c7d5d7). Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1441831fa06e..818576654092 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1457,7 +1457,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || (obj_priv->gtt_offset & (obj->size - 1))) { - WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n", + WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", __func__, obj_priv->gtt_offset, obj->size); return; } From 9d9b87c1218be78ddecbc85ec3bb91c79c1d56ab Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 4 Feb 2009 17:35:38 -0500 Subject: [PATCH 443/566] lockd: fix regression in lockd's handling of blocked locks If a client requests a blocking lock, is denied, then requests it again, then here in nlmsvc_lock() we will call vfs_lock_file() without FL_SLEEP set, because we've already queued a block and don't need the locks code to do it again. But that means vfs_lock_file() will return -EAGAIN instead of FILE_LOCK_DENIED. So we still need to translate that -EAGAIN return into a nlm_lck_blocked error in this case, and put ourselves back on lockd's block list. The bug was introduced by bde74e4bc64415b1 "locks: add special return value for asynchronous locks". Thanks to Frank van Maarseveen for the report; his original test case was essentially for i in `seq 30`; do flock /nfsmount/foo sleep 10 & done Tested-by: Frank van Maarseveen Reported-by: Frank van Maarseveen Cc: Miklos Szeredi Signed-off-by: J. Bruce Fields --- fs/lockd/svclock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 6063a8e4b9f3..763b78a6e9de 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -427,7 +427,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; case -EAGAIN: ret = nlm_lck_denied; - goto out; + break; case FILE_LOCK_DEFERRED: if (wait) break; @@ -443,6 +443,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } + ret = nlm_lck_denied; + if (!wait) + goto out; + ret = nlm_lck_blocked; /* Append to list of blocked */ From 94f341db3dd080851f918da37e84659ef760da26 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 25 Dec 2008 17:15:02 +0300 Subject: [PATCH 444/566] USB: fsl_qe_udc: Fix oops on QE UDC probe failure In case of probing errors the driver kfrees the udc_controller, but it doesn't set the pointer to NULL. When usb_gadget_register_driver is called, it checks for udc_controller != NULL, the check passes and the driver accesses nonexistent memory. Fix this by setting udc_controller to NULL in case of errors. While at it, also implement irq_of_parse_and_map()'s failure and cleanup cases. Signed-off-by: Anton Vorontsov Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_qe_udc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index d6c5bcd40064..e8f7862acb3d 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2604,6 +2604,10 @@ static int __devinit qe_udc_probe(struct of_device *ofdev, (unsigned long)udc_controller); /* request irq and disable DR */ udc_controller->usb_irq = irq_of_parse_and_map(np, 0); + if (!udc_controller->usb_irq) { + ret = -EINVAL; + goto err_noirq; + } ret = request_irq(udc_controller->usb_irq, qe_udc_irq, 0, driver_name, udc_controller); @@ -2625,6 +2629,8 @@ static int __devinit qe_udc_probe(struct of_device *ofdev, err6: free_irq(udc_controller->usb_irq, udc_controller); err5: + irq_dispose_mapping(udc_controller->usb_irq); +err_noirq: if (udc_controller->nullmap) { dma_unmap_single(udc_controller->gadget.dev.parent, udc_controller->nullp, 256, @@ -2648,7 +2654,7 @@ err2: iounmap(udc_controller->usb_regs); err1: kfree(udc_controller); - + udc_controller = NULL; return ret; } @@ -2710,6 +2716,7 @@ static int __devexit qe_udc_remove(struct of_device *ofdev) kfree(ep->txframe); free_irq(udc_controller->usb_irq, udc_controller); + irq_dispose_mapping(udc_controller->usb_irq); tasklet_kill(&udc_controller->rx_tasklet); From a30551db66afa1b53a4fa7ceadddb7122bdcf491 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 25 Dec 2008 17:15:05 +0300 Subject: [PATCH 445/566] USB: fsl_qe_udc: Fix recursive locking bug in ch9getstatus() The call chain is this: qe_udc_irq() <- grabs the udc->lock spinlock rx_irq() qe_ep0_rx() ep0_setup_handle() setup_received_handle() ch9getstatus() qe_ep_queue() <- tries to grab the udc->lock again It seems unsafe to temporarily drop the lock in the ch9getstatus(), so to fix that bug the lock-less __qe_ep_queue() function implemented and used by the ch9getstatus(). Signed-off-by: Anton Vorontsov Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_qe_udc.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index e8f7862acb3d..064582fb6a87 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -1681,14 +1681,11 @@ static void qe_free_request(struct usb_ep *_ep, struct usb_request *_req) kfree(req); } -/* queues (submits) an I/O request to an endpoint */ -static int qe_ep_queue(struct usb_ep *_ep, struct usb_request *_req, - gfp_t gfp_flags) +static int __qe_ep_queue(struct usb_ep *_ep, struct usb_request *_req) { struct qe_ep *ep = container_of(_ep, struct qe_ep, ep); struct qe_req *req = container_of(_req, struct qe_req, req); struct qe_udc *udc; - unsigned long flags; int reval; udc = ep->udc; @@ -1732,7 +1729,7 @@ static int qe_ep_queue(struct usb_ep *_ep, struct usb_request *_req, list_add_tail(&req->queue, &ep->queue); dev_vdbg(udc->dev, "gadget have request in %s! %d\n", ep->name, req->req.length); - spin_lock_irqsave(&udc->lock, flags); + /* push the request to device */ if (ep_is_in(ep)) reval = ep_req_send(ep, req); @@ -1748,11 +1745,24 @@ static int qe_ep_queue(struct usb_ep *_ep, struct usb_request *_req, if (ep->dir == USB_DIR_OUT) reval = ep_req_receive(ep, req); - spin_unlock_irqrestore(&udc->lock, flags); - return 0; } +/* queues (submits) an I/O request to an endpoint */ +static int qe_ep_queue(struct usb_ep *_ep, struct usb_request *_req, + gfp_t gfp_flags) +{ + struct qe_ep *ep = container_of(_ep, struct qe_ep, ep); + struct qe_udc *udc = ep->udc; + unsigned long flags; + int ret; + + spin_lock_irqsave(&udc->lock, flags); + ret = __qe_ep_queue(_ep, _req); + spin_unlock_irqrestore(&udc->lock, flags); + return ret; +} + /* dequeues (cancels, unlinks) an I/O request from an endpoint */ static int qe_ep_dequeue(struct usb_ep *_ep, struct usb_request *_req) { @@ -2008,7 +2018,7 @@ static void ch9getstatus(struct qe_udc *udc, u8 request_type, u16 value, udc->ep0_dir = USB_DIR_IN; /* data phase */ - status = qe_ep_queue(&ep->ep, &req->req, GFP_ATOMIC); + status = __qe_ep_queue(&ep->ep, &req->req); if (status == 0) return; From 2247818a329687f30d1e5c3a62efc33d07c47522 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 25 Dec 2008 17:15:07 +0300 Subject: [PATCH 446/566] USB: fsl_qe_udc: Fix QE USB controller initialization qe_udc_reg_init() leaves the USB controller enabled before muram memory initialized. Sometimes the uninitialized muram memory confuses the controller, and it start sending the busy interrupts. Fix this by disabling the controller, it will be enabled later by the gadget driver, at bind time. Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_qe_udc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 064582fb6a87..1319f8f7acba 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2452,8 +2452,12 @@ static int __devinit qe_udc_reg_init(struct qe_udc *udc) struct usb_ctlr __iomem *qe_usbregs; qe_usbregs = udc->usb_regs; - /* Init the usb register */ + /* Spec says that we must enable the USB controller to change mode. */ out_8(&qe_usbregs->usb_usmod, 0x01); + /* Mode changed, now disable it, since muram isn't initialized yet. */ + out_8(&qe_usbregs->usb_usmod, 0x00); + + /* Initialize the rest. */ out_be16(&qe_usbregs->usb_usbmr, 0); out_8(&qe_usbregs->usb_uscom, 0); out_be16(&qe_usbregs->usb_usber, USBER_ALL_CLEAR); From ef84e4055f3561495c4c0e0dfb0b9f4a6e20479d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 25 Dec 2008 17:15:09 +0300 Subject: [PATCH 447/566] USB: fsl_qe_udc: Fix disconnects reporting during bus reset Freescale QE UDC controllers can't report the "port change" states, so the only way to handle disconnects is to process bus reset interrupts. The bus reset can take some time, that is, few irqs. Gadgets may print the disconnection events, and this causes few repetitive messages in the kernel log. This patch fixes the issue by using the usb_state machine, if the usb controller has been already reset, just quit the reset irq early. Signed-off-by: Anton Vorontsov Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_qe_udc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 1319f8f7acba..7a820a3b4acd 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2161,6 +2161,9 @@ static int reset_irq(struct qe_udc *udc) { unsigned char i; + if (udc->usb_state == USB_STATE_DEFAULT) + return 0; + qe_usb_disable(); out_8(&udc->usb_regs->usb_usadr, 0); From 82341b3690fce8f70998e3cfb79fbffff0eb7e6b Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 25 Dec 2008 17:15:11 +0300 Subject: [PATCH 448/566] USB: fsl_qe_udc: Fix muram corruption by disabled endpoints Before freeing an endpoint's muram memory, we should stop all activity of the endpoint, otherwise the QE UDC controller might do nasty things with the muram memory that isn't belong to that endpoint anymore. The qe_ep_reset() effectively flushes the hardware fifos, finishes all late transaction and thus prevents the corruption. Signed-off-by: Anton Vorontsov Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_qe_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 7a820a3b4acd..32f415ebd3df 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -1622,6 +1622,7 @@ static int qe_ep_disable(struct usb_ep *_ep) nuke(ep, -ESHUTDOWN); ep->desc = NULL; ep->stopped = 1; + qe_ep_reset(udc, ep->epnum); spin_unlock_irqrestore(&udc->lock, flags); cpm_muram_free(cpm_muram_offset(ep->rxbase)); From af3ddbd76304f9f602c970f9b09a0c9d8cf8336c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 25 Dec 2008 17:15:14 +0300 Subject: [PATCH 449/566] USB: fsl_qe_udc: Fix stalled TX requests bug While disabling an endpoint the driver nuking any pending requests, thus completing them with -ESHUTDOWN status. But the driver doesn't clear the tx_req, which means that a next TX request (after ep_enable), might get stalled, since the driver won't queue the new reqests. This patch fixes a bug I'm observing with ethernet gadget while playing with ifconfig usb0 up/down (the up/down sequence disables and enables `in' and `out' endpoints). Signed-off-by: Anton Vorontsov Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_qe_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 32f415ebd3df..d701bf4698d2 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -1622,6 +1622,7 @@ static int qe_ep_disable(struct usb_ep *_ep) nuke(ep, -ESHUTDOWN); ep->desc = NULL; ep->stopped = 1; + ep->tx_req = NULL; qe_ep_reset(udc, ep->epnum); spin_unlock_irqrestore(&udc->lock, flags); From 2057ac86da09955c9f8671e36d4f6bd1e7a5d7d2 Mon Sep 17 00:00:00 2001 From: James Treacy Date: Thu, 29 Jan 2009 20:17:17 -0500 Subject: [PATCH 450/566] USB: cdc-acm.c: remove duplicate lines for MTK gps support The same patch to add support for MTK gps loggers was submitted by two different people and applied twice. Remove the redundant lines. Signed-off-by: James Treacy Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 97ba4a985edc..326dd7f65ee9 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1349,9 +1349,6 @@ static struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, - { USB_DEVICE(0x0e8d, 0x3329), /* i-blue 747, Qstarz BT-Q1000, Holux M-241 */ - .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ - }, { USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, From 6b40c0057a7935bcf63a38a924094c7e61d4731f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 3 Feb 2009 16:02:21 -0800 Subject: [PATCH 451/566] Revert USB: option: add Pantech cards Revert 8b6346ec899713a90890c9e832f7eff91ea73504 as these devices really work just fine with the cdc-acm driver, as they follow the spec properly. Thanks to Chuck Ebbert for pointing out the problem here. Cc: Chuck Ebbert Cc: Dan Williams Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 6c89da9c6fea..9d2e77d9af20 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -274,12 +274,6 @@ static int option_send_setup(struct tty_struct *tty, struct usb_serial_port *po #define ERICSSON_VENDOR_ID 0x0bdb #define ERICSSON_PRODUCT_F3507G 0x1900 -/* Pantech products */ -#define PANTECH_VENDOR_ID 0x106c -#define PANTECH_PRODUCT_PC5740 0x3701 -#define PANTECH_PRODUCT_PC5750 0x3702 /* PX-500 */ -#define PANTECH_PRODUCT_UM150 0x3711 - static struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -488,9 +482,6 @@ static struct usb_device_id option_ids[] = { { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_MF628) }, { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_CDMA_TECH) }, { USB_DEVICE(ERICSSON_VENDOR_ID, ERICSSON_PRODUCT_F3507G) }, - { USB_DEVICE(PANTECH_VENDOR_ID, PANTECH_PRODUCT_PC5740) }, - { USB_DEVICE(PANTECH_VENDOR_ID, PANTECH_PRODUCT_PC5750) }, - { USB_DEVICE(PANTECH_VENDOR_ID, PANTECH_PRODUCT_UM150) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From 0d020aae0a154cffce680a7775c74788fa0bea92 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 2 Feb 2009 09:51:01 -0500 Subject: [PATCH 452/566] USB: usb-storage: remove WARN from last-sector hacks This patch (as1201) removes the WARN() from the last-sector hacks in usb-storage, thereby making the code match the version now in .27-stable and .28-stable. The WARN() isn't needed, since there is no longer any intention of assuming that all storage devices have an even number of sectors, and it annoys users for no good reason. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/transport.c | 36 +++------------------------------ 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 1d5438e6363b..fb65d221cedf 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -558,32 +558,10 @@ static void last_sector_hacks(struct us_data *us, struct scsi_cmnd *srb) if (srb->result == SAM_STAT_GOOD && scsi_get_resid(srb) == 0) { - /* The command succeeded. If the capacity is odd - * (i.e., if the sector number is even) then the - * "always-even" heuristic would be wrong for this - * device. Issue a WARN() so that the kerneloops.org - * project will be notified and we will then know to - * mark the device with a CAPACITY_OK flag. Hopefully - * this will occur for only a few devices. - * - * Use the sign of us->last_sector_hacks to tell whether - * the warning has already been issued; we don't need - * more than one warning per device. + /* The command succeeded. We know this device doesn't + * have the last-sector bug, so stop checking it. */ - if (!(sector & 1) && us->use_last_sector_hacks > 0) { - unsigned vid = le16_to_cpu( - us->pusb_dev->descriptor.idVendor); - unsigned pid = le16_to_cpu( - us->pusb_dev->descriptor.idProduct); - unsigned rev = le16_to_cpu( - us->pusb_dev->descriptor.bcdDevice); - - WARN(1, "%s: Successful last sector success at %u, " - "device %04x:%04x:%04x\n", - sdkp->disk->disk_name, sector, - vid, pid, rev); - us->use_last_sector_hacks = -1; - } + us->use_last_sector_hacks = 0; } else { /* The command failed. Allow up to 3 retries in case this @@ -599,14 +577,6 @@ static void last_sector_hacks(struct us_data *us, struct scsi_cmnd *srb) srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, record_not_found, sizeof(record_not_found)); - - /* In theory we might want to issue a WARN() here if the - * capacity is even, since it could indicate the device - * has the READ CAPACITY bug _and_ the real capacity is - * odd. But it could also indicate that the device - * simply can't access its last sector, a failure mode - * which is surprisingly common. So no warning. - */ } done: From 78c8fb3717cdff35ad118e17ac7495d0cf21a61f Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 1 Feb 2009 18:54:54 +0800 Subject: [PATCH 453/566] USB: usb-serial: fix the aircable_init failure path The failure path of aircable_init is wrong, fix the order of (goto) labels. Signed-off-by: Dave Young Acked-by: Naranjo Manuel Francisco Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/aircable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c index 537f953bd7f8..6d106e74265e 100644 --- a/drivers/usb/serial/aircable.c +++ b/drivers/usb/serial/aircable.c @@ -621,9 +621,9 @@ static int __init aircable_init(void) goto failed_usb_register; return 0; -failed_serial_register: - usb_serial_deregister(&aircable_device); failed_usb_register: + usb_serial_deregister(&aircable_device); +failed_serial_register: return retval; } From e38c287447e5a3ff905a59dd81269c14cd12ffa1 Mon Sep 17 00:00:00 2001 From: Stephane Clerambault Date: Mon, 2 Feb 2009 13:39:12 -0800 Subject: [PATCH 454/566] USB: ftdi_sio: add support for the NDI Polaris system Add support for the NDI Polaris system *http://www.ndigital.com/). Cc: Ian Abbott Cc: Oliver Neukum Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 75597337583e..64b84291c07e 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -662,6 +662,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DUSB_PID) }, { USB_DEVICE(ALTI2_VID, ALTI2_N3_PID) }, { USB_DEVICE(FTDI_VID, DIEBOLD_BCS_SE923_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_NDI_HUC_PID) }, { }, /* Optional parameter entry */ { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h index 1b62eff475d2..e300c840f8ca 100644 --- a/drivers/usb/serial/ftdi_sio.h +++ b/drivers/usb/serial/ftdi_sio.h @@ -844,6 +844,9 @@ #define TML_VID 0x1B91 /* Vendor ID */ #define TML_USB_SERIAL_PID 0x0064 /* USB - Serial Converter */ +/* NDI Polaris System */ +#define FTDI_NDI_HUC_PID 0xDA70 + /* Propox devices */ #define FTDI_PROPOX_JTAGCABLEII_PID 0xD738 From 506e9469833c66ed6bb9acd902e208f7301b6adb Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 4 Feb 2009 15:48:03 -0500 Subject: [PATCH 455/566] USB: usb-storage: add Pentax to the bad-vendor list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch (as1202) adds Pentax to usb-storage's list of bad vendors whose devices always need the CAPACITY_HEURISTICS flag. This is in addition to the existing entries: Nokia, Nikon, and Motorola. Signed-off-by: Alan Stern Tested-by: Virgo Pärna Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 2 ++ drivers/usb/storage/unusual_devs.h | 15 --------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 2a42b862aa9f..727c506417cc 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -64,6 +64,7 @@ */ #define VENDOR_ID_NOKIA 0x0421 #define VENDOR_ID_NIKON 0x04b0 +#define VENDOR_ID_PENTAX 0x0a17 #define VENDOR_ID_MOTOROLA 0x22b8 /*********************************************************************** @@ -158,6 +159,7 @@ static int slave_configure(struct scsi_device *sdev) switch (le16_to_cpu(us->pusb_dev->descriptor.idVendor)) { case VENDOR_ID_NOKIA: case VENDOR_ID_NIKON: + case VENDOR_ID_PENTAX: case VENDOR_ID_MOTOROLA: if (!(us->fflags & (US_FL_FIX_CAPACITY | US_FL_CAPACITY_OK))) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 69269f739563..8e878cab8dc7 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1354,21 +1354,6 @@ UNUSUAL_DEV( 0x0a17, 0x0004, 0x1000, 0x1000, US_SC_DEVICE, US_PR_DEVICE, NULL, US_FL_FIX_INQUIRY ), - -/* Submitted by Per Winkvist */ -UNUSUAL_DEV( 0x0a17, 0x006, 0x0000, 0xffff, - "Pentax", - "Optio S/S4", - US_SC_DEVICE, US_PR_DEVICE, NULL, - US_FL_FIX_INQUIRY ), - -/* Reported by Jaak Ristioja */ -UNUSUAL_DEV( 0x0a17, 0x006e, 0x0100, 0x0100, - "Pentax", - "K10D", - US_SC_DEVICE, US_PR_DEVICE, NULL, - US_FL_FIX_CAPACITY ), - /* These are virtual windows driver CDs, which the zd1211rw driver * automatically converts into WLAN devices. */ UNUSUAL_DEV( 0x0ace, 0x2011, 0x0101, 0x0101, From 64905b48098761e779bb848e69365c018894ea81 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Feb 2009 11:11:38 +0300 Subject: [PATCH 456/566] USB: ftdi_sio: unlock_kernel() on error in set_serial_info() There was one error path where unlock_kernel() wasn't called. This was found with a code checker (http://repo.or.cz/w/smatch.git/) Compile tested only, sorry. Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 64b84291c07e..f92f4d773374 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1065,8 +1065,10 @@ static int set_serial_info(struct tty_struct *tty, if (!capable(CAP_SYS_ADMIN)) { if (((new_serial.flags & ~ASYNC_USR_MASK) != - (priv->flags & ~ASYNC_USR_MASK))) + (priv->flags & ~ASYNC_USR_MASK))) { + unlock_kernel(); return -EPERM; + } priv->flags = ((priv->flags & ~ASYNC_USR_MASK) | (new_serial.flags & ASYNC_USR_MASK)); priv->custom_divisor = new_serial.custom_divisor; From 97dcf0416e390fc5c997d4ea60e6f975c7b7a1c3 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 4 Feb 2009 16:38:33 +0100 Subject: [PATCH 457/566] USB: two more usb ids for ti_usb_3410_5052 This patch adds device IDs and balances the counts to make the hot ID additioning mechanism work. Signed-off-by: Oliver Neukum Cc: stable Cc: Chris Adams Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 10 +++++++--- drivers/usb/serial/ti_usb_3410_5052.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index baf591137b80..2620bf6fe5e1 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -176,7 +176,7 @@ static unsigned int product_5052_count; /* the array dimension is the number of default entries plus */ /* TI_EXTRA_VID_PID_COUNT user defined entries plus 1 terminating */ /* null entry */ -static struct usb_device_id ti_id_table_3410[7+TI_EXTRA_VID_PID_COUNT+1] = { +static struct usb_device_id ti_id_table_3410[10+TI_EXTRA_VID_PID_COUNT+1] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_NO_FW_PRODUCT_ID) }, @@ -185,9 +185,11 @@ static struct usb_device_id ti_id_table_3410[7+TI_EXTRA_VID_PID_COUNT+1] = { { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_EDGE_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_4543_PRODUCT_ID) }, + { USB_DEVICE(IBM_VENDOR_ID, IBM_454B_PRODUCT_ID) }, + { USB_DEVICE(IBM_VENDOR_ID, IBM_454C_PRODUCT_ID) }, }; -static struct usb_device_id ti_id_table_5052[4+TI_EXTRA_VID_PID_COUNT+1] = { +static struct usb_device_id ti_id_table_5052[5+TI_EXTRA_VID_PID_COUNT+1] = { { USB_DEVICE(TI_VENDOR_ID, TI_5052_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, @@ -195,7 +197,7 @@ static struct usb_device_id ti_id_table_5052[4+TI_EXTRA_VID_PID_COUNT+1] = { { USB_DEVICE(IBM_VENDOR_ID, IBM_4543_PRODUCT_ID) }, }; -static struct usb_device_id ti_id_table_combined[6+2*TI_EXTRA_VID_PID_COUNT+1] = { +static struct usb_device_id ti_id_table_combined[14+2*TI_EXTRA_VID_PID_COUNT+1] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_NO_FW_PRODUCT_ID) }, @@ -208,6 +210,8 @@ static struct usb_device_id ti_id_table_combined[6+2*TI_EXTRA_VID_PID_COUNT+1] = { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_4543_PRODUCT_ID) }, + { USB_DEVICE(IBM_VENDOR_ID, IBM_454B_PRODUCT_ID) }, + { USB_DEVICE(IBM_VENDOR_ID, IBM_454C_PRODUCT_ID) }, { } }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h index b7ea5dbadee5..f323c6025858 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.h +++ b/drivers/usb/serial/ti_usb_3410_5052.h @@ -30,6 +30,8 @@ #define IBM_VENDOR_ID 0x04b3 #define TI_3410_PRODUCT_ID 0x3410 #define IBM_4543_PRODUCT_ID 0x4543 +#define IBM_454B_PRODUCT_ID 0x454b +#define IBM_454C_PRODUCT_ID 0x454c #define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */ #define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */ #define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */ From c200b9c9e8ec93cdd262cfa1699ad92e883d4876 Mon Sep 17 00:00:00 2001 From: Dirk De Schepper Date: Fri, 6 Feb 2009 20:48:34 +0000 Subject: [PATCH 458/566] USB: option: New mobile broadband modems to be supported - New Novatel and Dell mobile broadband modem products added - Dell pid variables used in stead of numerical PIDs for known products Signed-off-by: Dirk De Schepper Cc: stable Signed-off-by: Matthias Urlichs Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 84 ++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9d2e77d9af20..bfd0b68ceccd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -199,14 +199,15 @@ static int option_send_setup(struct tty_struct *tty, struct usb_serial_port *po #define NOVATELWIRELESS_PRODUCT_MC950D 0x4400 /* FUTURE NOVATEL PRODUCTS */ -#define NOVATELWIRELESS_PRODUCT_EVDO_1 0x6000 -#define NOVATELWIRELESS_PRODUCT_HSPA_1 0x7000 -#define NOVATELWIRELESS_PRODUCT_EMBEDDED_1 0x8000 -#define NOVATELWIRELESS_PRODUCT_GLOBAL_1 0x9000 -#define NOVATELWIRELESS_PRODUCT_EVDO_2 0x6001 -#define NOVATELWIRELESS_PRODUCT_HSPA_2 0x7001 -#define NOVATELWIRELESS_PRODUCT_EMBEDDED_2 0x8001 -#define NOVATELWIRELESS_PRODUCT_GLOBAL_2 0x9001 +#define NOVATELWIRELESS_PRODUCT_EVDO_FULLSPEED 0X6000 +#define NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED 0X6001 +#define NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED 0X7000 +#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED 0X7001 +#define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED 0X8000 +#define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0X8001 +#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0X9000 +#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0X9001 +#define NOVATELWIRELESS_PRODUCT_GLOBAL 0XA001 /* AMOI PRODUCTS */ #define AMOI_VENDOR_ID 0x1614 @@ -216,6 +217,27 @@ static int option_send_setup(struct tty_struct *tty, struct usb_serial_port *po #define DELL_VENDOR_ID 0x413C +/* Dell modems */ +#define DELL_PRODUCT_5700_MINICARD 0x8114 +#define DELL_PRODUCT_5500_MINICARD 0x8115 +#define DELL_PRODUCT_5505_MINICARD 0x8116 +#define DELL_PRODUCT_5700_EXPRESSCARD 0x8117 +#define DELL_PRODUCT_5510_EXPRESSCARD 0x8118 + +#define DELL_PRODUCT_5700_MINICARD_SPRINT 0x8128 +#define DELL_PRODUCT_5700_MINICARD_TELUS 0x8129 + +#define DELL_PRODUCT_5720_MINICARD_VZW 0x8133 +#define DELL_PRODUCT_5720_MINICARD_SPRINT 0x8134 +#define DELL_PRODUCT_5720_MINICARD_TELUS 0x8135 +#define DELL_PRODUCT_5520_MINICARD_CINGULAR 0x8136 +#define DELL_PRODUCT_5520_MINICARD_GENERIC_L 0x8137 +#define DELL_PRODUCT_5520_MINICARD_GENERIC_I 0x8138 + +#define DELL_PRODUCT_5730_MINICARD_SPRINT 0x8180 +#define DELL_PRODUCT_5730_MINICARD_TELUS 0x8181 +#define DELL_PRODUCT_5730_MINICARD_VZW 0x8182 + #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a @@ -389,31 +411,37 @@ static struct usb_device_id option_ids[] = { { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU870D) }, /* Novatel EU850D/EU860D/EU870D */ { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC950D) }, /* Novatel MC930D/MC950D */ { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC727) }, /* Novatel MC727/U727/USB727 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_1) }, /* Novatel EVDO product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_1) }, /* Novatel HSPA product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EMBEDDED_1) }, /* Novatel Embedded product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_GLOBAL_1) }, /* Novatel Global product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_2) }, /* Novatel EVDO product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_2) }, /* Novatel HSPA product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EMBEDDED_2) }, /* Novatel Embedded product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_GLOBAL_2) }, /* Novatel Global product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_FULLSPEED) }, /* Novatel EVDO product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED) }, /* Novatel HSPA product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED) }, /* Novatel EVDO Embedded product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED) }, /* Novatel HSPA Embedded product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED) }, /* Novatel EVDO product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED) }, /* Novatel HSPA product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED) }, /* Novatel EVDO Embedded product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED) }, /* Novatel HSPA Embedded product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_GLOBAL) }, /* Novatel Global product */ { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H02) }, - { USB_DEVICE(DELL_VENDOR_ID, 0x8114) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite EV620 CDMA/EV-DO */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8115) }, /* Dell Wireless 5500 Mobile Broadband HSDPA Mini-Card == Novatel Expedite EU740 HSDPA/3G */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8116) }, /* Dell Wireless 5505 Mobile Broadband HSDPA Mini-Card == Novatel Expedite EU740 HSDPA/3G */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8117) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO ExpressCard == Novatel Merlin XV620 CDMA/EV-DO */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8118) }, /* Dell Wireless 5510 Mobile Broadband HSDPA ExpressCard == Novatel Merlin XU870 HSDPA/3G */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8128) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite E720 CDMA/EV-DO */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8129) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite ET620 CDMA/EV-DO */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8133) }, /* Dell Wireless 5720 == Novatel EV620 CDMA/EV-DO */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8136) }, /* Dell Wireless HSDPA 5520 == Novatel Expedite EU860D */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8137) }, /* Dell Wireless HSDPA 5520 */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8138) }, /* Dell Wireless 5520 Voda I Mobile Broadband (3G HSDPA) Minicard */ - { USB_DEVICE(DELL_VENDOR_ID, 0x8147) }, /* Dell Wireless 5530 Mobile Broadband (3G HSPA) Mini-Card */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_MINICARD) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite EV620 CDMA/EV-DO */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5500_MINICARD) }, /* Dell Wireless 5500 Mobile Broadband HSDPA Mini-Card == Novatel Expedite EU740 HSDPA/3G */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5505_MINICARD) }, /* Dell Wireless 5505 Mobile Broadband HSDPA Mini-Card == Novatel Expedite EU740 HSDPA/3G */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_EXPRESSCARD) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO ExpressCard == Novatel Merlin XV620 CDMA/EV-DO */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5510_EXPRESSCARD) }, /* Dell Wireless 5510 Mobile Broadband HSDPA ExpressCard == Novatel Merlin XU870 HSDPA/3G */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_MINICARD_SPRINT) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite E720 CDMA/EV-DO */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_MINICARD_TELUS) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite ET620 CDMA/EV-DO */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5720_MINICARD_VZW) }, /* Dell Wireless 5720 == Novatel EV620 CDMA/EV-DO */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5720_MINICARD_SPRINT) }, /* Dell Wireless 5720 == Novatel EV620 CDMA/EV-DO */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5720_MINICARD_TELUS) }, /* Dell Wireless 5720 == Novatel EV620 CDMA/EV-DO */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5520_MINICARD_CINGULAR) }, /* Dell Wireless HSDPA 5520 == Novatel Expedite EU860D */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5520_MINICARD_GENERIC_L) }, /* Dell Wireless HSDPA 5520 */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5520_MINICARD_GENERIC_I) }, /* Dell Wireless 5520 Voda I Mobile Broadband (3G HSDPA) Minicard */ + { USB_DEVICE(DELL_VENDOR_ID, 0x8147) }, /* Dell Wireless 5530 Mobile Broadband (3G HSPA) Mini-Card */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_SPRINT) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_TELUS) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */ + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_VZW) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, From 26e1287594864169577327fef233befc9739be3b Mon Sep 17 00:00:00 2001 From: Ivan Kuten Date: Fri, 6 Feb 2009 17:42:34 +0800 Subject: [PATCH 459/566] USB: Correct Makefile to make isp1760 buildable Signed-off-by: Ivan Kuten Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index 8bcde8cde554..b2ceb4aff233 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_USB_MON) += mon/ obj-$(CONFIG_PCI) += host/ obj-$(CONFIG_USB_EHCI_HCD) += host/ obj-$(CONFIG_USB_ISP116X_HCD) += host/ +obj-$(CONFIG_USB_ISP1760_HCD) += host/ obj-$(CONFIG_USB_OHCI_HCD) += host/ obj-$(CONFIG_USB_UHCI_HCD) += host/ obj-$(CONFIG_USB_FHCI_HCD) += host/ From 049a6acb503ca7acc11d563db79a2d54f054e0d0 Mon Sep 17 00:00:00 2001 From: Nick Holloway Date: Sun, 25 Jan 2009 16:58:43 +0000 Subject: [PATCH 460/566] USB: Storage: Update unusual_devs entry for Datafab KECF-USB This device suffers from the off-by-one error when reporting the capacity, so add US_FL_FIX_CAPACITY to the existing entry. Signed-off-by: Nick Holloway Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 8e878cab8dc7..50dc33a6065b 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1214,7 +1214,7 @@ UNUSUAL_DEV( 0x07c4, 0xa400, 0x0000, 0xffff, "Datafab", "KECF-USB", US_SC_DEVICE, US_PR_DEVICE, NULL, - US_FL_FIX_INQUIRY ), + US_FL_FIX_INQUIRY | US_FL_FIX_CAPACITY ), /* Reported by Rauch Wolke */ UNUSUAL_DEV( 0x07c4, 0xa4a5, 0x0000, 0xffff, From 89cb7e7fd6c0917bb9236ea48bf538d4668ed009 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 3 Feb 2009 16:28:48 -0800 Subject: [PATCH 461/566] Revert Staging: at76_usb: update drivers/staging/at76_usb w/ mac80211 port Reverts 02227c28391b5059a7710d6039c52912b0ee2c1d (Had to be done by hand due to other patches that had come after this.) Turns out that we don't want the mac80211 port of this driver just yet, as there is a different driver working on adding this support. So keep things old and different for now. This is being reverted at the request of the linux-wireless developers. Cc: Kalle Valo Cc: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/staging/at76_usb/Kconfig | 2 +- drivers/staging/at76_usb/at76_usb.c | 4724 +++++++++++++++++++++------ drivers/staging/at76_usb/at76_usb.h | 229 +- 3 files changed, 3934 insertions(+), 1021 deletions(-) diff --git a/drivers/staging/at76_usb/Kconfig b/drivers/staging/at76_usb/Kconfig index 4c0e55e15448..8606f9621624 100644 --- a/drivers/staging/at76_usb/Kconfig +++ b/drivers/staging/at76_usb/Kconfig @@ -1,6 +1,6 @@ config USB_ATMEL tristate "Atmel at76c503/at76c505/at76c505a USB cards" - depends on MAC80211 && WLAN_80211 && USB + depends on WLAN_80211 && USB default N select FW_LOADER ---help--- diff --git a/drivers/staging/at76_usb/at76_usb.c b/drivers/staging/at76_usb/at76_usb.c index 185533e54769..9195ee9319ff 100644 --- a/drivers/staging/at76_usb/at76_usb.c +++ b/drivers/staging/at76_usb/at76_usb.c @@ -6,7 +6,6 @@ * Copyright (c) 2004 Nick Jones * Copyright (c) 2004 Balint Seeber * Copyright (c) 2007 Guido Guenther - * Copyright (c) 2007 Kalle Valo * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -17,13 +16,6 @@ * Atmel AT76C503A/505/505A. * * Some iw_handler code was taken from airo.c, (C) 1999 Benjamin Reed - * - * TODO for the mac80211 port: - * o adhoc support - * o RTS/CTS support - * o Power Save Mode support - * o support for short/long preambles - * o export variables through debugfs/sysfs */ #include @@ -44,7 +36,7 @@ #include #include #include -#include +#include #include "at76_usb.h" @@ -84,43 +76,31 @@ #define DBG_WE_EVENTS 0x08000000 /* dump wireless events */ #define DBG_FW 0x10000000 /* firmware download */ #define DBG_DFU 0x20000000 /* device firmware upgrade */ -#define DBG_CMD 0x40000000 -#define DBG_MAC80211 0x80000000 #define DBG_DEFAULTS 0 /* Use our own dbg macro */ #define at76_dbg(bits, format, arg...) \ -do { \ - if (at76_debug & (bits)) \ + do { \ + if (at76_debug & (bits)) \ printk(KERN_DEBUG DRIVER_NAME ": " format "\n" , ## arg); \ -} while (0) - -#define at76_dbg_dump(bits, buf, len, format, arg...) \ -do { \ - if (at76_debug & (bits)) { \ - printk(KERN_DEBUG DRIVER_NAME ": " format "\n" , ## arg); \ - print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, buf, len); \ - } \ -} while (0) + } while (0) static int at76_debug = DBG_DEFAULTS; -#define FIRMWARE_IS_WPA(ver) ((ver.major == 1) && (ver.minor == 103)) - /* Protect against concurrent firmware loading and parsing */ static struct mutex fw_mutex; static struct fwentry firmwares[] = { - [0] = { "" }, - [BOARD_503_ISL3861] = { "atmel_at76c503-i3861.bin" }, - [BOARD_503_ISL3863] = { "atmel_at76c503-i3863.bin" }, - [BOARD_503] = { "atmel_at76c503-rfmd.bin" }, - [BOARD_503_ACC] = { "atmel_at76c503-rfmd-acc.bin" }, - [BOARD_505] = { "atmel_at76c505-rfmd.bin" }, - [BOARD_505_2958] = { "atmel_at76c505-rfmd2958.bin" }, - [BOARD_505A] = { "atmel_at76c505a-rfmd2958.bin" }, - [BOARD_505AMX] = { "atmel_at76c505amx-rfmd.bin" }, + [0] = {""}, + [BOARD_503_ISL3861] = {"atmel_at76c503-i3861.bin"}, + [BOARD_503_ISL3863] = {"atmel_at76c503-i3863.bin"}, + [BOARD_503] = {"atmel_at76c503-rfmd.bin"}, + [BOARD_503_ACC] = {"atmel_at76c503-rfmd-acc.bin"}, + [BOARD_505] = {"atmel_at76c505-rfmd.bin"}, + [BOARD_505_2958] = {"atmel_at76c505-rfmd2958.bin"}, + [BOARD_505A] = {"atmel_at76c505a-rfmd2958.bin"}, + [BOARD_505AMX] = {"atmel_at76c505amx-rfmd.bin"}, }; #define USB_DEVICE_DATA(__ops) .driver_info = (kernel_ulong_t)(__ops) @@ -130,133 +110,133 @@ static struct usb_device_id dev_table[] = { * at76c503-i3861 */ /* Generic AT76C503/3861 device */ - { USB_DEVICE(0x03eb, 0x7603), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x03eb, 0x7603), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Linksys WUSB11 v2.1/v2.6 */ - { USB_DEVICE(0x066b, 0x2211), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x066b, 0x2211), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Netgear MA101 rev. A */ - { USB_DEVICE(0x0864, 0x4100), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x0864, 0x4100), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Tekram U300C / Allnet ALL0193 */ - { USB_DEVICE(0x0b3b, 0x1612), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x0b3b, 0x1612), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* HP HN210W J7801A */ - { USB_DEVICE(0x03f0, 0x011c), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x03f0, 0x011c), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Sitecom/Z-Com/Zyxel M4Y-750 */ - { USB_DEVICE(0x0cde, 0x0001), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x0cde, 0x0001), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Dynalink/Askey WLL013 (intersil) */ - { USB_DEVICE(0x069a, 0x0320), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x069a, 0x0320), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* EZ connect 11Mpbs Wireless USB Adapter SMC2662W v1 */ - { USB_DEVICE(0x0d5c, 0xa001), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x0d5c, 0xa001), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* BenQ AWL300 */ - { USB_DEVICE(0x04a5, 0x9000), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x04a5, 0x9000), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Addtron AWU-120, Compex WLU11 */ - { USB_DEVICE(0x05dd, 0xff31), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x05dd, 0xff31), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Intel AP310 AnyPoint II USB */ - { USB_DEVICE(0x8086, 0x0200), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x8086, 0x0200), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Dynalink L11U */ - { USB_DEVICE(0x0d8e, 0x7100), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x0d8e, 0x7100), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* Arescom WL-210, FCC id 07J-GL2411USB */ - { USB_DEVICE(0x0d8e, 0x7110), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x0d8e, 0x7110), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* I-O DATA WN-B11/USB */ - { USB_DEVICE(0x04bb, 0x0919), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x04bb, 0x0919), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* BT Voyager 1010 */ - { USB_DEVICE(0x069a, 0x0821), USB_DEVICE_DATA(BOARD_503_ISL3861) }, + {USB_DEVICE(0x069a, 0x0821), USB_DEVICE_DATA(BOARD_503_ISL3861)}, /* * at76c503-i3863 */ /* Generic AT76C503/3863 device */ - { USB_DEVICE(0x03eb, 0x7604), USB_DEVICE_DATA(BOARD_503_ISL3863) }, + {USB_DEVICE(0x03eb, 0x7604), USB_DEVICE_DATA(BOARD_503_ISL3863)}, /* Samsung SWL-2100U */ - { USB_DEVICE(0x055d, 0xa000), USB_DEVICE_DATA(BOARD_503_ISL3863) }, + {USB_DEVICE(0x055d, 0xa000), USB_DEVICE_DATA(BOARD_503_ISL3863)}, /* * at76c503-rfmd */ /* Generic AT76C503/RFMD device */ - { USB_DEVICE(0x03eb, 0x7605), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x03eb, 0x7605), USB_DEVICE_DATA(BOARD_503)}, /* Dynalink/Askey WLL013 (rfmd) */ - { USB_DEVICE(0x069a, 0x0321), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x069a, 0x0321), USB_DEVICE_DATA(BOARD_503)}, /* Linksys WUSB11 v2.6 */ - { USB_DEVICE(0x077b, 0x2219), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x077b, 0x2219), USB_DEVICE_DATA(BOARD_503)}, /* Network Everywhere NWU11B */ - { USB_DEVICE(0x077b, 0x2227), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x077b, 0x2227), USB_DEVICE_DATA(BOARD_503)}, /* Netgear MA101 rev. B */ - { USB_DEVICE(0x0864, 0x4102), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x0864, 0x4102), USB_DEVICE_DATA(BOARD_503)}, /* D-Link DWL-120 rev. E */ - { USB_DEVICE(0x2001, 0x3200), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x2001, 0x3200), USB_DEVICE_DATA(BOARD_503)}, /* Actiontec 802UAT1, HWU01150-01UK */ - { USB_DEVICE(0x1668, 0x7605), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x1668, 0x7605), USB_DEVICE_DATA(BOARD_503)}, /* AirVast W-Buddie WN210 */ - { USB_DEVICE(0x03eb, 0x4102), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x03eb, 0x4102), USB_DEVICE_DATA(BOARD_503)}, /* Dick Smith Electronics XH1153 802.11b USB adapter */ - { USB_DEVICE(0x1371, 0x5743), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x1371, 0x5743), USB_DEVICE_DATA(BOARD_503)}, /* CNet CNUSB611 */ - { USB_DEVICE(0x1371, 0x0001), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x1371, 0x0001), USB_DEVICE_DATA(BOARD_503)}, /* FiberLine FL-WL200U */ - { USB_DEVICE(0x1371, 0x0002), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x1371, 0x0002), USB_DEVICE_DATA(BOARD_503)}, /* BenQ AWL400 USB stick */ - { USB_DEVICE(0x04a5, 0x9001), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x04a5, 0x9001), USB_DEVICE_DATA(BOARD_503)}, /* 3Com 3CRSHEW696 */ - { USB_DEVICE(0x0506, 0x0a01), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x0506, 0x0a01), USB_DEVICE_DATA(BOARD_503)}, /* Siemens Santis ADSL WLAN USB adapter WLL 013 */ - { USB_DEVICE(0x0681, 0x001b), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x0681, 0x001b), USB_DEVICE_DATA(BOARD_503)}, /* Belkin F5D6050, version 2 */ - { USB_DEVICE(0x050d, 0x0050), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x050d, 0x0050), USB_DEVICE_DATA(BOARD_503)}, /* iBlitzz, BWU613 (not *B or *SB) */ - { USB_DEVICE(0x07b8, 0xb000), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x07b8, 0xb000), USB_DEVICE_DATA(BOARD_503)}, /* Gigabyte GN-WLBM101 */ - { USB_DEVICE(0x1044, 0x8003), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x1044, 0x8003), USB_DEVICE_DATA(BOARD_503)}, /* Planex GW-US11S */ - { USB_DEVICE(0x2019, 0x3220), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x2019, 0x3220), USB_DEVICE_DATA(BOARD_503)}, /* Internal WLAN adapter in h5[4,5]xx series iPAQs */ - { USB_DEVICE(0x049f, 0x0032), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x049f, 0x0032), USB_DEVICE_DATA(BOARD_503)}, /* Corega Wireless LAN USB-11 mini */ - { USB_DEVICE(0x07aa, 0x0011), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x07aa, 0x0011), USB_DEVICE_DATA(BOARD_503)}, /* Corega Wireless LAN USB-11 mini2 */ - { USB_DEVICE(0x07aa, 0x0018), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x07aa, 0x0018), USB_DEVICE_DATA(BOARD_503)}, /* Uniden PCW100 */ - { USB_DEVICE(0x05dd, 0xff35), USB_DEVICE_DATA(BOARD_503) }, + {USB_DEVICE(0x05dd, 0xff35), USB_DEVICE_DATA(BOARD_503)}, /* * at76c503-rfmd-acc */ /* SMC2664W */ - { USB_DEVICE(0x083a, 0x3501), USB_DEVICE_DATA(BOARD_503_ACC) }, + {USB_DEVICE(0x083a, 0x3501), USB_DEVICE_DATA(BOARD_503_ACC)}, /* Belkin F5D6050, SMC2662W v2, SMC2662W-AR */ - { USB_DEVICE(0x0d5c, 0xa002), USB_DEVICE_DATA(BOARD_503_ACC) }, + {USB_DEVICE(0x0d5c, 0xa002), USB_DEVICE_DATA(BOARD_503_ACC)}, /* * at76c505-rfmd */ /* Generic AT76C505/RFMD */ - { USB_DEVICE(0x03eb, 0x7606), USB_DEVICE_DATA(BOARD_505) }, + {USB_DEVICE(0x03eb, 0x7606), USB_DEVICE_DATA(BOARD_505)}, /* * at76c505-rfmd2958 */ /* Generic AT76C505/RFMD, OvisLink WL-1130USB */ - { USB_DEVICE(0x03eb, 0x7613), USB_DEVICE_DATA(BOARD_505_2958) }, + {USB_DEVICE(0x03eb, 0x7613), USB_DEVICE_DATA(BOARD_505_2958)}, /* Fiberline FL-WL240U */ - { USB_DEVICE(0x1371, 0x0014), USB_DEVICE_DATA(BOARD_505_2958) }, + {USB_DEVICE(0x1371, 0x0014), USB_DEVICE_DATA(BOARD_505_2958)}, /* CNet CNUSB-611G */ - { USB_DEVICE(0x1371, 0x0013), USB_DEVICE_DATA(BOARD_505_2958) }, + {USB_DEVICE(0x1371, 0x0013), USB_DEVICE_DATA(BOARD_505_2958)}, /* Linksys WUSB11 v2.8 */ - { USB_DEVICE(0x1915, 0x2233), USB_DEVICE_DATA(BOARD_505_2958) }, + {USB_DEVICE(0x1915, 0x2233), USB_DEVICE_DATA(BOARD_505_2958)}, /* Xterasys XN-2122B, IBlitzz BWU613B/BWU613SB */ - { USB_DEVICE(0x12fd, 0x1001), USB_DEVICE_DATA(BOARD_505_2958) }, + {USB_DEVICE(0x12fd, 0x1001), USB_DEVICE_DATA(BOARD_505_2958)}, /* Corega WLAN USB Stick 11 */ - { USB_DEVICE(0x07aa, 0x7613), USB_DEVICE_DATA(BOARD_505_2958) }, + {USB_DEVICE(0x07aa, 0x7613), USB_DEVICE_DATA(BOARD_505_2958)}, /* Microstar MSI Box MS6978 */ - { USB_DEVICE(0x0db0, 0x1020), USB_DEVICE_DATA(BOARD_505_2958) }, + {USB_DEVICE(0x0db0, 0x1020), USB_DEVICE_DATA(BOARD_505_2958)}, /* * at76c505a-rfmd2958 */ /* Generic AT76C505A device */ - { USB_DEVICE(0x03eb, 0x7614), USB_DEVICE_DATA(BOARD_505A) }, + {USB_DEVICE(0x03eb, 0x7614), USB_DEVICE_DATA(BOARD_505A)}, /* Generic AT76C505AS device */ - { USB_DEVICE(0x03eb, 0x7617), USB_DEVICE_DATA(BOARD_505A) }, + {USB_DEVICE(0x03eb, 0x7617), USB_DEVICE_DATA(BOARD_505A)}, /* Siemens Gigaset USB WLAN Adapter 11 */ - { USB_DEVICE(0x1690, 0x0701), USB_DEVICE_DATA(BOARD_505A) }, + {USB_DEVICE(0x1690, 0x0701), USB_DEVICE_DATA(BOARD_505A)}, /* * at76c505amx-rfmd */ /* Generic AT76C505AMX device */ - { USB_DEVICE(0x03eb, 0x7615), USB_DEVICE_DATA(BOARD_505AMX) }, - { } + {USB_DEVICE(0x03eb, 0x7615), USB_DEVICE_DATA(BOARD_505AMX)}, + {} }; MODULE_DEVICE_TABLE(usb, dev_table); @@ -264,8 +244,26 @@ MODULE_DEVICE_TABLE(usb, dev_table); /* Supported rates of this hardware, bit 7 marks basic rates */ static const u8 hw_rates[] = { 0x82, 0x84, 0x0b, 0x16 }; +/* Frequency of each channel in MHz */ +static const long channel_frequency[] = { + 2412, 2417, 2422, 2427, 2432, 2437, 2442, + 2447, 2452, 2457, 2462, 2467, 2472, 2484 +}; + +#define NUM_CHANNELS ARRAY_SIZE(channel_frequency) + static const char *const preambles[] = { "long", "short", "auto" }; +static const char *const mac_states[] = { + [MAC_INIT] = "INIT", + [MAC_SCANNING] = "SCANNING", + [MAC_AUTH] = "AUTH", + [MAC_ASSOC] = "ASSOC", + [MAC_JOINING] = "JOINING", + [MAC_CONNECTED] = "CONNECTED", + [MAC_OWN_IBSS] = "OWN_IBSS" +}; + /* Firmware download */ /* DFU states */ #define STATE_IDLE 0x00 @@ -300,30 +298,17 @@ struct dfu_status { static inline int at76_is_intersil(enum board_type board) { - if (board == BOARD_503_ISL3861 || board == BOARD_503_ISL3863) - return 1; - return 0; + return (board == BOARD_503_ISL3861 || board == BOARD_503_ISL3863); } static inline int at76_is_503rfmd(enum board_type board) { - if (board == BOARD_503 || board == BOARD_503_ACC) - return 1; - return 0; -} - -static inline int at76_is_505(enum board_type board) -{ - if (board == BOARD_505 || board == BOARD_505_2958) - return 1; - return 0; + return (board == BOARD_503 || board == BOARD_503_ACC); } static inline int at76_is_505a(enum board_type board) { - if (board == BOARD_505A || board == BOARD_505AMX) - return 1; - return 0; + return (board == BOARD_505A || board == BOARD_505AMX); } /* Load a block of the first (internal) part of the firmware */ @@ -504,6 +489,41 @@ exit: return ret; } +/* Report that the scan results are ready */ +static inline void at76_iwevent_scan_complete(struct net_device *netdev) +{ + union iwreq_data wrqu; + wrqu.data.length = 0; + wrqu.data.flags = 0; + wireless_send_event(netdev, SIOCGIWSCAN, &wrqu, NULL); + at76_dbg(DBG_WE_EVENTS, "%s: SIOCGIWSCAN sent", netdev->name); +} + +static inline void at76_iwevent_bss_connect(struct net_device *netdev, + u8 *bssid) +{ + union iwreq_data wrqu; + wrqu.data.length = 0; + wrqu.data.flags = 0; + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + wireless_send_event(netdev, SIOCGIWAP, &wrqu, NULL); + at76_dbg(DBG_WE_EVENTS, "%s: %s: SIOCGIWAP sent", netdev->name, + __func__); +} + +static inline void at76_iwevent_bss_disconnect(struct net_device *netdev) +{ + union iwreq_data wrqu; + wrqu.data.length = 0; + wrqu.data.flags = 0; + memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + wireless_send_event(netdev, SIOCGIWAP, &wrqu, NULL); + at76_dbg(DBG_WE_EVENTS, "%s: %s: SIOCGIWAP sent", netdev->name, + __func__); +} + #define HEX2STR_BUFFERS 4 #define HEX2STR_MAX_LEN 64 #define BIN2HEX(x) ((x) < 10 ? '0' + (x) : (x) + 'A' - 10) @@ -575,6 +595,37 @@ static void at76_ledtrig_tx_activity(void) mod_timer(&ledtrig_tx_timer, jiffies + HZ / 4); } +/* Check if the given ssid is hidden */ +static inline int at76_is_hidden_ssid(u8 *ssid, int length) +{ + static const u8 zeros[32]; + + if (length == 0) + return 1; + + if (length == 1 && ssid[0] == ' ') + return 1; + + return (memcmp(ssid, zeros, length) == 0); +} + +static inline void at76_free_bss_list(struct at76_priv *priv) +{ + struct list_head *next, *ptr; + unsigned long flags; + + spin_lock_irqsave(&priv->bss_list_spinlock, flags); + + priv->curr_bss = NULL; + + list_for_each_safe(ptr, next, &priv->bss_list) { + list_del(ptr); + kfree(list_entry(ptr, struct bss_info, list)); + } + + spin_unlock_irqrestore(&priv->bss_list_spinlock, flags); +} + static int at76_remap(struct usb_device *udev) { int ret; @@ -676,7 +727,7 @@ exit: kfree(hwcfg); if (ret < 0) printk(KERN_ERR "%s: cannot get HW Config (error %d)\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); return ret; } @@ -685,15 +736,15 @@ static struct reg_domain const *at76_get_reg_domain(u16 code) { int i; static struct reg_domain const fd_tab[] = { - { 0x10, "FCC (USA)", 0x7ff }, /* ch 1-11 */ - { 0x20, "IC (Canada)", 0x7ff }, /* ch 1-11 */ - { 0x30, "ETSI (most of Europe)", 0x1fff }, /* ch 1-13 */ - { 0x31, "Spain", 0x600 }, /* ch 10-11 */ - { 0x32, "France", 0x1e00 }, /* ch 10-13 */ - { 0x40, "MKK (Japan)", 0x2000 }, /* ch 14 */ - { 0x41, "MKK1 (Japan)", 0x3fff }, /* ch 1-14 */ - { 0x50, "Israel", 0x3fc }, /* ch 3-9 */ - { 0x00, "", 0xffffffff } /* ch 1-32 */ + {0x10, "FCC (USA)", 0x7ff}, /* ch 1-11 */ + {0x20, "IC (Canada)", 0x7ff}, /* ch 1-11 */ + {0x30, "ETSI (most of Europe)", 0x1fff}, /* ch 1-13 */ + {0x31, "Spain", 0x600}, /* ch 10-11 */ + {0x32, "France", 0x1e00}, /* ch 10-13 */ + {0x40, "MKK (Japan)", 0x2000}, /* ch 14 */ + {0x41, "MKK1 (Japan)", 0x3fff}, /* ch 1-14 */ + {0x50, "Israel", 0x3fc}, /* ch 3-9 */ + {0x00, "", 0xffffffff} /* ch 1-32 */ }; /* Last entry is fallback for unknown domain code */ @@ -739,24 +790,6 @@ static inline int at76_get_cmd_status(struct usb_device *udev, u8 cmd) return ret; } -#define MAKE_CMD_CASE(c) case (c): return #c - -static const char *at76_get_cmd_string(u8 cmd_status) -{ - switch (cmd_status) { - MAKE_CMD_CASE(CMD_SET_MIB); - MAKE_CMD_CASE(CMD_GET_MIB); - MAKE_CMD_CASE(CMD_SCAN); - MAKE_CMD_CASE(CMD_JOIN); - MAKE_CMD_CASE(CMD_START_IBSS); - MAKE_CMD_CASE(CMD_RADIO_ON); - MAKE_CMD_CASE(CMD_RADIO_OFF); - MAKE_CMD_CASE(CMD_STARTUP); - } - - return "UNKNOWN"; -} - static int at76_set_card_command(struct usb_device *udev, u8 cmd, void *buf, int buf_size) { @@ -772,10 +805,6 @@ static int at76_set_card_command(struct usb_device *udev, u8 cmd, void *buf, cmd_buf->size = cpu_to_le16(buf_size); memcpy(cmd_buf->data, buf, buf_size); - at76_dbg_dump(DBG_CMD, cmd_buf, sizeof(struct at76_command) + buf_size, - "issuing command %s (0x%02x)", - at76_get_cmd_string(cmd), cmd); - ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x0e, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, 0, 0, cmd_buf, @@ -813,13 +842,13 @@ static int at76_wait_completion(struct at76_priv *priv, int cmd) status = at76_get_cmd_status(priv->udev, cmd); if (status < 0) { printk(KERN_ERR "%s: at76_get_cmd_status failed: %d\n", - wiphy_name(priv->hw->wiphy), status); + priv->netdev->name, status); break; } at76_dbg(DBG_WAIT_COMPLETE, "%s: Waiting on cmd %d, status = %d (%s)", - wiphy_name(priv->hw->wiphy), cmd, status, + priv->netdev->name, cmd, status, at76_get_cmd_status_string(status)); if (status != CMD_STATUS_IN_PROGRESS @@ -830,7 +859,7 @@ static int at76_wait_completion(struct at76_priv *priv, int cmd) if (time_after(jiffies, timeout)) { printk(KERN_ERR "%s: completion timeout for command %d\n", - wiphy_name(priv->hw->wiphy), cmd); + priv->netdev->name, cmd); status = -ETIMEDOUT; break; } @@ -853,7 +882,7 @@ static int at76_set_mib(struct at76_priv *priv, struct set_mib_buffer *buf) if (ret != CMD_STATUS_COMPLETE) { printk(KERN_INFO "%s: set_mib: at76_wait_completion failed " - "with %d\n", wiphy_name(priv->hw->wiphy), ret); + "with %d\n", priv->netdev->name, ret); ret = -EIO; } @@ -874,7 +903,7 @@ static int at76_set_radio(struct at76_priv *priv, int enable) ret = at76_set_card_command(priv->udev, cmd, NULL, 0); if (ret < 0) printk(KERN_ERR "%s: at76_set_card_command(%d) failed: %d\n", - wiphy_name(priv->hw->wiphy), cmd, ret); + priv->netdev->name, cmd, ret); else ret = 1; @@ -895,7 +924,44 @@ static int at76_set_pm_mode(struct at76_priv *priv) ret = at76_set_mib(priv, &priv->mib_buf); if (ret < 0) printk(KERN_ERR "%s: set_mib (pm_mode) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); + + return ret; +} + +/* Set the association id for power save mode */ +static int at76_set_associd(struct at76_priv *priv, u16 id) +{ + int ret = 0; + + priv->mib_buf.type = MIB_MAC_MGMT; + priv->mib_buf.size = 2; + priv->mib_buf.index = offsetof(struct mib_mac_mgmt, station_id); + priv->mib_buf.data.word = cpu_to_le16(id); + + ret = at76_set_mib(priv, &priv->mib_buf); + if (ret < 0) + printk(KERN_ERR "%s: set_mib (associd) failed: %d\n", + priv->netdev->name, ret); + + return ret; +} + +/* Set the listen interval for power save mode */ +static int at76_set_listen_interval(struct at76_priv *priv, u16 interval) +{ + int ret = 0; + + priv->mib_buf.type = MIB_MAC; + priv->mib_buf.size = 2; + priv->mib_buf.index = offsetof(struct mib_mac, listen_interval); + priv->mib_buf.data.word = cpu_to_le16(interval); + + ret = at76_set_mib(priv, &priv->mib_buf); + if (ret < 0) + printk(KERN_ERR + "%s: set_mib (listen_interval) failed: %d\n", + priv->netdev->name, ret); return ret; } @@ -912,7 +978,7 @@ static int at76_set_preamble(struct at76_priv *priv, u8 type) ret = at76_set_mib(priv, &priv->mib_buf); if (ret < 0) printk(KERN_ERR "%s: set_mib (preamble) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); return ret; } @@ -929,7 +995,7 @@ static int at76_set_frag(struct at76_priv *priv, u16 size) ret = at76_set_mib(priv, &priv->mib_buf); if (ret < 0) printk(KERN_ERR "%s: set_mib (frag threshold) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); return ret; } @@ -946,7 +1012,7 @@ static int at76_set_rts(struct at76_priv *priv, u16 size) ret = at76_set_mib(priv, &priv->mib_buf); if (ret < 0) printk(KERN_ERR "%s: set_mib (rts) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); return ret; } @@ -963,41 +1029,24 @@ static int at76_set_autorate_fallback(struct at76_priv *priv, int onoff) ret = at76_set_mib(priv, &priv->mib_buf); if (ret < 0) printk(KERN_ERR "%s: set_mib (autorate fallback) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); return ret; } -static int at76_set_tkip_bssid(struct at76_priv *priv, const void *addr) +static int at76_add_mac_address(struct at76_priv *priv, void *addr) { int ret = 0; - priv->mib_buf.type = MIB_MAC_ENCRYPTION; + priv->mib_buf.type = MIB_MAC_ADDR; priv->mib_buf.size = ETH_ALEN; - priv->mib_buf.index = offsetof(struct mib_mac_encryption, tkip_bssid); + priv->mib_buf.index = offsetof(struct mib_mac_addr, mac_addr); memcpy(priv->mib_buf.data.addr, addr, ETH_ALEN); ret = at76_set_mib(priv, &priv->mib_buf); if (ret < 0) - printk(KERN_ERR "%s: set_mib (MAC_ENCRYPTION, tkip_bssid) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); - - return ret; -} - -static int at76_reset_rsc(struct at76_priv *priv) -{ - int ret = 0; - - priv->mib_buf.type = MIB_MAC_ENCRYPTION; - priv->mib_buf.size = 4 * 8; - priv->mib_buf.index = offsetof(struct mib_mac_encryption, key_rsc); - memset(priv->mib_buf.data.data, 0 , priv->mib_buf.size); - - ret = at76_set_mib(priv, &priv->mib_buf); - if (ret < 0) - printk(KERN_ERR "%s: set_mib (MAC_ENCRYPTION, key_rsc) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + printk(KERN_ERR "%s: set_mib (MAC_ADDR, mac_addr) failed: %d\n", + priv->netdev->name, ret); return ret; } @@ -1016,16 +1065,16 @@ static void at76_dump_mib_mac_addr(struct at76_priv *priv) sizeof(struct mib_mac_addr)); if (ret < 0) { printk(KERN_ERR "%s: at76_get_mib (MAC_ADDR) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); goto exit; } at76_dbg(DBG_MIB, "%s: MIB MAC_ADDR: mac_addr %s res 0x%x 0x%x", - wiphy_name(priv->hw->wiphy), + priv->netdev->name, mac2str(m->mac_addr), m->res[0], m->res[1]); for (i = 0; i < ARRAY_SIZE(m->group_addr); i++) at76_dbg(DBG_MIB, "%s: MIB MAC_ADDR: group addr %d: %s, " - "status %d", wiphy_name(priv->hw->wiphy), i, + "status %d", priv->netdev->name, i, mac2str(m->group_addr[i]), m->group_addr_status[i]); exit: kfree(m); @@ -1045,13 +1094,13 @@ static void at76_dump_mib_mac_wep(struct at76_priv *priv) sizeof(struct mib_mac_wep)); if (ret < 0) { printk(KERN_ERR "%s: at76_get_mib (MAC_WEP) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); goto exit; } at76_dbg(DBG_MIB, "%s: MIB MAC_WEP: priv_invoked %u def_key_id %u " "key_len %u excl_unencr %u wep_icv_err %u wep_excluded %u " - "encr_level %u key %d", wiphy_name(priv->hw->wiphy), + "encr_level %u key %d", priv->netdev->name, m->privacy_invoked, m->wep_default_key_id, m->wep_key_mapping_len, m->exclude_unencrypted, le32_to_cpu(m->wep_icv_error_count), @@ -1063,55 +1112,12 @@ static void at76_dump_mib_mac_wep(struct at76_priv *priv) for (i = 0; i < WEP_KEYS; i++) at76_dbg(DBG_MIB, "%s: MIB MAC_WEP: key %d: %s", - wiphy_name(priv->hw->wiphy), i, + priv->netdev->name, i, hex2str(m->wep_default_keyvalue[i], key_len)); exit: kfree(m); } -static void at76_dump_mib_mac_encryption(struct at76_priv *priv) -{ - int i; - int ret; - /*int key_len;*/ - struct mib_mac_encryption *m; - - m = kmalloc(sizeof(struct mib_mac_encryption), GFP_KERNEL); - if (!m) - return; - - ret = at76_get_mib(priv->udev, MIB_MAC_ENCRYPTION, m, - sizeof(struct mib_mac_encryption)); - if (ret < 0) { - dev_err(&priv->udev->dev, - "%s: at76_get_mib (MAC_ENCRYPTION) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); - goto exit; - } - - at76_dbg(DBG_MIB, - "%s: MIB MAC_ENCRYPTION: tkip_bssid %s priv_invoked %u " - "ciph_key_id %u grp_key_id %u excl_unencr %u " - "ckip_key_perm %u wep_icv_err %u wep_excluded %u", - wiphy_name(priv->hw->wiphy), mac2str(m->tkip_bssid), - m->privacy_invoked, m->cipher_default_key_id, - m->cipher_default_group_key_id, m->exclude_unencrypted, - m->ckip_key_permutation, - le32_to_cpu(m->wep_icv_error_count), - le32_to_cpu(m->wep_excluded_count)); - - /*key_len = (m->encryption_level == 1) ? - WEP_SMALL_KEY_LEN : WEP_LARGE_KEY_LEN;*/ - - for (i = 0; i < CIPHER_KEYS; i++) - at76_dbg(DBG_MIB, "%s: MIB MAC_ENCRYPTION: key %d: %s", - wiphy_name(priv->hw->wiphy), i, - hex2str(m->cipher_default_keyvalue[i], - CIPHER_KEY_LEN)); -exit: - kfree(m); -} - static void at76_dump_mib_mac_mgmt(struct at76_priv *priv) { int ret; @@ -1125,7 +1131,7 @@ static void at76_dump_mib_mac_mgmt(struct at76_priv *priv) sizeof(struct mib_mac_mgmt)); if (ret < 0) { printk(KERN_ERR "%s: at76_get_mib (MAC_MGMT) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); goto exit; } @@ -1136,7 +1142,7 @@ static void at76_dump_mib_mac_mgmt(struct at76_priv *priv) "pm_mode %d ibss_change %d res %d " "multi_domain_capability_implemented %d " "international_roaming %d country_string %.3s", - wiphy_name(priv->hw->wiphy), le16_to_cpu(m->beacon_period), + priv->netdev->name, le16_to_cpu(m->beacon_period), le16_to_cpu(m->CFP_max_duration), le16_to_cpu(m->medium_occupancy_limit), le16_to_cpu(m->station_id), le16_to_cpu(m->ATIM_window), @@ -1161,7 +1167,7 @@ static void at76_dump_mib_mac(struct at76_priv *priv) ret = at76_get_mib(priv->udev, MIB_MAC, m, sizeof(struct mib_mac)); if (ret < 0) { printk(KERN_ERR "%s: at76_get_mib (MAC) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); goto exit; } @@ -1171,8 +1177,7 @@ static void at76_dump_mib_mac(struct at76_priv *priv) "scan_type %d scan_channel %d probe_delay %u " "min_channel_time %d max_channel_time %d listen_int %d " "desired_ssid %s desired_bssid %s desired_bsstype %d", - wiphy_name(priv->hw->wiphy), - le32_to_cpu(m->max_tx_msdu_lifetime), + priv->netdev->name, le32_to_cpu(m->max_tx_msdu_lifetime), le32_to_cpu(m->max_rx_lifetime), le16_to_cpu(m->frag_threshold), le16_to_cpu(m->rts_threshold), le16_to_cpu(m->cwmin), le16_to_cpu(m->cwmax), @@ -1198,7 +1203,7 @@ static void at76_dump_mib_phy(struct at76_priv *priv) ret = at76_get_mib(priv->udev, MIB_PHY, m, sizeof(struct mib_phy)); if (ret < 0) { printk(KERN_ERR "%s: at76_get_mib (PHY) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); goto exit; } @@ -1207,7 +1212,7 @@ static void at76_dump_mib_phy(struct at76_priv *priv) "mpdu_max_length %d cca_mode_supported %d operation_rate_set " "0x%x 0x%x 0x%x 0x%x channel_id %d current_cca_mode %d " "phy_type %d current_reg_domain %d", - wiphy_name(priv->hw->wiphy), le32_to_cpu(m->ed_threshold), + priv->netdev->name, le32_to_cpu(m->ed_threshold), le16_to_cpu(m->slot_time), le16_to_cpu(m->sifs_time), le16_to_cpu(m->preamble_length), le16_to_cpu(m->plcp_header_length), @@ -1231,14 +1236,13 @@ static void at76_dump_mib_local(struct at76_priv *priv) ret = at76_get_mib(priv->udev, MIB_LOCAL, m, sizeof(struct mib_local)); if (ret < 0) { printk(KERN_ERR "%s: at76_get_mib (LOCAL) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); goto exit; } at76_dbg(DBG_MIB, "%s: MIB LOCAL: beacon_enable %d " "txautorate_fallback %d ssid_size %d promiscuous_mode %d " - "preamble_type %d", wiphy_name(priv->hw->wiphy), - m->beacon_enable, + "preamble_type %d", priv->netdev->name, m->beacon_enable, m->txautorate_fallback, m->ssid_size, m->promiscuous_mode, m->preamble_type); exit: @@ -1257,21 +1261,118 @@ static void at76_dump_mib_mdomain(struct at76_priv *priv) sizeof(struct mib_mdomain)); if (ret < 0) { printk(KERN_ERR "%s: at76_get_mib (MDOMAIN) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); goto exit; } at76_dbg(DBG_MIB, "%s: MIB MDOMAIN: channel_list %s", - wiphy_name(priv->hw->wiphy), + priv->netdev->name, hex2str(m->channel_list, sizeof(m->channel_list))); at76_dbg(DBG_MIB, "%s: MIB MDOMAIN: tx_powerlevel %s", - wiphy_name(priv->hw->wiphy), + priv->netdev->name, hex2str(m->tx_powerlevel, sizeof(m->tx_powerlevel))); exit: kfree(m); } +static int at76_get_current_bssid(struct at76_priv *priv) +{ + int ret = 0; + struct mib_mac_mgmt *mac_mgmt = + kmalloc(sizeof(struct mib_mac_mgmt), GFP_KERNEL); + + if (!mac_mgmt) { + ret = -ENOMEM; + goto exit; + } + + ret = at76_get_mib(priv->udev, MIB_MAC_MGMT, mac_mgmt, + sizeof(struct mib_mac_mgmt)); + if (ret < 0) { + printk(KERN_ERR "%s: at76_get_mib failed: %d\n", + priv->netdev->name, ret); + goto error; + } + memcpy(priv->bssid, mac_mgmt->current_bssid, ETH_ALEN); + printk(KERN_INFO "%s: using BSSID %s\n", priv->netdev->name, + mac2str(priv->bssid)); +error: + kfree(mac_mgmt); +exit: + return ret; +} + +static int at76_get_current_channel(struct at76_priv *priv) +{ + int ret = 0; + struct mib_phy *phy = kmalloc(sizeof(struct mib_phy), GFP_KERNEL); + + if (!phy) { + ret = -ENOMEM; + goto exit; + } + ret = at76_get_mib(priv->udev, MIB_PHY, phy, sizeof(struct mib_phy)); + if (ret < 0) { + printk(KERN_ERR "%s: at76_get_mib(MIB_PHY) failed: %d\n", + priv->netdev->name, ret); + goto error; + } + priv->channel = phy->channel_id; +error: + kfree(phy); +exit: + return ret; +} + +/** + * at76_start_scan - start a scan + * + * @use_essid - use the configured ESSID in non passive mode + */ +static int at76_start_scan(struct at76_priv *priv, int use_essid) +{ + struct at76_req_scan scan; + + memset(&scan, 0, sizeof(struct at76_req_scan)); + memset(scan.bssid, 0xff, ETH_ALEN); + + if (use_essid) { + memcpy(scan.essid, priv->essid, IW_ESSID_MAX_SIZE); + scan.essid_size = priv->essid_size; + } else + scan.essid_size = 0; + + /* jal: why should we start at a certain channel? we do scan the whole + range allowed by reg domain. */ + scan.channel = priv->channel; + + /* atmelwlandriver differs between scan type 0 and 1 (active/passive) + For ad-hoc mode, it uses type 0 only. */ + scan.scan_type = priv->scan_mode; + + /* INFO: For probe_delay, not multiplying by 1024 as this will be + slightly less than min_channel_time + (per spec: probe delay < min. channel time) */ + scan.min_channel_time = cpu_to_le16(priv->scan_min_time); + scan.max_channel_time = cpu_to_le16(priv->scan_max_time); + scan.probe_delay = cpu_to_le16(priv->scan_min_time * 1000); + scan.international_scan = 0; + + /* other values are set to 0 for type 0 */ + + at76_dbg(DBG_PROGRESS, "%s: start_scan (use_essid = %d, intl = %d, " + "channel = %d, probe_delay = %d, scan_min_time = %d, " + "scan_max_time = %d)", + priv->netdev->name, use_essid, + scan.international_scan, scan.channel, + le16_to_cpu(scan.probe_delay), + le16_to_cpu(scan.min_channel_time), + le16_to_cpu(scan.max_channel_time)); + + return at76_set_card_command(priv->udev, CMD_SCAN, &scan, sizeof(scan)); +} + /* Enable monitor mode */ static int at76_start_monitor(struct at76_priv *priv) { @@ -1292,6 +1393,86 @@ static int at76_start_monitor(struct at76_priv *priv) return ret; } +static int at76_start_ibss(struct at76_priv *priv) +{ + struct at76_req_ibss bss; + int ret; + + WARN_ON(priv->mac_state != MAC_OWN_IBSS); + if (priv->mac_state != MAC_OWN_IBSS) + return -EBUSY; + + memset(&bss, 0, sizeof(struct at76_req_ibss)); + memset(bss.bssid, 0xff, ETH_ALEN); + memcpy(bss.essid, priv->essid, IW_ESSID_MAX_SIZE); + bss.essid_size = priv->essid_size; + bss.bss_type = ADHOC_MODE; + bss.channel = priv->channel; + + ret = at76_set_card_command(priv->udev, CMD_START_IBSS, &bss, + sizeof(struct at76_req_ibss)); + if (ret < 0) { + printk(KERN_ERR "%s: start_ibss failed: %d\n", + priv->netdev->name, ret); + return ret; + } + + ret = at76_wait_completion(priv, CMD_START_IBSS); + if (ret != CMD_STATUS_COMPLETE) { + printk(KERN_ERR "%s: start_ibss failed to complete, %d\n", + priv->netdev->name, ret); + return ret; + } + + ret = at76_get_current_bssid(priv); + if (ret < 0) + return ret; + + ret = at76_get_current_channel(priv); + if (ret < 0) + return ret; + + /* not sure what this is good for ??? */ + priv->mib_buf.type = MIB_MAC_MGMT; + priv->mib_buf.size = 1; + priv->mib_buf.index = offsetof(struct mib_mac_mgmt, ibss_change); + priv->mib_buf.data.byte = 0; + + ret = at76_set_mib(priv, &priv->mib_buf); + if (ret < 0) { + printk(KERN_ERR "%s: set_mib (ibss change ok) failed: %d\n", + priv->netdev->name, ret); + return ret; + } + + netif_carrier_on(priv->netdev); + netif_start_queue(priv->netdev); + return 0; +} + +/* Request card to join BSS in managed or ad-hoc mode */ +static int at76_join_bss(struct at76_priv *priv, struct bss_info *ptr) +{ + struct at76_req_join join; + + BUG_ON(!ptr); + + memset(&join, 0, sizeof(struct at76_req_join)); + memcpy(join.bssid, ptr->bssid, ETH_ALEN); + memcpy(join.essid, ptr->ssid, ptr->ssid_len); + join.essid_size = ptr->ssid_len; + join.bss_type = (priv->iw_mode == IW_MODE_ADHOC ? 1 : 2); + join.channel = ptr->channel; + join.timeout = cpu_to_le16(2000); + + at76_dbg(DBG_PROGRESS, + "%s join addr %s ssid %s type %d ch %d timeout %d", + priv->netdev->name, mac2str(join.bssid), join.essid, + join.bss_type, join.channel, le16_to_cpu(join.timeout)); + return at76_set_card_command(priv->udev, CMD_JOIN, &join, + sizeof(struct at76_req_join)); +} + /* Calculate padding from txbuf->wlength (which excludes the USB TX header), likely to compensate a flaw in the AT76C503A USB part ... */ static inline int at76_calc_padding(int wlen) @@ -1310,6 +1491,14 @@ static inline int at76_calc_padding(int wlen) return 0; } +/* We are doing a lot of things here in an interrupt. Need + a bh handler (Watching TV with a TV card is probably + a good test: if you see flickers, we are doing too much. + Currently I do see flickers... even with our tasklet :-( ) + Maybe because the bttv driver and usb-uhci use the same interrupt +*/ +/* Or maybe because our BH handler is preempting bttv's BH handler.. BHs don't + * solve everything.. (alex) */ static void at76_rx_callback(struct urb *urb) { struct at76_priv *priv = urb->context; @@ -1319,6 +1508,1758 @@ static void at76_rx_callback(struct urb *urb) return; } +static void at76_tx_callback(struct urb *urb) +{ + struct at76_priv *priv = urb->context; + struct net_device_stats *stats = &priv->stats; + unsigned long flags; + struct at76_tx_buffer *mgmt_buf; + int ret; + + switch (urb->status) { + case 0: + stats->tx_packets++; + break; + case -ENOENT: + case -ECONNRESET: + /* urb has been unlinked */ + return; + default: + at76_dbg(DBG_URB, "%s - nonzero tx status received: %d", + __func__, urb->status); + stats->tx_errors++; + break; + } + + spin_lock_irqsave(&priv->mgmt_spinlock, flags); + mgmt_buf = priv->next_mgmt_bulk; + priv->next_mgmt_bulk = NULL; + spin_unlock_irqrestore(&priv->mgmt_spinlock, flags); + + if (!mgmt_buf) { + netif_wake_queue(priv->netdev); + return; + } + + /* we don't copy the padding bytes, but add them + to the length */ + memcpy(priv->bulk_out_buffer, mgmt_buf, + le16_to_cpu(mgmt_buf->wlength) + AT76_TX_HDRLEN); + usb_fill_bulk_urb(priv->tx_urb, priv->udev, priv->tx_pipe, + priv->bulk_out_buffer, + le16_to_cpu(mgmt_buf->wlength) + mgmt_buf->padding + + AT76_TX_HDRLEN, at76_tx_callback, priv); + ret = usb_submit_urb(priv->tx_urb, GFP_ATOMIC); + if (ret) + printk(KERN_ERR "%s: error in tx submit urb: %d\n", + priv->netdev->name, ret); + + kfree(mgmt_buf); +} + +/* Send a management frame on bulk-out. txbuf->wlength must be set */ +static int at76_tx_mgmt(struct at76_priv *priv, struct at76_tx_buffer *txbuf) +{ + unsigned long flags; + int ret; + int urb_status; + void *oldbuf = NULL; + + netif_carrier_off(priv->netdev); /* stop netdev watchdog */ + netif_stop_queue(priv->netdev); /* stop tx data packets */ + + spin_lock_irqsave(&priv->mgmt_spinlock, flags); + + urb_status = priv->tx_urb->status; + if (urb_status == -EINPROGRESS) { + /* cannot transmit now, put in the queue */ + oldbuf = priv->next_mgmt_bulk; + priv->next_mgmt_bulk = txbuf; + } + spin_unlock_irqrestore(&priv->mgmt_spinlock, flags); + + if (oldbuf) { + /* a data/mgmt tx is already pending in the URB - + if this is no error in some situations we must + implement a queue or silently modify the old msg */ + printk(KERN_ERR "%s: removed pending mgmt buffer %s\n", + priv->netdev->name, hex2str(oldbuf, 64)); + kfree(oldbuf); + return 0; + } + + txbuf->tx_rate = TX_RATE_1MBIT; + txbuf->padding = at76_calc_padding(le16_to_cpu(txbuf->wlength)); + memset(txbuf->reserved, 0, sizeof(txbuf->reserved)); + + if (priv->next_mgmt_bulk) + printk(KERN_ERR "%s: URB status %d, but mgmt is pending\n", + priv->netdev->name, urb_status); + + at76_dbg(DBG_TX_MGMT, + "%s: tx mgmt: wlen %d tx_rate %d pad %d %s", + priv->netdev->name, le16_to_cpu(txbuf->wlength), + txbuf->tx_rate, txbuf->padding, + hex2str(txbuf->packet, le16_to_cpu(txbuf->wlength))); + + /* txbuf was not consumed above -> send mgmt msg immediately */ + memcpy(priv->bulk_out_buffer, txbuf, + le16_to_cpu(txbuf->wlength) + AT76_TX_HDRLEN); + usb_fill_bulk_urb(priv->tx_urb, priv->udev, priv->tx_pipe, + priv->bulk_out_buffer, + le16_to_cpu(txbuf->wlength) + txbuf->padding + + AT76_TX_HDRLEN, at76_tx_callback, priv); + ret = usb_submit_urb(priv->tx_urb, GFP_ATOMIC); + if (ret) + printk(KERN_ERR "%s: error in tx submit urb: %d\n", + priv->netdev->name, ret); + + kfree(txbuf); + + return ret; +} + +/* Go to the next information element */ +static inline void next_ie(struct ieee80211_info_element **ie) +{ + *ie = (struct ieee80211_info_element *)(&(*ie)->data[(*ie)->len]); +} + +/* Challenge is the challenge string (in TLV format) + we got with seq_nr 2 for shared secret authentication only and + send in seq_nr 3 WEP encrypted to prove we have the correct WEP key; + otherwise it is NULL */ +static int at76_auth_req(struct at76_priv *priv, struct bss_info *bss, + int seq_nr, struct ieee80211_info_element *challenge) +{ + struct at76_tx_buffer *tx_buffer; + struct ieee80211_hdr_3addr *mgmt; + struct ieee80211_auth *req; + int buf_len = (seq_nr != 3 ? AUTH_FRAME_SIZE : + AUTH_FRAME_SIZE + 1 + 1 + challenge->len); + + BUG_ON(!bss); + BUG_ON(seq_nr == 3 && !challenge); + tx_buffer = kmalloc(buf_len + MAX_PADDING_SIZE, GFP_ATOMIC); + if (!tx_buffer) + return -ENOMEM; + + req = (struct ieee80211_auth *)tx_buffer->packet; + mgmt = &req->header; + + /* make wireless header */ + /* first auth msg is not encrypted, only the second (seq_nr == 3) */ + mgmt->frame_ctl = + cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH | + (seq_nr == 3 ? IEEE80211_FCTL_PROTECTED : 0)); + + mgmt->duration_id = cpu_to_le16(0x8000); + memcpy(mgmt->addr1, bss->bssid, ETH_ALEN); + memcpy(mgmt->addr2, priv->netdev->dev_addr, ETH_ALEN); + memcpy(mgmt->addr3, bss->bssid, ETH_ALEN); + mgmt->seq_ctl = cpu_to_le16(0); + + req->algorithm = cpu_to_le16(priv->auth_mode); + req->transaction = cpu_to_le16(seq_nr); + req->status = cpu_to_le16(0); + + if (seq_nr == 3) + memcpy(req->info_element, challenge, 1 + 1 + challenge->len); + + /* init. at76_priv tx header */ + tx_buffer->wlength = cpu_to_le16(buf_len - AT76_TX_HDRLEN); + at76_dbg(DBG_TX_MGMT, "%s: AuthReq bssid %s alg %d seq_nr %d", + priv->netdev->name, mac2str(mgmt->addr3), + le16_to_cpu(req->algorithm), le16_to_cpu(req->transaction)); + if (seq_nr == 3) + at76_dbg(DBG_TX_MGMT, "%s: AuthReq challenge: %s ...", + priv->netdev->name, hex2str(req->info_element, 18)); + + /* either send immediately (if no data tx is pending + or put it in pending list */ + return at76_tx_mgmt(priv, tx_buffer); +} + +static int at76_assoc_req(struct at76_priv *priv, struct bss_info *bss) +{ + struct at76_tx_buffer *tx_buffer; + struct ieee80211_hdr_3addr *mgmt; + struct ieee80211_assoc_request *req; + struct ieee80211_info_element *ie; + char *essid; + int essid_len; + u16 capa; + + BUG_ON(!bss); + + tx_buffer = kmalloc(ASSOCREQ_MAX_SIZE + MAX_PADDING_SIZE, GFP_ATOMIC); + if (!tx_buffer) + return -ENOMEM; + + req = (struct ieee80211_assoc_request *)tx_buffer->packet; + mgmt = &req->header; + ie = req->info_element; + + /* make wireless header */ + mgmt->frame_ctl = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ASSOC_REQ); + + mgmt->duration_id = cpu_to_le16(0x8000); + memcpy(mgmt->addr1, bss->bssid, ETH_ALEN); + memcpy(mgmt->addr2, priv->netdev->dev_addr, ETH_ALEN); + memcpy(mgmt->addr3, bss->bssid, ETH_ALEN); + mgmt->seq_ctl = cpu_to_le16(0); + + /* we must set the Privacy bit in the capabilities to assure an + Agere-based AP with optional WEP transmits encrypted frames + to us. AP only set the Privacy bit in their capabilities + if WEP is mandatory in the BSS! */ + capa = bss->capa; + if (priv->wep_enabled) + capa |= WLAN_CAPABILITY_PRIVACY; + if (priv->preamble_type != PREAMBLE_TYPE_LONG) + capa |= WLAN_CAPABILITY_SHORT_PREAMBLE; + req->capability = cpu_to_le16(capa); + + req->listen_interval = cpu_to_le16(2 * bss->beacon_interval); + + /* write TLV data elements */ + + ie->id = MFIE_TYPE_SSID; + ie->len = bss->ssid_len; + memcpy(ie->data, bss->ssid, bss->ssid_len); + next_ie(&ie); + + ie->id = MFIE_TYPE_RATES; + ie->len = sizeof(hw_rates); + memcpy(ie->data, hw_rates, sizeof(hw_rates)); + next_ie(&ie); /* ie points behind the supp_rates field */ + + /* init. at76_priv tx header */ + tx_buffer->wlength = cpu_to_le16((u8 *)ie - (u8 *)mgmt); + + ie = req->info_element; + essid = ie->data; + essid_len = min_t(int, IW_ESSID_MAX_SIZE, ie->len); + + next_ie(&ie); /* points to IE of rates now */ + at76_dbg(DBG_TX_MGMT, + "%s: AssocReq bssid %s capa 0x%04x ssid %.*s rates %s", + priv->netdev->name, mac2str(mgmt->addr3), + le16_to_cpu(req->capability), essid_len, essid, + hex2str(ie->data, ie->len)); + + /* either send immediately (if no data tx is pending + or put it in pending list */ + return at76_tx_mgmt(priv, tx_buffer); +} + +/* We got to check the bss_list for old entries */ +static void at76_bss_list_timeout(unsigned long par) +{ + struct at76_priv *priv = (struct at76_priv *)par; + unsigned long flags; + struct list_head *lptr, *nptr; + struct bss_info *ptr; + + spin_lock_irqsave(&priv->bss_list_spinlock, flags); + + list_for_each_safe(lptr, nptr, &priv->bss_list) { + + ptr = list_entry(lptr, struct bss_info, list); + + if (ptr != priv->curr_bss + && time_after(jiffies, ptr->last_rx + BSS_LIST_TIMEOUT)) { + at76_dbg(DBG_BSS_TABLE_RM, + "%s: bss_list: removing old BSS %s ch %d", + priv->netdev->name, mac2str(ptr->bssid), + ptr->channel); + list_del(&ptr->list); + kfree(ptr); + } + } + spin_unlock_irqrestore(&priv->bss_list_spinlock, flags); + /* restart the timer */ + mod_timer(&priv->bss_list_timer, jiffies + BSS_LIST_TIMEOUT); +} + +static inline void at76_set_mac_state(struct at76_priv *priv, + enum mac_state mac_state) +{ + at76_dbg(DBG_MAC_STATE, "%s state: %s", priv->netdev->name, + mac_states[mac_state]); + priv->mac_state = mac_state; +} + +static void at76_dump_bss_table(struct at76_priv *priv) +{ + struct bss_info *ptr; + unsigned long flags; + struct list_head *lptr; + + spin_lock_irqsave(&priv->bss_list_spinlock, flags); + + at76_dbg(DBG_BSS_TABLE, "%s BSS table (curr=%p):", priv->netdev->name, + priv->curr_bss); + + list_for_each(lptr, &priv->bss_list) { + ptr = list_entry(lptr, struct bss_info, list); + at76_dbg(DBG_BSS_TABLE, "0x%p: bssid %s channel %d ssid %.*s " + "(%s) capa 0x%04x rates %s rssi %d link %d noise %d", + ptr, mac2str(ptr->bssid), ptr->channel, ptr->ssid_len, + ptr->ssid, hex2str(ptr->ssid, ptr->ssid_len), + ptr->capa, hex2str(ptr->rates, ptr->rates_len), + ptr->rssi, ptr->link_qual, ptr->noise_level); + } + spin_unlock_irqrestore(&priv->bss_list_spinlock, flags); +} + +/* Called upon successful association to mark interface as connected */ +static void at76_work_assoc_done(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + work_assoc_done); + + mutex_lock(&priv->mtx); + + WARN_ON(priv->mac_state != MAC_ASSOC); + WARN_ON(!priv->curr_bss); + if (priv->mac_state != MAC_ASSOC || !priv->curr_bss) + goto exit; + + if (priv->iw_mode == IW_MODE_INFRA) { + if (priv->pm_mode != AT76_PM_OFF) { + /* calculate the listen interval in units of + beacon intervals of the curr_bss */ + u32 pm_period_beacon = (priv->pm_period >> 10) / + priv->curr_bss->beacon_interval; + + pm_period_beacon = max(pm_period_beacon, 2u); + pm_period_beacon = min(pm_period_beacon, 0xffffu); + + at76_dbg(DBG_PM, + "%s: pm_mode %d assoc id 0x%x listen int %d", + priv->netdev->name, priv->pm_mode, + priv->assoc_id, pm_period_beacon); + + at76_set_associd(priv, priv->assoc_id); + at76_set_listen_interval(priv, (u16)pm_period_beacon); + } + schedule_delayed_work(&priv->dwork_beacon, BEACON_TIMEOUT); + } + at76_set_pm_mode(priv); + + netif_carrier_on(priv->netdev); + netif_wake_queue(priv->netdev); + at76_set_mac_state(priv, MAC_CONNECTED); + at76_iwevent_bss_connect(priv->netdev, priv->curr_bss->bssid); + at76_dbg(DBG_PROGRESS, "%s: connected to BSSID %s", + priv->netdev->name, mac2str(priv->curr_bss->bssid)); + +exit: + mutex_unlock(&priv->mtx); +} + +/* We only store the new mac address in netdev struct, + it gets set when the netdev is opened. */ +static int at76_set_mac_address(struct net_device *netdev, void *addr) +{ + struct sockaddr *mac = addr; + memcpy(netdev->dev_addr, mac->sa_data, ETH_ALEN); + return 1; +} + +static struct net_device_stats *at76_get_stats(struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + return &priv->stats; +} + +static struct iw_statistics *at76_get_wireless_stats(struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_IOCTL, "RETURN qual %d level %d noise %d updated %d", + priv->wstats.qual.qual, priv->wstats.qual.level, + priv->wstats.qual.noise, priv->wstats.qual.updated); + + return &priv->wstats; +} + +static void at76_set_multicast(struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + int promisc; + + promisc = ((netdev->flags & IFF_PROMISC) != 0); + if (promisc != priv->promisc) { + /* This gets called in interrupt, must reschedule */ + priv->promisc = promisc; + schedule_work(&priv->work_set_promisc); + } +} + +/* Stop all network activity, flush all pending tasks */ +static void at76_quiesce(struct at76_priv *priv) +{ + unsigned long flags; + + netif_stop_queue(priv->netdev); + netif_carrier_off(priv->netdev); + + at76_set_mac_state(priv, MAC_INIT); + + cancel_delayed_work(&priv->dwork_get_scan); + cancel_delayed_work(&priv->dwork_beacon); + cancel_delayed_work(&priv->dwork_auth); + cancel_delayed_work(&priv->dwork_assoc); + cancel_delayed_work(&priv->dwork_restart); + + spin_lock_irqsave(&priv->mgmt_spinlock, flags); + kfree(priv->next_mgmt_bulk); + priv->next_mgmt_bulk = NULL; + spin_unlock_irqrestore(&priv->mgmt_spinlock, flags); +} + +/******************************************************************************* + * at76_priv implementations of iw_handler functions: + */ +static int at76_iw_handler_commit(struct net_device *netdev, + struct iw_request_info *info, + void *null, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_IOCTL, "%s %s: restarting the device", netdev->name, + __func__); + + if (priv->mac_state != MAC_INIT) + at76_quiesce(priv); + + /* Wait half second before the restart to process subsequent + * requests from the same iwconfig in a single restart */ + schedule_delayed_work(&priv->dwork_restart, HZ / 2); + + return 0; +} + +static int at76_iw_handler_get_name(struct net_device *netdev, + struct iw_request_info *info, + char *name, char *extra) +{ + strcpy(name, "IEEE 802.11b"); + at76_dbg(DBG_IOCTL, "%s: SIOCGIWNAME - name %s", netdev->name, name); + return 0; +} + +static int at76_iw_handler_set_freq(struct net_device *netdev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int chan = -1; + int ret = -EIWCOMMIT; + at76_dbg(DBG_IOCTL, "%s: SIOCSIWFREQ - freq.m %d freq.e %d", + netdev->name, freq->m, freq->e); + + if ((freq->e == 0) && (freq->m <= 1000)) + /* Setting by channel number */ + chan = freq->m; + else { + /* Setting by frequency - search the table */ + int mult = 1; + int i; + + for (i = 0; i < (6 - freq->e); i++) + mult *= 10; + + for (i = 0; i < NUM_CHANNELS; i++) { + if (freq->m == (channel_frequency[i] * mult)) + chan = i + 1; + } + } + + if (chan < 1 || !priv->domain) + /* non-positive channels are invalid + * we need a domain info to set the channel + * either that or an invalid frequency was + * provided by the user */ + ret = -EINVAL; + else if (!(priv->domain->channel_map & (1 << (chan - 1)))) { + printk(KERN_INFO "%s: channel %d not allowed for domain %s\n", + priv->netdev->name, chan, priv->domain->name); + ret = -EINVAL; + } + + if (ret == -EIWCOMMIT) { + priv->channel = chan; + at76_dbg(DBG_IOCTL, "%s: SIOCSIWFREQ - ch %d", netdev->name, + chan); + } + + return ret; +} + +static int at76_iw_handler_get_freq(struct net_device *netdev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + freq->m = priv->channel; + freq->e = 0; + + if (priv->channel) + at76_dbg(DBG_IOCTL, "%s: SIOCGIWFREQ - freq %ld x 10e%d", + netdev->name, channel_frequency[priv->channel - 1], 6); + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWFREQ - ch %d", netdev->name, + priv->channel); + + return 0; +} + +static int at76_iw_handler_set_mode(struct net_device *netdev, + struct iw_request_info *info, + __u32 *mode, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWMODE - %d", netdev->name, *mode); + + if ((*mode != IW_MODE_ADHOC) && (*mode != IW_MODE_INFRA) && + (*mode != IW_MODE_MONITOR)) + return -EINVAL; + + priv->iw_mode = *mode; + if (priv->iw_mode != IW_MODE_INFRA) + priv->pm_mode = AT76_PM_OFF; + + return -EIWCOMMIT; +} + +static int at76_iw_handler_get_mode(struct net_device *netdev, + struct iw_request_info *info, + __u32 *mode, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + *mode = priv->iw_mode; + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWMODE - %d", netdev->name, *mode); + + return 0; +} + +static int at76_iw_handler_get_range(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + /* inspired by atmel.c */ + struct at76_priv *priv = netdev_priv(netdev); + struct iw_range *range = (struct iw_range *)extra; + int i; + + data->length = sizeof(struct iw_range); + memset(range, 0, sizeof(struct iw_range)); + + /* TODO: range->throughput = xxxxxx; */ + + range->min_nwid = 0x0000; + range->max_nwid = 0x0000; + + /* this driver doesn't maintain sensitivity information */ + range->sensitivity = 0; + + range->max_qual.qual = 100; + range->max_qual.level = 100; + range->max_qual.noise = 0; + range->max_qual.updated = IW_QUAL_NOISE_INVALID; + + range->avg_qual.qual = 50; + range->avg_qual.level = 50; + range->avg_qual.noise = 0; + range->avg_qual.updated = IW_QUAL_NOISE_INVALID; + + range->bitrate[0] = 1000000; + range->bitrate[1] = 2000000; + range->bitrate[2] = 5500000; + range->bitrate[3] = 11000000; + range->num_bitrates = 4; + + range->min_rts = 0; + range->max_rts = MAX_RTS_THRESHOLD; + + range->min_frag = MIN_FRAG_THRESHOLD; + range->max_frag = MAX_FRAG_THRESHOLD; + + range->pmp_flags = IW_POWER_PERIOD; + range->pmt_flags = IW_POWER_ON; + range->pm_capa = IW_POWER_PERIOD | IW_POWER_ALL_R; + + range->encoding_size[0] = WEP_SMALL_KEY_LEN; + range->encoding_size[1] = WEP_LARGE_KEY_LEN; + range->num_encoding_sizes = 2; + range->max_encoding_tokens = WEP_KEYS; + + /* both WL-240U and Linksys WUSB11 v2.6 specify 15 dBm as output power + - take this for all (ignore antenna gains) */ + range->txpower[0] = 15; + range->num_txpower = 1; + range->txpower_capa = IW_TXPOW_DBM; + + range->we_version_source = WIRELESS_EXT; + range->we_version_compiled = WIRELESS_EXT; + + /* same as the values used in atmel.c */ + range->retry_capa = IW_RETRY_LIMIT; + range->retry_flags = IW_RETRY_LIMIT; + range->r_time_flags = 0; + range->min_retry = 1; + range->max_retry = 255; + + range->num_channels = NUM_CHANNELS; + range->num_frequency = 0; + + for (i = 0; i < NUM_CHANNELS; i++) { + /* test if channel map bit is raised */ + if (priv->domain->channel_map & (0x1 << i)) { + range->num_frequency += 1; + + range->freq[i].i = i + 1; + range->freq[i].m = channel_frequency[i] * 100000; + range->freq[i].e = 1; /* freq * 10^1 */ + } + } + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWRANGE", netdev->name); + + return 0; +} + +static int at76_iw_handler_set_spy(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = 0; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWSPY - number of addresses %d", + netdev->name, data->length); + + spin_lock_bh(&priv->spy_spinlock); + ret = iw_handler_set_spy(priv->netdev, info, (union iwreq_data *)data, + extra); + spin_unlock_bh(&priv->spy_spinlock); + + return ret; +} + +static int at76_iw_handler_get_spy(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + + struct at76_priv *priv = netdev_priv(netdev); + int ret = 0; + + spin_lock_bh(&priv->spy_spinlock); + ret = iw_handler_get_spy(priv->netdev, info, + (union iwreq_data *)data, extra); + spin_unlock_bh(&priv->spy_spinlock); + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWSPY - number of addresses %d", + netdev->name, data->length); + + return ret; +} + +static int at76_iw_handler_set_thrspy(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWTHRSPY - number of addresses %d)", + netdev->name, data->length); + + spin_lock_bh(&priv->spy_spinlock); + ret = iw_handler_set_thrspy(netdev, info, (union iwreq_data *)data, + extra); + spin_unlock_bh(&priv->spy_spinlock); + + return ret; +} + +static int at76_iw_handler_get_thrspy(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret; + + spin_lock_bh(&priv->spy_spinlock); + ret = iw_handler_get_thrspy(netdev, info, (union iwreq_data *)data, + extra); + spin_unlock_bh(&priv->spy_spinlock); + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWTHRSPY - number of addresses %d)", + netdev->name, data->length); + + return ret; +} + +static int at76_iw_handler_set_wap(struct net_device *netdev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWAP - wap/bssid %s", netdev->name, + mac2str(ap_addr->sa_data)); + + /* if the incoming address == ff:ff:ff:ff:ff:ff, the user has + chosen any or auto AP preference */ + if (is_broadcast_ether_addr(ap_addr->sa_data) + || is_zero_ether_addr(ap_addr->sa_data)) + priv->wanted_bssid_valid = 0; + else { + /* user wants to set a preferred AP address */ + priv->wanted_bssid_valid = 1; + memcpy(priv->wanted_bssid, ap_addr->sa_data, ETH_ALEN); + } + + return -EIWCOMMIT; +} + +static int at76_iw_handler_get_wap(struct net_device *netdev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + ap_addr->sa_family = ARPHRD_ETHER; + memcpy(ap_addr->sa_data, priv->bssid, ETH_ALEN); + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWAP - wap/bssid %s", netdev->name, + mac2str(ap_addr->sa_data)); + + return 0; +} + +static int at76_iw_handler_set_scan(struct net_device *netdev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = 0; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWSCAN", netdev->name); + + if (mutex_lock_interruptible(&priv->mtx)) + return -EINTR; + + if (!netif_running(netdev)) { + ret = -ENETDOWN; + goto exit; + } + + /* jal: we don't allow "iwlist ethX scan" while we are + in monitor mode */ + if (priv->iw_mode == IW_MODE_MONITOR) { + ret = -EBUSY; + goto exit; + } + + /* Discard old scan results */ + if ((jiffies - priv->last_scan) > (20 * HZ)) + priv->scan_state = SCAN_IDLE; + priv->last_scan = jiffies; + + /* Initiate a scan command */ + if (priv->scan_state == SCAN_IN_PROGRESS) { + ret = -EBUSY; + goto exit; + } + + priv->scan_state = SCAN_IN_PROGRESS; + + at76_quiesce(priv); + + /* Try to do passive or active scan if WE asks as. */ + if (wrqu->data.length + && wrqu->data.length == sizeof(struct iw_scan_req)) { + struct iw_scan_req *req = (struct iw_scan_req *)extra; + + if (req->scan_type == IW_SCAN_TYPE_PASSIVE) + priv->scan_mode = SCAN_TYPE_PASSIVE; + else if (req->scan_type == IW_SCAN_TYPE_ACTIVE) + priv->scan_mode = SCAN_TYPE_ACTIVE; + + /* Sanity check values? */ + if (req->min_channel_time > 0) + priv->scan_min_time = req->min_channel_time; + + if (req->max_channel_time > 0) + priv->scan_max_time = req->max_channel_time; + } + + /* change to scanning state */ + at76_set_mac_state(priv, MAC_SCANNING); + schedule_work(&priv->work_start_scan); + +exit: + mutex_unlock(&priv->mtx); + return ret; +} + +static int at76_iw_handler_get_scan(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + unsigned long flags; + struct list_head *lptr, *nptr; + struct bss_info *curr_bss; + struct iw_event *iwe = kmalloc(sizeof(struct iw_event), GFP_KERNEL); + char *curr_val, *curr_pos = extra; + int i; + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWSCAN", netdev->name); + + if (!iwe) + return -ENOMEM; + + if (priv->scan_state != SCAN_COMPLETED) { + /* scan not yet finished */ + kfree(iwe); + return -EAGAIN; + } + + spin_lock_irqsave(&priv->bss_list_spinlock, flags); + + list_for_each_safe(lptr, nptr, &priv->bss_list) { + curr_bss = list_entry(lptr, struct bss_info, list); + + iwe->cmd = SIOCGIWAP; + iwe->u.ap_addr.sa_family = ARPHRD_ETHER; + memcpy(iwe->u.ap_addr.sa_data, curr_bss->bssid, 6); + curr_pos = iwe_stream_add_event(info, curr_pos, + extra + IW_SCAN_MAX_DATA, iwe, + IW_EV_ADDR_LEN); + + iwe->u.data.length = curr_bss->ssid_len; + iwe->cmd = SIOCGIWESSID; + iwe->u.data.flags = 1; + + curr_pos = iwe_stream_add_point(info, curr_pos, + extra + IW_SCAN_MAX_DATA, iwe, + curr_bss->ssid); + + iwe->cmd = SIOCGIWMODE; + iwe->u.mode = (curr_bss->capa & WLAN_CAPABILITY_IBSS) ? + IW_MODE_ADHOC : + (curr_bss->capa & WLAN_CAPABILITY_ESS) ? + IW_MODE_MASTER : IW_MODE_AUTO; + /* IW_MODE_AUTO = 0 which I thought is + * the most logical value to return in this case */ + curr_pos = iwe_stream_add_event(info, curr_pos, + extra + IW_SCAN_MAX_DATA, iwe, + IW_EV_UINT_LEN); + + iwe->cmd = SIOCGIWFREQ; + iwe->u.freq.m = curr_bss->channel; + iwe->u.freq.e = 0; + curr_pos = iwe_stream_add_event(info, curr_pos, + extra + IW_SCAN_MAX_DATA, iwe, + IW_EV_FREQ_LEN); + + iwe->cmd = SIOCGIWENCODE; + if (curr_bss->capa & WLAN_CAPABILITY_PRIVACY) + iwe->u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; + else + iwe->u.data.flags = IW_ENCODE_DISABLED; + + iwe->u.data.length = 0; + curr_pos = iwe_stream_add_point(info, curr_pos, + extra + IW_SCAN_MAX_DATA, iwe, + NULL); + + /* Add quality statistics */ + iwe->cmd = IWEVQUAL; + iwe->u.qual.noise = 0; + iwe->u.qual.updated = + IW_QUAL_NOISE_INVALID | IW_QUAL_LEVEL_UPDATED; + iwe->u.qual.level = (curr_bss->rssi * 100 / 42); + if (iwe->u.qual.level > 100) + iwe->u.qual.level = 100; + if (at76_is_intersil(priv->board_type)) + iwe->u.qual.qual = curr_bss->link_qual; + else { + iwe->u.qual.qual = 0; + iwe->u.qual.updated |= IW_QUAL_QUAL_INVALID; + } + /* Add new value to event */ + curr_pos = iwe_stream_add_event(info, curr_pos, + extra + IW_SCAN_MAX_DATA, iwe, + IW_EV_QUAL_LEN); + + /* Rate: stuffing multiple values in a single event requires + * a bit more of magic - Jean II */ + curr_val = curr_pos + IW_EV_LCP_LEN; + + iwe->cmd = SIOCGIWRATE; + /* Those two flags are ignored... */ + iwe->u.bitrate.fixed = 0; + iwe->u.bitrate.disabled = 0; + /* Max 8 values */ + for (i = 0; i < curr_bss->rates_len; i++) { + /* Bit rate given in 500 kb/s units (+ 0x80) */ + iwe->u.bitrate.value = + ((curr_bss->rates[i] & 0x7f) * 500000); + /* Add new value to event */ + curr_val = iwe_stream_add_value(info, curr_pos, + curr_val, + extra + + IW_SCAN_MAX_DATA, iwe, + IW_EV_PARAM_LEN); + } + + /* Check if we added any event */ + if ((curr_val - curr_pos) > IW_EV_LCP_LEN) + curr_pos = curr_val; + + /* more information may be sent back using IWECUSTOM */ + + } + + spin_unlock_irqrestore(&priv->bss_list_spinlock, flags); + + data->length = (curr_pos - extra); + data->flags = 0; + + kfree(iwe); + return 0; +} + +static int at76_iw_handler_set_essid(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWESSID - %s", netdev->name, extra); + + if (data->flags) { + memcpy(priv->essid, extra, data->length); + priv->essid_size = data->length; + } else + priv->essid_size = 0; /* Use any SSID */ + + return -EIWCOMMIT; +} + +static int at76_iw_handler_get_essid(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + if (priv->essid_size) { + /* not the ANY ssid in priv->essid */ + data->flags = 1; + data->length = priv->essid_size; + memcpy(extra, priv->essid, data->length); + } else { + /* the ANY ssid was specified */ + if (priv->mac_state == MAC_CONNECTED && priv->curr_bss) { + /* report the SSID we have found */ + data->flags = 1; + data->length = priv->curr_bss->ssid_len; + memcpy(extra, priv->curr_bss->ssid, data->length); + } else { + /* report ANY back */ + data->flags = 0; + data->length = 0; + } + } + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWESSID - %.*s", netdev->name, + data->length, extra); + + return 0; +} + +static int at76_iw_handler_set_rate(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *bitrate, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = -EIWCOMMIT; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWRATE - %d", netdev->name, + bitrate->value); + + switch (bitrate->value) { + case -1: + priv->txrate = TX_RATE_AUTO; + break; /* auto rate */ + case 1000000: + priv->txrate = TX_RATE_1MBIT; + break; + case 2000000: + priv->txrate = TX_RATE_2MBIT; + break; + case 5500000: + priv->txrate = TX_RATE_5_5MBIT; + break; + case 11000000: + priv->txrate = TX_RATE_11MBIT; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int at76_iw_handler_get_rate(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *bitrate, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = 0; + + switch (priv->txrate) { + /* return max rate if RATE_AUTO */ + case TX_RATE_AUTO: + bitrate->value = 11000000; + break; + case TX_RATE_1MBIT: + bitrate->value = 1000000; + break; + case TX_RATE_2MBIT: + bitrate->value = 2000000; + break; + case TX_RATE_5_5MBIT: + bitrate->value = 5500000; + break; + case TX_RATE_11MBIT: + bitrate->value = 11000000; + break; + default: + ret = -EINVAL; + } + + bitrate->fixed = (priv->txrate != TX_RATE_AUTO); + bitrate->disabled = 0; + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWRATE - %d", netdev->name, + bitrate->value); + + return ret; +} + +static int at76_iw_handler_set_rts(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *rts, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = -EIWCOMMIT; + int rthr = rts->value; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWRTS - value %d disabled %s", + netdev->name, rts->value, (rts->disabled) ? "true" : "false"); + + if (rts->disabled) + rthr = MAX_RTS_THRESHOLD; + + if ((rthr < 0) || (rthr > MAX_RTS_THRESHOLD)) + ret = -EINVAL; + else + priv->rts_threshold = rthr; + + return ret; +} + +static int at76_iw_handler_get_rts(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *rts, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + rts->value = priv->rts_threshold; + rts->disabled = (rts->value >= MAX_RTS_THRESHOLD); + rts->fixed = 1; + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWRTS - value %d disabled %s", + netdev->name, rts->value, (rts->disabled) ? "true" : "false"); + + return 0; +} + +static int at76_iw_handler_set_frag(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *frag, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = -EIWCOMMIT; + int fthr = frag->value; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWFRAG - value %d, disabled %s", + netdev->name, frag->value, + (frag->disabled) ? "true" : "false"); + + if (frag->disabled) + fthr = MAX_FRAG_THRESHOLD; + + if ((fthr < MIN_FRAG_THRESHOLD) || (fthr > MAX_FRAG_THRESHOLD)) + ret = -EINVAL; + else + priv->frag_threshold = fthr & ~0x1; /* get an even value */ + + return ret; +} + +static int at76_iw_handler_get_frag(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *frag, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + frag->value = priv->frag_threshold; + frag->disabled = (frag->value >= MAX_FRAG_THRESHOLD); + frag->fixed = 1; + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWFRAG - value %d, disabled %s", + netdev->name, frag->value, + (frag->disabled) ? "true" : "false"); + + return 0; +} + +static int at76_iw_handler_get_txpow(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *power, char *extra) +{ + power->value = 15; + power->fixed = 1; /* No power control */ + power->disabled = 0; + power->flags = IW_TXPOW_DBM; + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWTXPOW - txpow %d dBm", netdev->name, + power->value); + + return 0; +} + +/* jal: short retry is handled by the firmware (at least 0.90.x), + while long retry is not (?) */ +static int at76_iw_handler_set_retry(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *retry, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = -EIWCOMMIT; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWRETRY disabled %d flags 0x%x val %d", + netdev->name, retry->disabled, retry->flags, retry->value); + + if (!retry->disabled && (retry->flags & IW_RETRY_LIMIT)) { + if ((retry->flags & IW_RETRY_MIN) || + !(retry->flags & IW_RETRY_MAX)) + priv->short_retry_limit = retry->value; + else + ret = -EINVAL; + } else + ret = -EINVAL; + + return ret; +} + +/* Adapted (ripped) from atmel.c */ +static int at76_iw_handler_get_retry(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *retry, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWRETRY", netdev->name); + + retry->disabled = 0; /* Can't be disabled */ + retry->flags = IW_RETRY_LIMIT; + retry->value = priv->short_retry_limit; + + return 0; +} + +static int at76_iw_handler_set_encode(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *encoding, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int index = (encoding->flags & IW_ENCODE_INDEX) - 1; + int len = encoding->length; + + at76_dbg(DBG_IOCTL, "%s: SIOCSIWENCODE - enc.flags %08x " + "pointer %p len %d", netdev->name, encoding->flags, + encoding->pointer, encoding->length); + at76_dbg(DBG_IOCTL, + "%s: SIOCSIWENCODE - old wepstate: enabled %s key_id %d " + "auth_mode %s", netdev->name, + (priv->wep_enabled) ? "true" : "false", priv->wep_key_id, + (priv->auth_mode == + WLAN_AUTH_SHARED_KEY) ? "restricted" : "open"); + + /* take the old default key if index is invalid */ + if ((index < 0) || (index >= WEP_KEYS)) + index = priv->wep_key_id; + + if (len > 0) { + if (len > WEP_LARGE_KEY_LEN) + len = WEP_LARGE_KEY_LEN; + + memset(priv->wep_keys[index], 0, WEP_KEY_LEN); + memcpy(priv->wep_keys[index], extra, len); + priv->wep_keys_len[index] = (len <= WEP_SMALL_KEY_LEN) ? + WEP_SMALL_KEY_LEN : WEP_LARGE_KEY_LEN; + priv->wep_enabled = 1; + } + + priv->wep_key_id = index; + priv->wep_enabled = ((encoding->flags & IW_ENCODE_DISABLED) == 0); + + if (encoding->flags & IW_ENCODE_RESTRICTED) + priv->auth_mode = WLAN_AUTH_SHARED_KEY; + if (encoding->flags & IW_ENCODE_OPEN) + priv->auth_mode = WLAN_AUTH_OPEN; + + at76_dbg(DBG_IOCTL, + "%s: SIOCSIWENCODE - new wepstate: enabled %s key_id %d " + "key_len %d auth_mode %s", netdev->name, + (priv->wep_enabled) ? "true" : "false", priv->wep_key_id + 1, + priv->wep_keys_len[priv->wep_key_id], + (priv->auth_mode == + WLAN_AUTH_SHARED_KEY) ? "restricted" : "open"); + + return -EIWCOMMIT; +} + +static int at76_iw_handler_get_encode(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *encoding, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int index = (encoding->flags & IW_ENCODE_INDEX) - 1; + + if ((index < 0) || (index >= WEP_KEYS)) + index = priv->wep_key_id; + + encoding->flags = + (priv->auth_mode == WLAN_AUTH_SHARED_KEY) ? + IW_ENCODE_RESTRICTED : IW_ENCODE_OPEN; + + if (!priv->wep_enabled) + encoding->flags |= IW_ENCODE_DISABLED; + + if (encoding->pointer) { + encoding->length = priv->wep_keys_len[index]; + + memcpy(extra, priv->wep_keys[index], priv->wep_keys_len[index]); + + encoding->flags |= (index + 1); + } + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWENCODE - enc.flags %08x " + "pointer %p len %d", netdev->name, encoding->flags, + encoding->pointer, encoding->length); + at76_dbg(DBG_IOCTL, + "%s: SIOCGIWENCODE - wepstate: enabled %s key_id %d " + "key_len %d auth_mode %s", netdev->name, + (priv->wep_enabled) ? "true" : "false", priv->wep_key_id + 1, + priv->wep_keys_len[priv->wep_key_id], + (priv->auth_mode == + WLAN_AUTH_SHARED_KEY) ? "restricted" : "open"); + + return 0; +} + +static int at76_iw_handler_set_power(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *prq, char *extra) +{ + int err = -EIWCOMMIT; + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_IOCTL, + "%s: SIOCSIWPOWER - disabled %s flags 0x%x value 0x%x", + netdev->name, (prq->disabled) ? "true" : "false", prq->flags, + prq->value); + + if (prq->disabled) + priv->pm_mode = AT76_PM_OFF; + else { + switch (prq->flags & IW_POWER_MODE) { + case IW_POWER_ALL_R: + case IW_POWER_ON: + break; + default: + err = -EINVAL; + goto exit; + } + if (prq->flags & IW_POWER_PERIOD) + priv->pm_period = prq->value; + + if (prq->flags & IW_POWER_TIMEOUT) { + err = -EINVAL; + goto exit; + } + priv->pm_mode = AT76_PM_ON; + } +exit: + return err; +} + +static int at76_iw_handler_get_power(struct net_device *netdev, + struct iw_request_info *info, + struct iw_param *power, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + power->disabled = (priv->pm_mode == AT76_PM_OFF); + if (!power->disabled) { + power->flags = IW_POWER_PERIOD | IW_POWER_ALL_R; + power->value = priv->pm_period; + } + + at76_dbg(DBG_IOCTL, "%s: SIOCGIWPOWER - %s flags 0x%x value 0x%x", + netdev->name, power->disabled ? "disabled" : "enabled", + power->flags, power->value); + + return 0; +} + +/******************************************************************************* + * Private IOCTLS + */ +static int at76_iw_set_short_preamble(struct net_device *netdev, + struct iw_request_info *info, char *name, + char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int val = *((int *)name); + int ret = -EIWCOMMIT; + + at76_dbg(DBG_IOCTL, "%s: AT76_SET_SHORT_PREAMBLE, %d", + netdev->name, val); + + if (val < PREAMBLE_TYPE_LONG || val > PREAMBLE_TYPE_AUTO) + ret = -EINVAL; + else + priv->preamble_type = val; + + return ret; +} + +static int at76_iw_get_short_preamble(struct net_device *netdev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + + snprintf(wrqu->name, sizeof(wrqu->name), "%s (%d)", + preambles[priv->preamble_type], priv->preamble_type); + return 0; +} + +static int at76_iw_set_debug(struct net_device *netdev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + char *ptr; + u32 val; + + if (data->length > 0) { + val = simple_strtol(extra, &ptr, 0); + + if (ptr == extra) + val = DBG_DEFAULTS; + + at76_dbg(DBG_IOCTL, "%s: AT76_SET_DEBUG input %d: %s -> 0x%x", + netdev->name, data->length, extra, val); + } else + val = DBG_DEFAULTS; + + at76_dbg(DBG_IOCTL, "%s: AT76_SET_DEBUG, old 0x%x, new 0x%x", + netdev->name, at76_debug, val); + + /* jal: some more output to pin down lockups */ + at76_dbg(DBG_IOCTL, "%s: netif running %d queue_stopped %d " + "carrier_ok %d", netdev->name, netif_running(netdev), + netif_queue_stopped(netdev), netif_carrier_ok(netdev)); + + at76_debug = val; + + return 0; +} + +static int at76_iw_get_debug(struct net_device *netdev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + snprintf(wrqu->name, sizeof(wrqu->name), "0x%08x", at76_debug); + return 0; +} + +static int at76_iw_set_powersave_mode(struct net_device *netdev, + struct iw_request_info *info, char *name, + char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int val = *((int *)name); + int ret = -EIWCOMMIT; + + at76_dbg(DBG_IOCTL, "%s: AT76_SET_POWERSAVE_MODE, %d (%s)", + netdev->name, val, + val == AT76_PM_OFF ? "active" : val == AT76_PM_ON ? "save" : + val == AT76_PM_SMART ? "smart save" : ""); + if (val < AT76_PM_OFF || val > AT76_PM_SMART) + ret = -EINVAL; + else + priv->pm_mode = val; + + return ret; +} + +static int at76_iw_get_powersave_mode(struct net_device *netdev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int *param = (int *)extra; + + param[0] = priv->pm_mode; + return 0; +} + +static int at76_iw_set_scan_times(struct net_device *netdev, + struct iw_request_info *info, char *name, + char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int mint = *((int *)name); + int maxt = *((int *)name + 1); + int ret = -EIWCOMMIT; + + at76_dbg(DBG_IOCTL, "%s: AT76_SET_SCAN_TIMES - min %d max %d", + netdev->name, mint, maxt); + if (mint <= 0 || maxt <= 0 || mint > maxt) + ret = -EINVAL; + else { + priv->scan_min_time = mint; + priv->scan_max_time = maxt; + } + + return ret; +} + +static int at76_iw_get_scan_times(struct net_device *netdev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int *param = (int *)extra; + + param[0] = priv->scan_min_time; + param[1] = priv->scan_max_time; + return 0; +} + +static int at76_iw_set_scan_mode(struct net_device *netdev, + struct iw_request_info *info, char *name, + char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int val = *((int *)name); + int ret = -EIWCOMMIT; + + at76_dbg(DBG_IOCTL, "%s: AT76_SET_SCAN_MODE - mode %s", + netdev->name, (val = SCAN_TYPE_ACTIVE) ? "active" : + (val = SCAN_TYPE_PASSIVE) ? "passive" : ""); + + if (val != SCAN_TYPE_ACTIVE && val != SCAN_TYPE_PASSIVE) + ret = -EINVAL; + else + priv->scan_mode = val; + + return ret; +} + +static int at76_iw_get_scan_mode(struct net_device *netdev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct at76_priv *priv = netdev_priv(netdev); + int *param = (int *)extra; + + param[0] = priv->scan_mode; + return 0; +} + +#define AT76_SET_HANDLER(h, f) [h - SIOCIWFIRST] = (iw_handler) f + +/* Standard wireless handlers */ +static const iw_handler at76_handlers[] = { + AT76_SET_HANDLER(SIOCSIWCOMMIT, at76_iw_handler_commit), + AT76_SET_HANDLER(SIOCGIWNAME, at76_iw_handler_get_name), + AT76_SET_HANDLER(SIOCSIWFREQ, at76_iw_handler_set_freq), + AT76_SET_HANDLER(SIOCGIWFREQ, at76_iw_handler_get_freq), + AT76_SET_HANDLER(SIOCSIWMODE, at76_iw_handler_set_mode), + AT76_SET_HANDLER(SIOCGIWMODE, at76_iw_handler_get_mode), + AT76_SET_HANDLER(SIOCGIWRANGE, at76_iw_handler_get_range), + AT76_SET_HANDLER(SIOCSIWSPY, at76_iw_handler_set_spy), + AT76_SET_HANDLER(SIOCGIWSPY, at76_iw_handler_get_spy), + AT76_SET_HANDLER(SIOCSIWTHRSPY, at76_iw_handler_set_thrspy), + AT76_SET_HANDLER(SIOCGIWTHRSPY, at76_iw_handler_get_thrspy), + AT76_SET_HANDLER(SIOCSIWAP, at76_iw_handler_set_wap), + AT76_SET_HANDLER(SIOCGIWAP, at76_iw_handler_get_wap), + AT76_SET_HANDLER(SIOCSIWSCAN, at76_iw_handler_set_scan), + AT76_SET_HANDLER(SIOCGIWSCAN, at76_iw_handler_get_scan), + AT76_SET_HANDLER(SIOCSIWESSID, at76_iw_handler_set_essid), + AT76_SET_HANDLER(SIOCGIWESSID, at76_iw_handler_get_essid), + AT76_SET_HANDLER(SIOCSIWRATE, at76_iw_handler_set_rate), + AT76_SET_HANDLER(SIOCGIWRATE, at76_iw_handler_get_rate), + AT76_SET_HANDLER(SIOCSIWRTS, at76_iw_handler_set_rts), + AT76_SET_HANDLER(SIOCGIWRTS, at76_iw_handler_get_rts), + AT76_SET_HANDLER(SIOCSIWFRAG, at76_iw_handler_set_frag), + AT76_SET_HANDLER(SIOCGIWFRAG, at76_iw_handler_get_frag), + AT76_SET_HANDLER(SIOCGIWTXPOW, at76_iw_handler_get_txpow), + AT76_SET_HANDLER(SIOCSIWRETRY, at76_iw_handler_set_retry), + AT76_SET_HANDLER(SIOCGIWRETRY, at76_iw_handler_get_retry), + AT76_SET_HANDLER(SIOCSIWENCODE, at76_iw_handler_set_encode), + AT76_SET_HANDLER(SIOCGIWENCODE, at76_iw_handler_get_encode), + AT76_SET_HANDLER(SIOCSIWPOWER, at76_iw_handler_set_power), + AT76_SET_HANDLER(SIOCGIWPOWER, at76_iw_handler_get_power) +}; + +#define AT76_SET_PRIV(h, f) [h - SIOCIWFIRSTPRIV] = (iw_handler) f + +/* Private wireless handlers */ +static const iw_handler at76_priv_handlers[] = { + AT76_SET_PRIV(AT76_SET_SHORT_PREAMBLE, at76_iw_set_short_preamble), + AT76_SET_PRIV(AT76_GET_SHORT_PREAMBLE, at76_iw_get_short_preamble), + AT76_SET_PRIV(AT76_SET_DEBUG, at76_iw_set_debug), + AT76_SET_PRIV(AT76_GET_DEBUG, at76_iw_get_debug), + AT76_SET_PRIV(AT76_SET_POWERSAVE_MODE, at76_iw_set_powersave_mode), + AT76_SET_PRIV(AT76_GET_POWERSAVE_MODE, at76_iw_get_powersave_mode), + AT76_SET_PRIV(AT76_SET_SCAN_TIMES, at76_iw_set_scan_times), + AT76_SET_PRIV(AT76_GET_SCAN_TIMES, at76_iw_get_scan_times), + AT76_SET_PRIV(AT76_SET_SCAN_MODE, at76_iw_set_scan_mode), + AT76_SET_PRIV(AT76_GET_SCAN_MODE, at76_iw_get_scan_mode), +}; + +/* Names and arguments of private wireless handlers */ +static const struct iw_priv_args at76_priv_args[] = { + /* 0 - long, 1 - short */ + {AT76_SET_SHORT_PREAMBLE, + IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0, "set_preamble"}, + + {AT76_GET_SHORT_PREAMBLE, + 0, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | 10, "get_preamble"}, + + /* we must pass the new debug mask as a string, because iwpriv cannot + * parse hex numbers starting with 0x :-( */ + {AT76_SET_DEBUG, + IW_PRIV_TYPE_CHAR | 10, 0, "set_debug"}, + + {AT76_GET_DEBUG, + 0, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | 10, "get_debug"}, + + /* 1 - active, 2 - power save, 3 - smart power save */ + {AT76_SET_POWERSAVE_MODE, + IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0, "set_powersave"}, + + {AT76_GET_POWERSAVE_MODE, + 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, "get_powersave"}, + + /* min_channel_time, max_channel_time */ + {AT76_SET_SCAN_TIMES, + IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2, 0, "set_scan_times"}, + + {AT76_GET_SCAN_TIMES, + 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2, "get_scan_times"}, + + /* 0 - active, 1 - passive scan */ + {AT76_SET_SCAN_MODE, + IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0, "set_scan_mode"}, + + {AT76_GET_SCAN_MODE, + 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, "get_scan_mode"}, +}; + +static const struct iw_handler_def at76_handler_def = { + .num_standard = ARRAY_SIZE(at76_handlers), + .num_private = ARRAY_SIZE(at76_priv_handlers), + .num_private_args = ARRAY_SIZE(at76_priv_args), + .standard = at76_handlers, + .private = at76_priv_handlers, + .private_args = at76_priv_args, + .get_wireless_stats = at76_get_wireless_stats, +}; + +static const u8 snapsig[] = { 0xaa, 0xaa, 0x03 }; + +/* RFC 1042 encapsulates Ethernet frames in 802.2 SNAP (0xaa, 0xaa, 0x03) with + * a SNAP OID of 0 (0x00, 0x00, 0x00) */ +static const u8 rfc1042sig[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; + +static int at76_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + struct net_device_stats *stats = &priv->stats; + int ret = 0; + int wlen; + int submit_len; + struct at76_tx_buffer *tx_buffer = priv->bulk_out_buffer; + struct ieee80211_hdr_3addr *i802_11_hdr = + (struct ieee80211_hdr_3addr *)tx_buffer->packet; + u8 *payload = i802_11_hdr->payload; + struct ethhdr *eh = (struct ethhdr *)skb->data; + + if (netif_queue_stopped(netdev)) { + printk(KERN_ERR "%s: %s called while netdev is stopped\n", + netdev->name, __func__); + /* skip this packet */ + dev_kfree_skb(skb); + return 0; + } + + if (priv->tx_urb->status == -EINPROGRESS) { + printk(KERN_ERR "%s: %s called while tx urb is pending\n", + netdev->name, __func__); + /* skip this packet */ + dev_kfree_skb(skb); + return 0; + } + + if (skb->len < ETH_HLEN) { + printk(KERN_ERR "%s: %s: skb too short (%d)\n", + netdev->name, __func__, skb->len); + dev_kfree_skb(skb); + return 0; + } + + at76_ledtrig_tx_activity(); /* tell ledtrigger we send a packet */ + + /* we can get rid of memcpy if we set netdev->hard_header_len to + reserve enough space, but we would need to keep the skb around */ + + if (ntohs(eh->h_proto) <= ETH_DATA_LEN) { + /* this is a 802.3 packet */ + if (skb->len >= ETH_HLEN + sizeof(rfc1042sig) + && skb->data[ETH_HLEN] == rfc1042sig[0] + && skb->data[ETH_HLEN + 1] == rfc1042sig[1]) { + /* higher layer delivered SNAP header - keep it */ + memcpy(payload, skb->data + ETH_HLEN, + skb->len - ETH_HLEN); + wlen = IEEE80211_3ADDR_LEN + skb->len - ETH_HLEN; + } else { + printk(KERN_ERR "%s: dropping non-SNAP 802.2 packet " + "(DSAP 0x%02x SSAP 0x%02x cntrl 0x%02x)\n", + priv->netdev->name, skb->data[ETH_HLEN], + skb->data[ETH_HLEN + 1], + skb->data[ETH_HLEN + 2]); + dev_kfree_skb(skb); + return 0; + } + } else { + /* add RFC 1042 header in front */ + memcpy(payload, rfc1042sig, sizeof(rfc1042sig)); + memcpy(payload + sizeof(rfc1042sig), &eh->h_proto, + skb->len - offsetof(struct ethhdr, h_proto)); + wlen = IEEE80211_3ADDR_LEN + sizeof(rfc1042sig) + skb->len - + offsetof(struct ethhdr, h_proto); + } + + /* make wireless header */ + i802_11_hdr->frame_ctl = + cpu_to_le16(IEEE80211_FTYPE_DATA | + (priv->wep_enabled ? IEEE80211_FCTL_PROTECTED : 0) | + (priv->iw_mode == + IW_MODE_INFRA ? IEEE80211_FCTL_TODS : 0)); + + if (priv->iw_mode == IW_MODE_ADHOC) { + memcpy(i802_11_hdr->addr1, eh->h_dest, ETH_ALEN); + memcpy(i802_11_hdr->addr2, eh->h_source, ETH_ALEN); + memcpy(i802_11_hdr->addr3, priv->bssid, ETH_ALEN); + } else if (priv->iw_mode == IW_MODE_INFRA) { + memcpy(i802_11_hdr->addr1, priv->bssid, ETH_ALEN); + memcpy(i802_11_hdr->addr2, eh->h_source, ETH_ALEN); + memcpy(i802_11_hdr->addr3, eh->h_dest, ETH_ALEN); + } + + i802_11_hdr->duration_id = cpu_to_le16(0); + i802_11_hdr->seq_ctl = cpu_to_le16(0); + + /* setup 'Atmel' header */ + tx_buffer->wlength = cpu_to_le16(wlen); + tx_buffer->tx_rate = priv->txrate; + /* for broadcast destination addresses, the firmware 0.100.x + seems to choose the highest rate set with CMD_STARTUP in + basic_rate_set replacing this value */ + + memset(tx_buffer->reserved, 0, sizeof(tx_buffer->reserved)); + + tx_buffer->padding = at76_calc_padding(wlen); + submit_len = wlen + AT76_TX_HDRLEN + tx_buffer->padding; + + at76_dbg(DBG_TX_DATA_CONTENT, "%s skb->data %s", priv->netdev->name, + hex2str(skb->data, 32)); + at76_dbg(DBG_TX_DATA, "%s tx: wlen 0x%x pad 0x%x rate %d hdr %s", + priv->netdev->name, + le16_to_cpu(tx_buffer->wlength), + tx_buffer->padding, tx_buffer->tx_rate, + hex2str(i802_11_hdr, sizeof(*i802_11_hdr))); + at76_dbg(DBG_TX_DATA_CONTENT, "%s payload %s", priv->netdev->name, + hex2str(payload, 48)); + + /* send stuff */ + netif_stop_queue(netdev); + netdev->trans_start = jiffies; + + usb_fill_bulk_urb(priv->tx_urb, priv->udev, priv->tx_pipe, tx_buffer, + submit_len, at76_tx_callback, priv); + ret = usb_submit_urb(priv->tx_urb, GFP_ATOMIC); + if (ret) { + stats->tx_errors++; + printk(KERN_ERR "%s: error in tx submit urb: %d\n", + netdev->name, ret); + if (ret == -EINVAL) + printk(KERN_ERR + "%s: -EINVAL: tx urb %p hcpriv %p complete %p\n", + priv->netdev->name, priv->tx_urb, + priv->tx_urb->hcpriv, priv->tx_urb->complete); + } else { + stats->tx_bytes += skb->len; + dev_kfree_skb(skb); + } + + return ret; +} + +static void at76_tx_timeout(struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + + if (!priv) + return; + dev_warn(&netdev->dev, "tx timeout."); + + usb_unlink_urb(priv->tx_urb); + priv->stats.tx_errors++; +} + static int at76_submit_rx_urb(struct at76_priv *priv) { int ret; @@ -1327,7 +3268,7 @@ static int at76_submit_rx_urb(struct at76_priv *priv) if (!priv->rx_urb) { printk(KERN_ERR "%s: %s: priv->rx_urb is NULL\n", - wiphy_name(priv->hw->wiphy), __func__); + priv->netdev->name, __func__); return -EFAULT; } @@ -1335,7 +3276,7 @@ static int at76_submit_rx_urb(struct at76_priv *priv) skb = dev_alloc_skb(sizeof(struct at76_rx_buffer)); if (!skb) { printk(KERN_ERR "%s: cannot allocate rx skbuff\n", - wiphy_name(priv->hw->wiphy)); + priv->netdev->name); ret = -ENOMEM; goto exit; } @@ -1355,18 +3296,110 @@ static int at76_submit_rx_urb(struct at76_priv *priv) "usb_submit_urb returned -ENODEV"); else printk(KERN_ERR "%s: rx, usb_submit_urb failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); } exit: if (ret < 0 && ret != -ENODEV) printk(KERN_ERR "%s: cannot submit rx urb - please unload the " "driver and/or power cycle the device\n", - wiphy_name(priv->hw->wiphy)); + priv->netdev->name); return ret; } +static int at76_open(struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + int ret = 0; + + at76_dbg(DBG_PROC_ENTRY, "%s(): entry", __func__); + + if (mutex_lock_interruptible(&priv->mtx)) + return -EINTR; + + /* if netdev->dev_addr != priv->mac_addr we must + set the mac address in the device ! */ + if (compare_ether_addr(netdev->dev_addr, priv->mac_addr)) { + if (at76_add_mac_address(priv, netdev->dev_addr) >= 0) + at76_dbg(DBG_PROGRESS, "%s: set new MAC addr %s", + netdev->name, mac2str(netdev->dev_addr)); + } + + priv->scan_state = SCAN_IDLE; + priv->last_scan = jiffies; + + ret = at76_submit_rx_urb(priv); + if (ret < 0) { + printk(KERN_ERR "%s: open: submit_rx_urb failed: %d\n", + netdev->name, ret); + goto error; + } + + schedule_delayed_work(&priv->dwork_restart, 0); + + at76_dbg(DBG_PROC_ENTRY, "%s(): end", __func__); +error: + mutex_unlock(&priv->mtx); + return ret < 0 ? ret : 0; +} + +static int at76_stop(struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + + at76_dbg(DBG_DEVSTART, "%s: ENTER", __func__); + + if (mutex_lock_interruptible(&priv->mtx)) + return -EINTR; + + at76_quiesce(priv); + + if (!priv->device_unplugged) { + /* We are called by "ifconfig ethX down", not because the + * device is not available anymore. */ + at76_set_radio(priv, 0); + + /* We unlink rx_urb because at76_open() re-submits it. + * If unplugged, at76_delete_device() takes care of it. */ + usb_kill_urb(priv->rx_urb); + } + + /* free the bss_list */ + at76_free_bss_list(priv); + + mutex_unlock(&priv->mtx); + at76_dbg(DBG_DEVSTART, "%s: EXIT", __func__); + + return 0; +} + +static void at76_ethtool_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct at76_priv *priv = netdev_priv(netdev); + + strncpy(info->driver, DRIVER_NAME, sizeof(info->driver)); + strncpy(info->version, DRIVER_VERSION, sizeof(info->version)); + + usb_make_path(priv->udev, info->bus_info, sizeof(info->bus_info)); + + snprintf(info->fw_version, sizeof(info->fw_version), "%d.%d.%d-%d", + priv->fw_version.major, priv->fw_version.minor, + priv->fw_version.patch, priv->fw_version.build); +} + +static u32 at76_ethtool_get_link(struct net_device *netdev) +{ + struct at76_priv *priv = netdev_priv(netdev); + return priv->mac_state == MAC_CONNECTED; +} + +static struct ethtool_ops at76_ethtool_ops = { + .get_drvinfo = at76_ethtool_get_drvinfo, + .get_link = at76_ethtool_get_link, +}; + /* Download external firmware */ static int at76_load_external_fw(struct usb_device *udev, struct fwentry *fwe) { @@ -1463,6 +3496,406 @@ exit: return ret; } +static int at76_match_essid(struct at76_priv *priv, struct bss_info *ptr) +{ + /* common criteria for both modi */ + + int ret = (priv->essid_size == 0 /* ANY ssid */ || + (priv->essid_size == ptr->ssid_len && + !memcmp(priv->essid, ptr->ssid, ptr->ssid_len))); + if (!ret) + at76_dbg(DBG_BSS_MATCH, + "%s bss table entry %p: essid didn't match", + priv->netdev->name, ptr); + return ret; +} + +static inline int at76_match_mode(struct at76_priv *priv, struct bss_info *ptr) +{ + int ret; + + if (priv->iw_mode == IW_MODE_ADHOC) + ret = ptr->capa & WLAN_CAPABILITY_IBSS; + else + ret = ptr->capa & WLAN_CAPABILITY_ESS; + if (!ret) + at76_dbg(DBG_BSS_MATCH, + "%s bss table entry %p: mode didn't match", + priv->netdev->name, ptr); + return ret; +} + +static int at76_match_rates(struct at76_priv *priv, struct bss_info *ptr) +{ + int i; + + for (i = 0; i < ptr->rates_len; i++) { + u8 rate = ptr->rates[i]; + + if (!(rate & 0x80)) + continue; + + /* this is a basic rate we have to support + (see IEEE802.11, ch. 7.3.2.2) */ + if (rate != (0x80 | hw_rates[0]) + && rate != (0x80 | hw_rates[1]) + && rate != (0x80 | hw_rates[2]) + && rate != (0x80 | hw_rates[3])) { + at76_dbg(DBG_BSS_MATCH, + "%s: bss table entry %p: basic rate %02x not " + "supported", priv->netdev->name, ptr, rate); + return 0; + } + } + + /* if we use short preamble, the bss must support it */ + if (priv->preamble_type == PREAMBLE_TYPE_SHORT && + !(ptr->capa & WLAN_CAPABILITY_SHORT_PREAMBLE)) { + at76_dbg(DBG_BSS_MATCH, + "%s: %p does not support short preamble", + priv->netdev->name, ptr); + return 0; + } else + return 1; +} + +static inline int at76_match_wep(struct at76_priv *priv, struct bss_info *ptr) +{ + if (!priv->wep_enabled && ptr->capa & WLAN_CAPABILITY_PRIVACY) { + /* we have disabled WEP, but the BSS signals privacy */ + at76_dbg(DBG_BSS_MATCH, + "%s: bss table entry %p: requires encryption", + priv->netdev->name, ptr); + return 0; + } + /* otherwise if the BSS does not signal privacy it may well + accept encrypted packets from us ... */ + return 1; +} + +static inline int at76_match_bssid(struct at76_priv *priv, struct bss_info *ptr) +{ + if (!priv->wanted_bssid_valid || + !compare_ether_addr(ptr->bssid, priv->wanted_bssid)) + return 1; + + at76_dbg(DBG_BSS_MATCH, + "%s: requested bssid - %s does not match", + priv->netdev->name, mac2str(priv->wanted_bssid)); + at76_dbg(DBG_BSS_MATCH, + " AP bssid - %s of bss table entry %p", + mac2str(ptr->bssid), ptr); + return 0; +} + +/** + * at76_match_bss - try to find a matching bss in priv->bss + * + * last - last bss tried + * + * last == NULL signals a new round starting with priv->bss_list.next + * this function must be called inside an acquired priv->bss_list_spinlock + * otherwise the timeout on bss may remove the newly chosen entry + */ +static struct bss_info *at76_match_bss(struct at76_priv *priv, + struct bss_info *last) +{ + struct bss_info *ptr = NULL; + struct list_head *curr; + + curr = last ? last->list.next : priv->bss_list.next; + while (curr != &priv->bss_list) { + ptr = list_entry(curr, struct bss_info, list); + if (at76_match_essid(priv, ptr) && at76_match_mode(priv, ptr) + && at76_match_wep(priv, ptr) && at76_match_rates(priv, ptr) + && at76_match_bssid(priv, ptr)) + break; + curr = curr->next; + } + + if (curr == &priv->bss_list) + ptr = NULL; + /* otherwise ptr points to the struct bss_info we have chosen */ + + at76_dbg(DBG_BSS_TABLE, "%s %s: returned %p", priv->netdev->name, + __func__, ptr); + return ptr; +} + +/* Start joining a matching BSS, or create own IBSS */ +static void at76_work_join(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + work_join); + int ret; + unsigned long flags; + + mutex_lock(&priv->mtx); + + WARN_ON(priv->mac_state != MAC_JOINING); + if (priv->mac_state != MAC_JOINING) + goto exit; + + /* secure the access to priv->curr_bss ! */ + spin_lock_irqsave(&priv->bss_list_spinlock, flags); + priv->curr_bss = at76_match_bss(priv, priv->curr_bss); + spin_unlock_irqrestore(&priv->bss_list_spinlock, flags); + + if (!priv->curr_bss) { + /* here we haven't found a matching (i)bss ... */ + if (priv->iw_mode == IW_MODE_ADHOC) { + at76_set_mac_state(priv, MAC_OWN_IBSS); + at76_start_ibss(priv); + goto exit; + } + /* haven't found a matching BSS in infra mode - try again */ + at76_set_mac_state(priv, MAC_SCANNING); + schedule_work(&priv->work_start_scan); + goto exit; + } + + ret = at76_join_bss(priv, priv->curr_bss); + if (ret < 0) { + printk(KERN_ERR "%s: join_bss failed with %d\n", + priv->netdev->name, ret); + goto exit; + } + + ret = at76_wait_completion(priv, CMD_JOIN); + if (ret != CMD_STATUS_COMPLETE) { + if (ret != CMD_STATUS_TIME_OUT) + printk(KERN_ERR "%s: join_bss completed with %d\n", + priv->netdev->name, ret); + else + printk(KERN_INFO "%s: join_bss ssid %s timed out\n", + priv->netdev->name, + mac2str(priv->curr_bss->bssid)); + + /* retry next BSS immediately */ + schedule_work(&priv->work_join); + goto exit; + } + + /* here we have joined the (I)BSS */ + if (priv->iw_mode == IW_MODE_ADHOC) { + struct bss_info *bptr = priv->curr_bss; + at76_set_mac_state(priv, MAC_CONNECTED); + /* get ESSID, BSSID and channel for priv->curr_bss */ + priv->essid_size = bptr->ssid_len; + memcpy(priv->essid, bptr->ssid, bptr->ssid_len); + memcpy(priv->bssid, bptr->bssid, ETH_ALEN); + priv->channel = bptr->channel; + at76_iwevent_bss_connect(priv->netdev, bptr->bssid); + netif_carrier_on(priv->netdev); + netif_start_queue(priv->netdev); + /* just to be sure */ + cancel_delayed_work(&priv->dwork_get_scan); + cancel_delayed_work(&priv->dwork_auth); + cancel_delayed_work(&priv->dwork_assoc); + } else { + /* send auth req */ + priv->retries = AUTH_RETRIES; + at76_set_mac_state(priv, MAC_AUTH); + at76_auth_req(priv, priv->curr_bss, 1, NULL); + at76_dbg(DBG_MGMT_TIMER, + "%s:%d: starting mgmt_timer + HZ", __func__, __LINE__); + schedule_delayed_work(&priv->dwork_auth, AUTH_TIMEOUT); + } + +exit: + mutex_unlock(&priv->mtx); +} + +/* Reap scan results */ +static void at76_dwork_get_scan(struct work_struct *work) +{ + int status; + int ret; + struct at76_priv *priv = container_of(work, struct at76_priv, + dwork_get_scan.work); + + mutex_lock(&priv->mtx); + WARN_ON(priv->mac_state != MAC_SCANNING); + if (priv->mac_state != MAC_SCANNING) + goto exit; + + status = at76_get_cmd_status(priv->udev, CMD_SCAN); + if (status < 0) { + printk(KERN_ERR "%s: %s: at76_get_cmd_status failed with %d\n", + priv->netdev->name, __func__, status); + status = CMD_STATUS_IN_PROGRESS; + /* INFO: Hope it was a one off error - if not, scanning + further down the line and stop this cycle */ + } + at76_dbg(DBG_PROGRESS, + "%s %s: got cmd_status %d (state %s, need_any %d)", + priv->netdev->name, __func__, status, + mac_states[priv->mac_state], priv->scan_need_any); + + if (status != CMD_STATUS_COMPLETE) { + if ((status != CMD_STATUS_IN_PROGRESS) && + (status != CMD_STATUS_IDLE)) + printk(KERN_ERR "%s: %s: Bad scan status: %s\n", + priv->netdev->name, __func__, + at76_get_cmd_status_string(status)); + + /* the first cmd status after scan start is always a IDLE -> + start the timer to poll again until COMPLETED */ + at76_dbg(DBG_MGMT_TIMER, + "%s:%d: starting mgmt_timer for %d ticks", + __func__, __LINE__, SCAN_POLL_INTERVAL); + schedule_delayed_work(&priv->dwork_get_scan, + SCAN_POLL_INTERVAL); + goto exit; + } + + if (at76_debug & DBG_BSS_TABLE) + at76_dump_bss_table(priv); + + if (priv->scan_need_any) { + ret = at76_start_scan(priv, 0); + if (ret < 0) + printk(KERN_ERR + "%s: %s: start_scan (ANY) failed with %d\n", + priv->netdev->name, __func__, ret); + at76_dbg(DBG_MGMT_TIMER, + "%s:%d: starting mgmt_timer for %d ticks", __func__, + __LINE__, SCAN_POLL_INTERVAL); + schedule_delayed_work(&priv->dwork_get_scan, + SCAN_POLL_INTERVAL); + priv->scan_need_any = 0; + } else { + priv->scan_state = SCAN_COMPLETED; + /* report the end of scan to user space */ + at76_iwevent_scan_complete(priv->netdev); + at76_set_mac_state(priv, MAC_JOINING); + schedule_work(&priv->work_join); + } + +exit: + mutex_unlock(&priv->mtx); +} + +/* Handle loss of beacons from the AP */ +static void at76_dwork_beacon(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + dwork_beacon.work); + + mutex_lock(&priv->mtx); + if (priv->mac_state != MAC_CONNECTED || priv->iw_mode != IW_MODE_INFRA) + goto exit; + + /* We haven't received any beacons from out AP for BEACON_TIMEOUT */ + printk(KERN_INFO "%s: lost beacon bssid %s\n", + priv->netdev->name, mac2str(priv->curr_bss->bssid)); + + netif_carrier_off(priv->netdev); + netif_stop_queue(priv->netdev); + at76_iwevent_bss_disconnect(priv->netdev); + at76_set_mac_state(priv, MAC_SCANNING); + schedule_work(&priv->work_start_scan); + +exit: + mutex_unlock(&priv->mtx); +} + +/* Handle authentication response timeout */ +static void at76_dwork_auth(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + dwork_auth.work); + + mutex_lock(&priv->mtx); + WARN_ON(priv->mac_state != MAC_AUTH); + if (priv->mac_state != MAC_AUTH) + goto exit; + + at76_dbg(DBG_PROGRESS, "%s: authentication response timeout", + priv->netdev->name); + + if (priv->retries-- >= 0) { + at76_auth_req(priv, priv->curr_bss, 1, NULL); + at76_dbg(DBG_MGMT_TIMER, "%s:%d: starting mgmt_timer + HZ", + __func__, __LINE__); + schedule_delayed_work(&priv->dwork_auth, AUTH_TIMEOUT); + } else { + /* try to get next matching BSS */ + at76_set_mac_state(priv, MAC_JOINING); + schedule_work(&priv->work_join); + } + +exit: + mutex_unlock(&priv->mtx); +} + +/* Handle association response timeout */ +static void at76_dwork_assoc(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + dwork_assoc.work); + + mutex_lock(&priv->mtx); + WARN_ON(priv->mac_state != MAC_ASSOC); + if (priv->mac_state != MAC_ASSOC) + goto exit; + + at76_dbg(DBG_PROGRESS, "%s: association response timeout", + priv->netdev->name); + + if (priv->retries-- >= 0) { + at76_assoc_req(priv, priv->curr_bss); + at76_dbg(DBG_MGMT_TIMER, "%s:%d: starting mgmt_timer + HZ", + __func__, __LINE__); + schedule_delayed_work(&priv->dwork_assoc, ASSOC_TIMEOUT); + } else { + /* try to get next matching BSS */ + at76_set_mac_state(priv, MAC_JOINING); + schedule_work(&priv->work_join); + } + +exit: + mutex_unlock(&priv->mtx); +} + +/* Read new bssid in ad-hoc mode */ +static void at76_work_new_bss(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + work_new_bss); + int ret; + struct mib_mac_mgmt mac_mgmt; + + mutex_lock(&priv->mtx); + + ret = at76_get_mib(priv->udev, MIB_MAC_MGMT, &mac_mgmt, + sizeof(struct mib_mac_mgmt)); + if (ret < 0) { + printk(KERN_ERR "%s: at76_get_mib failed: %d\n", + priv->netdev->name, ret); + goto exit; + } + + at76_dbg(DBG_PROGRESS, "ibss_change = 0x%2x", mac_mgmt.ibss_change); + memcpy(priv->bssid, mac_mgmt.current_bssid, ETH_ALEN); + at76_dbg(DBG_PROGRESS, "using BSSID %s", mac2str(priv->bssid)); + + at76_iwevent_bss_connect(priv->netdev, priv->bssid); + + priv->mib_buf.type = MIB_MAC_MGMT; + priv->mib_buf.size = 1; + priv->mib_buf.index = offsetof(struct mib_mac_mgmt, ibss_change); + priv->mib_buf.data.byte = 0; + + ret = at76_set_mib(priv, &priv->mib_buf); + if (ret < 0) + printk(KERN_ERR "%s: set_mib (ibss change ok) failed: %d\n", + priv->netdev->name, ret); + +exit: + mutex_unlock(&priv->mtx); +} + static int at76_startup_device(struct at76_priv *priv) { struct at76_card_config *ccfg = &priv->card_config; @@ -1470,14 +3903,14 @@ static int at76_startup_device(struct at76_priv *priv) at76_dbg(DBG_PARAMS, "%s param: ssid %.*s (%s) mode %s ch %d wep %s key %d " - "keylen %d", wiphy_name(priv->hw->wiphy), priv->essid_size, - priv->essid, hex2str(priv->essid, IW_ESSID_MAX_SIZE), + "keylen %d", priv->netdev->name, priv->essid_size, priv->essid, + hex2str(priv->essid, IW_ESSID_MAX_SIZE), priv->iw_mode == IW_MODE_ADHOC ? "adhoc" : "infra", priv->channel, priv->wep_enabled ? "enabled" : "disabled", priv->wep_key_id, priv->wep_keys_len[priv->wep_key_id]); at76_dbg(DBG_PARAMS, "%s param: preamble %s rts %d retry %d frag %d " - "txrate %s auth_mode %d", wiphy_name(priv->hw->wiphy), + "txrate %s auth_mode %d", priv->netdev->name, preambles[priv->preamble_type], priv->rts_threshold, priv->short_retry_limit, priv->frag_threshold, priv->txrate == TX_RATE_1MBIT ? "1MBit" : priv->txrate == @@ -1488,7 +3921,7 @@ static int at76_startup_device(struct at76_priv *priv) at76_dbg(DBG_PARAMS, "%s param: pm_mode %d pm_period %d auth_mode %s " "scan_times %d %d scan_mode %s", - wiphy_name(priv->hw->wiphy), priv->pm_mode, priv->pm_period, + priv->netdev->name, priv->pm_mode, priv->pm_period, priv->auth_mode == WLAN_AUTH_OPEN ? "open" : "shared_secret", priv->scan_min_time, priv->scan_max_time, priv->scan_mode == SCAN_TYPE_ACTIVE ? "active" : "passive"); @@ -1522,8 +3955,7 @@ static int at76_startup_device(struct at76_priv *priv) ccfg->ssid_len = priv->essid_size; ccfg->wep_default_key_id = priv->wep_key_id; - memcpy(ccfg->wep_default_key_value, priv->wep_keys, - sizeof(priv->wep_keys)); + memcpy(ccfg->wep_default_key_value, priv->wep_keys, 4 * WEP_KEY_LEN); ccfg->short_preamble = priv->preamble_type; ccfg->beacon_period = cpu_to_le16(priv->beacon_period); @@ -1532,7 +3964,7 @@ static int at76_startup_device(struct at76_priv *priv) sizeof(struct at76_card_config)); if (ret < 0) { printk(KERN_ERR "%s: at76_set_card_command failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); return ret; } @@ -1578,6 +4010,69 @@ static int at76_startup_device(struct at76_priv *priv) return 0; } +/* Restart the interface */ +static void at76_dwork_restart(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + dwork_restart.work); + + mutex_lock(&priv->mtx); + + netif_carrier_off(priv->netdev); /* stop netdev watchdog */ + netif_stop_queue(priv->netdev); /* stop tx data packets */ + + at76_startup_device(priv); + + if (priv->iw_mode != IW_MODE_MONITOR) { + priv->netdev->type = ARPHRD_ETHER; + at76_set_mac_state(priv, MAC_SCANNING); + schedule_work(&priv->work_start_scan); + } else { + priv->netdev->type = ARPHRD_IEEE80211_RADIOTAP; + at76_start_monitor(priv); + } + + mutex_unlock(&priv->mtx); +} + +/* Initiate scanning */ +static void at76_work_start_scan(struct work_struct *work) +{ + struct at76_priv *priv = container_of(work, struct at76_priv, + work_start_scan); + int ret; + + mutex_lock(&priv->mtx); + + WARN_ON(priv->mac_state != MAC_SCANNING); + if (priv->mac_state != MAC_SCANNING) + goto exit; + + /* only clear the bss list when a scan is actively initiated, + * otherwise simply rely on at76_bss_list_timeout */ + if (priv->scan_state == SCAN_IN_PROGRESS) { + at76_free_bss_list(priv); + priv->scan_need_any = 1; + } else + priv->scan_need_any = 0; + + ret = at76_start_scan(priv, 1); + + if (ret < 0) + printk(KERN_ERR "%s: %s: start_scan failed with %d\n", + priv->netdev->name, __func__, ret); + else { + at76_dbg(DBG_MGMT_TIMER, + "%s:%d: starting mgmt_timer for %d ticks", + __func__, __LINE__, SCAN_POLL_INTERVAL); + schedule_delayed_work(&priv->dwork_get_scan, + SCAN_POLL_INTERVAL); + } + +exit: + mutex_unlock(&priv->mtx); +} + /* Enable or disable promiscuous mode */ static void at76_work_set_promisc(struct work_struct *work) { @@ -1595,7 +4090,7 @@ static void at76_work_set_promisc(struct work_struct *work) ret = at76_set_mib(priv, &priv->mib_buf); if (ret < 0) printk(KERN_ERR "%s: set_mib (promiscuous_mode) failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); + priv->netdev->name, ret); mutex_unlock(&priv->mtx); } @@ -1611,12 +4106,894 @@ static void at76_work_submit_rx(struct work_struct *work) mutex_unlock(&priv->mtx); } +/* We got an association response */ +static void at76_rx_mgmt_assoc(struct at76_priv *priv, + struct at76_rx_buffer *buf) +{ + struct ieee80211_assoc_response *resp = + (struct ieee80211_assoc_response *)buf->packet; + u16 assoc_id = le16_to_cpu(resp->aid); + u16 status = le16_to_cpu(resp->status); + + at76_dbg(DBG_RX_MGMT, "%s: rx AssocResp bssid %s capa 0x%04x status " + "0x%04x assoc_id 0x%04x rates %s", priv->netdev->name, + mac2str(resp->header.addr3), le16_to_cpu(resp->capability), + status, assoc_id, hex2str(resp->info_element->data, + resp->info_element->len)); + + if (priv->mac_state != MAC_ASSOC) { + printk(KERN_INFO "%s: AssocResp in state %s ignored\n", + priv->netdev->name, mac_states[priv->mac_state]); + return; + } + + BUG_ON(!priv->curr_bss); + + cancel_delayed_work(&priv->dwork_assoc); + if (status == WLAN_STATUS_SUCCESS) { + struct bss_info *ptr = priv->curr_bss; + priv->assoc_id = assoc_id & 0x3fff; + /* update iwconfig params */ + memcpy(priv->bssid, ptr->bssid, ETH_ALEN); + memcpy(priv->essid, ptr->ssid, ptr->ssid_len); + priv->essid_size = ptr->ssid_len; + priv->channel = ptr->channel; + schedule_work(&priv->work_assoc_done); + } else { + at76_set_mac_state(priv, MAC_JOINING); + schedule_work(&priv->work_join); + } +} + +/* Process disassociation request from the AP */ +static void at76_rx_mgmt_disassoc(struct at76_priv *priv, + struct at76_rx_buffer *buf) +{ + struct ieee80211_disassoc *resp = + (struct ieee80211_disassoc *)buf->packet; + struct ieee80211_hdr_3addr *mgmt = &resp->header; + + at76_dbg(DBG_RX_MGMT, + "%s: rx DisAssoc bssid %s reason 0x%04x destination %s", + priv->netdev->name, mac2str(mgmt->addr3), + le16_to_cpu(resp->reason), mac2str(mgmt->addr1)); + + /* We are not connected, ignore */ + if (priv->mac_state == MAC_SCANNING || priv->mac_state == MAC_INIT + || !priv->curr_bss) + return; + + /* Not our BSSID, ignore */ + if (compare_ether_addr(mgmt->addr3, priv->curr_bss->bssid)) + return; + + /* Not for our STA and not broadcast, ignore */ + if (compare_ether_addr(priv->netdev->dev_addr, mgmt->addr1) + && !is_broadcast_ether_addr(mgmt->addr1)) + return; + + if (priv->mac_state != MAC_ASSOC && priv->mac_state != MAC_CONNECTED + && priv->mac_state != MAC_JOINING) { + printk(KERN_INFO "%s: DisAssoc in state %s ignored\n", + priv->netdev->name, mac_states[priv->mac_state]); + return; + } + + if (priv->mac_state == MAC_CONNECTED) { + netif_carrier_off(priv->netdev); + netif_stop_queue(priv->netdev); + at76_iwevent_bss_disconnect(priv->netdev); + } + cancel_delayed_work(&priv->dwork_get_scan); + cancel_delayed_work(&priv->dwork_beacon); + cancel_delayed_work(&priv->dwork_auth); + cancel_delayed_work(&priv->dwork_assoc); + at76_set_mac_state(priv, MAC_JOINING); + schedule_work(&priv->work_join); +} + +static void at76_rx_mgmt_auth(struct at76_priv *priv, + struct at76_rx_buffer *buf) +{ + struct ieee80211_auth *resp = (struct ieee80211_auth *)buf->packet; + struct ieee80211_hdr_3addr *mgmt = &resp->header; + int seq_nr = le16_to_cpu(resp->transaction); + int alg = le16_to_cpu(resp->algorithm); + int status = le16_to_cpu(resp->status); + + at76_dbg(DBG_RX_MGMT, + "%s: rx AuthFrame bssid %s alg %d seq_nr %d status %d " + "destination %s", priv->netdev->name, mac2str(mgmt->addr3), + alg, seq_nr, status, mac2str(mgmt->addr1)); + + if (alg == WLAN_AUTH_SHARED_KEY && seq_nr == 2) + at76_dbg(DBG_RX_MGMT, "%s: AuthFrame challenge %s ...", + priv->netdev->name, hex2str(resp->info_element, 18)); + + if (priv->mac_state != MAC_AUTH) { + printk(KERN_INFO "%s: ignored AuthFrame in state %s\n", + priv->netdev->name, mac_states[priv->mac_state]); + return; + } + if (priv->auth_mode != alg) { + printk(KERN_INFO "%s: ignored AuthFrame for alg %d\n", + priv->netdev->name, alg); + return; + } + + BUG_ON(!priv->curr_bss); + + /* Not our BSSID or not for our STA, ignore */ + if (compare_ether_addr(mgmt->addr3, priv->curr_bss->bssid) + || compare_ether_addr(priv->netdev->dev_addr, mgmt->addr1)) + return; + + cancel_delayed_work(&priv->dwork_auth); + if (status != WLAN_STATUS_SUCCESS) { + /* try to join next bss */ + at76_set_mac_state(priv, MAC_JOINING); + schedule_work(&priv->work_join); + return; + } + + if (priv->auth_mode == WLAN_AUTH_OPEN || seq_nr == 4) { + priv->retries = ASSOC_RETRIES; + at76_set_mac_state(priv, MAC_ASSOC); + at76_assoc_req(priv, priv->curr_bss); + at76_dbg(DBG_MGMT_TIMER, + "%s:%d: starting mgmt_timer + HZ", __func__, __LINE__); + schedule_delayed_work(&priv->dwork_assoc, ASSOC_TIMEOUT); + return; + } + + WARN_ON(seq_nr != 2); + at76_auth_req(priv, priv->curr_bss, seq_nr + 1, resp->info_element); + at76_dbg(DBG_MGMT_TIMER, "%s:%d: starting mgmt_timer + HZ", __func__, + __LINE__); + schedule_delayed_work(&priv->dwork_auth, AUTH_TIMEOUT); +} + +static void at76_rx_mgmt_deauth(struct at76_priv *priv, + struct at76_rx_buffer *buf) +{ + struct ieee80211_disassoc *resp = + (struct ieee80211_disassoc *)buf->packet; + struct ieee80211_hdr_3addr *mgmt = &resp->header; + + at76_dbg(DBG_RX_MGMT | DBG_PROGRESS, + "%s: rx DeAuth bssid %s reason 0x%04x destination %s", + priv->netdev->name, mac2str(mgmt->addr3), + le16_to_cpu(resp->reason), mac2str(mgmt->addr1)); + + if (priv->mac_state != MAC_AUTH && priv->mac_state != MAC_ASSOC + && priv->mac_state != MAC_CONNECTED) { + printk(KERN_INFO "%s: DeAuth in state %s ignored\n", + priv->netdev->name, mac_states[priv->mac_state]); + return; + } + + BUG_ON(!priv->curr_bss); + + /* Not our BSSID, ignore */ + if (compare_ether_addr(mgmt->addr3, priv->curr_bss->bssid)) + return; + + /* Not for our STA and not broadcast, ignore */ + if (compare_ether_addr(priv->netdev->dev_addr, mgmt->addr1) + && !is_broadcast_ether_addr(mgmt->addr1)) + return; + + if (priv->mac_state == MAC_CONNECTED) + at76_iwevent_bss_disconnect(priv->netdev); + + at76_set_mac_state(priv, MAC_JOINING); + schedule_work(&priv->work_join); + cancel_delayed_work(&priv->dwork_get_scan); + cancel_delayed_work(&priv->dwork_beacon); + cancel_delayed_work(&priv->dwork_auth); + cancel_delayed_work(&priv->dwork_assoc); +} + +static void at76_rx_mgmt_beacon(struct at76_priv *priv, + struct at76_rx_buffer *buf) +{ + int varpar_len; + /* beacon content */ + struct ieee80211_beacon *bdata = (struct ieee80211_beacon *)buf->packet; + struct ieee80211_hdr_3addr *mgmt = &bdata->header; + + struct list_head *lptr; + struct bss_info *match; /* entry matching addr3 with its bssid */ + int new_entry = 0; + int len; + struct ieee80211_info_element *ie; + int have_ssid = 0; + int have_rates = 0; + int have_channel = 0; + int keep_going = 1; + unsigned long flags; + + spin_lock_irqsave(&priv->bss_list_spinlock, flags); + if (priv->mac_state == MAC_CONNECTED) { + /* in state MAC_CONNECTED we use the mgmt_timer to control + the beacon of the BSS */ + BUG_ON(!priv->curr_bss); + + if (!compare_ether_addr(priv->curr_bss->bssid, mgmt->addr3)) { + /* We got our AP's beacon, defer the timeout handler. + Kill pending work first, as schedule_delayed_work() + won't do it. */ + cancel_delayed_work(&priv->dwork_beacon); + schedule_delayed_work(&priv->dwork_beacon, + BEACON_TIMEOUT); + priv->curr_bss->rssi = buf->rssi; + priv->beacons_received++; + goto exit; + } + } + + /* look if we have this BSS already in the list */ + match = NULL; + + if (!list_empty(&priv->bss_list)) { + list_for_each(lptr, &priv->bss_list) { + struct bss_info *bss_ptr = + list_entry(lptr, struct bss_info, list); + if (!compare_ether_addr(bss_ptr->bssid, mgmt->addr3)) { + match = bss_ptr; + break; + } + } + } + + if (!match) { + /* BSS not in the list - append it */ + match = kzalloc(sizeof(struct bss_info), GFP_ATOMIC); + if (!match) { + at76_dbg(DBG_BSS_TABLE, + "%s: cannot kmalloc new bss info (%zd byte)", + priv->netdev->name, sizeof(struct bss_info)); + goto exit; + } + new_entry = 1; + list_add_tail(&match->list, &priv->bss_list); + } + + match->capa = le16_to_cpu(bdata->capability); + match->beacon_interval = le16_to_cpu(bdata->beacon_interval); + match->rssi = buf->rssi; + match->link_qual = buf->link_quality; + match->noise_level = buf->noise_level; + memcpy(match->bssid, mgmt->addr3, ETH_ALEN); + at76_dbg(DBG_RX_BEACON, "%s: bssid %s", priv->netdev->name, + mac2str(match->bssid)); + + ie = bdata->info_element; + + /* length of var length beacon parameters */ + varpar_len = min_t(int, le16_to_cpu(buf->wlength) - + sizeof(struct ieee80211_beacon), + BEACON_MAX_DATA_LENGTH); + + /* This routine steps through the bdata->data array to get + * some useful information about the access point. + * Currently, this implementation supports receipt of: SSID, + * supported transfer rates and channel, in any order, with some + * tolerance for intermittent unknown codes (although this + * functionality may not be necessary as the useful information will + * usually arrive in consecutively, but there have been some + * reports of some of the useful information fields arriving in a + * different order). + * It does not support any more IE types although MFIE_TYPE_TIM may + * be supported (on my AP at least). + * The bdata->data array is about 1500 bytes long but only ~36 of those + * bytes are useful, hence the have_ssid etc optimizations. */ + + while (keep_going && + ((&ie->data[ie->len] - (u8 *)bdata->info_element) <= + varpar_len)) { + + switch (ie->id) { + + case MFIE_TYPE_SSID: + if (have_ssid) + break; + + len = min_t(int, IW_ESSID_MAX_SIZE, ie->len); + + /* we copy only if this is a new entry, + or the incoming SSID is not a hidden SSID. This + will protect us from overwriting a real SSID read + in a ProbeResponse with a hidden one from a + following beacon. */ + if (!new_entry && at76_is_hidden_ssid(ie->data, len)) { + have_ssid = 1; + break; + } + + match->ssid_len = len; + memcpy(match->ssid, ie->data, len); + at76_dbg(DBG_RX_BEACON, "%s: SSID - %.*s", + priv->netdev->name, len, match->ssid); + have_ssid = 1; + break; + + case MFIE_TYPE_RATES: + if (have_rates) + break; + + match->rates_len = + min_t(int, sizeof(match->rates), ie->len); + memcpy(match->rates, ie->data, match->rates_len); + have_rates = 1; + at76_dbg(DBG_RX_BEACON, "%s: SUPPORTED RATES %s", + priv->netdev->name, + hex2str(ie->data, ie->len)); + break; + + case MFIE_TYPE_DS_SET: + if (have_channel) + break; + + match->channel = ie->data[0]; + have_channel = 1; + at76_dbg(DBG_RX_BEACON, "%s: CHANNEL - %d", + priv->netdev->name, match->channel); + break; + + case MFIE_TYPE_CF_SET: + case MFIE_TYPE_TIM: + case MFIE_TYPE_IBSS_SET: + default: + at76_dbg(DBG_RX_BEACON, "%s: beacon IE id %d len %d %s", + priv->netdev->name, ie->id, ie->len, + hex2str(ie->data, ie->len)); + break; + } + + /* advance to the next informational element */ + next_ie(&ie); + + /* Optimization: after all, the bdata->data array is + * varpar_len bytes long, whereas we get all of the useful + * information after only ~36 bytes, this saves us a lot of + * time (and trouble as the remaining portion of the array + * could be full of junk) + * Comment this out if you want to see what other information + * comes from the AP - although little of it may be useful */ + } + + at76_dbg(DBG_RX_BEACON, "%s: Finished processing beacon data", + priv->netdev->name); + + match->last_rx = jiffies; /* record last rx of beacon */ + +exit: + spin_unlock_irqrestore(&priv->bss_list_spinlock, flags); +} + +/* Calculate the link level from a given rx_buffer */ +static void at76_calc_level(struct at76_priv *priv, struct at76_rx_buffer *buf, + struct iw_quality *qual) +{ + /* just a guess for now, might be different for other chips */ + int max_rssi = 42; + + qual->level = (buf->rssi * 100 / max_rssi); + if (qual->level > 100) + qual->level = 100; + qual->updated |= IW_QUAL_LEVEL_UPDATED; +} + +/* Calculate the link quality from a given rx_buffer */ +static void at76_calc_qual(struct at76_priv *priv, struct at76_rx_buffer *buf, + struct iw_quality *qual) +{ + if (at76_is_intersil(priv->board_type)) + qual->qual = buf->link_quality; + else { + unsigned long elapsed; + + /* Update qual at most once a second */ + elapsed = jiffies - priv->beacons_last_qual; + if (elapsed < 1 * HZ) + return; + + qual->qual = qual->level * priv->beacons_received * + msecs_to_jiffies(priv->beacon_period) / elapsed; + + priv->beacons_last_qual = jiffies; + priv->beacons_received = 0; + } + qual->qual = (qual->qual > 100) ? 100 : qual->qual; + qual->updated |= IW_QUAL_QUAL_UPDATED; +} + +/* Calculate the noise quality from a given rx_buffer */ +static void at76_calc_noise(struct at76_priv *priv, struct at76_rx_buffer *buf, + struct iw_quality *qual) +{ + qual->noise = 0; + qual->updated |= IW_QUAL_NOISE_INVALID; +} + +static void at76_update_wstats(struct at76_priv *priv, + struct at76_rx_buffer *buf) +{ + struct iw_quality *qual = &priv->wstats.qual; + + if (buf->rssi && priv->mac_state == MAC_CONNECTED) { + qual->updated = 0; + at76_calc_level(priv, buf, qual); + at76_calc_qual(priv, buf, qual); + at76_calc_noise(priv, buf, qual); + } else { + qual->qual = 0; + qual->level = 0; + qual->noise = 0; + qual->updated = IW_QUAL_ALL_INVALID; + } +} + +static void at76_rx_mgmt(struct at76_priv *priv, struct at76_rx_buffer *buf) +{ + struct ieee80211_hdr_3addr *mgmt = + (struct ieee80211_hdr_3addr *)buf->packet; + u16 framectl = le16_to_cpu(mgmt->frame_ctl); + + /* update wstats */ + if (priv->mac_state != MAC_INIT && priv->mac_state != MAC_SCANNING) { + /* jal: this is a dirty hack needed by Tim in ad-hoc mode */ + /* Data packets always seem to have a 0 link level, so we + only read link quality info from management packets. + Atmel driver actually averages the present, and previous + values, we just present the raw value at the moment - TJS */ + if (priv->iw_mode == IW_MODE_ADHOC + || (priv->curr_bss + && !compare_ether_addr(mgmt->addr3, + priv->curr_bss->bssid))) + at76_update_wstats(priv, buf); + } + + at76_dbg(DBG_RX_MGMT_CONTENT, "%s rx mgmt framectl 0x%x %s", + priv->netdev->name, framectl, + hex2str(mgmt, le16_to_cpu(buf->wlength))); + + switch (framectl & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_BEACON: + case IEEE80211_STYPE_PROBE_RESP: + at76_rx_mgmt_beacon(priv, buf); + break; + + case IEEE80211_STYPE_ASSOC_RESP: + at76_rx_mgmt_assoc(priv, buf); + break; + + case IEEE80211_STYPE_DISASSOC: + at76_rx_mgmt_disassoc(priv, buf); + break; + + case IEEE80211_STYPE_AUTH: + at76_rx_mgmt_auth(priv, buf); + break; + + case IEEE80211_STYPE_DEAUTH: + at76_rx_mgmt_deauth(priv, buf); + break; + + default: + printk(KERN_DEBUG "%s: ignoring frame with framectl 0x%04x\n", + priv->netdev->name, framectl); + } + + return; +} + +/* Convert the 802.11 header into an ethernet-style header, make skb + * ready for consumption by netif_rx() */ +static void at76_ieee80211_to_eth(struct sk_buff *skb, int iw_mode) +{ + struct ieee80211_hdr_3addr *i802_11_hdr; + struct ethhdr *eth_hdr_p; + u8 *src_addr; + u8 *dest_addr; + + i802_11_hdr = (struct ieee80211_hdr_3addr *)skb->data; + + /* That would be the ethernet header if the hardware converted + * the frame for us. Make sure the source and the destination + * match the 802.11 header. Which hardware does it? */ + eth_hdr_p = (struct ethhdr *)skb_pull(skb, IEEE80211_3ADDR_LEN); + + dest_addr = i802_11_hdr->addr1; + if (iw_mode == IW_MODE_ADHOC) + src_addr = i802_11_hdr->addr2; + else + src_addr = i802_11_hdr->addr3; + + if (!compare_ether_addr(eth_hdr_p->h_source, src_addr) && + !compare_ether_addr(eth_hdr_p->h_dest, dest_addr)) + /* Yes, we already have an ethernet header */ + skb_reset_mac_header(skb); + else { + u16 len; + + /* Need to build an ethernet header */ + if (!memcmp(skb->data, snapsig, sizeof(snapsig))) { + /* SNAP frame - decapsulate, keep proto */ + skb_push(skb, offsetof(struct ethhdr, h_proto) - + sizeof(rfc1042sig)); + len = 0; + } else { + /* 802.3 frame, proto is length */ + len = skb->len; + skb_push(skb, ETH_HLEN); + } + + skb_reset_mac_header(skb); + eth_hdr_p = eth_hdr(skb); + /* This needs to be done in this order (eth_hdr_p->h_dest may + * overlap src_addr) */ + memcpy(eth_hdr_p->h_source, src_addr, ETH_ALEN); + memcpy(eth_hdr_p->h_dest, dest_addr, ETH_ALEN); + if (len) + eth_hdr_p->h_proto = htons(len); + } + + skb->protocol = eth_type_trans(skb, skb->dev); +} + +/* Check for fragmented data in priv->rx_skb. If the packet was no fragment + or it was the last of a fragment set a skb containing the whole packet + is returned for further processing. Otherwise we get NULL and are + done and the packet is either stored inside the fragment buffer + or thrown away. Every returned skb starts with the ieee802_11 header + and contains _no_ FCS at the end */ +static struct sk_buff *at76_check_for_rx_frags(struct at76_priv *priv) +{ + struct sk_buff *skb = priv->rx_skb; + struct at76_rx_buffer *buf = (struct at76_rx_buffer *)skb->data; + struct ieee80211_hdr_3addr *i802_11_hdr = + (struct ieee80211_hdr_3addr *)buf->packet; + /* seq_ctrl, fragment_number, sequence number of new packet */ + u16 sctl = le16_to_cpu(i802_11_hdr->seq_ctl); + u16 fragnr = sctl & 0xf; + u16 seqnr = sctl >> 4; + u16 frame_ctl = le16_to_cpu(i802_11_hdr->frame_ctl); + + /* Length including the IEEE802.11 header, but without the trailing + * FCS and without the Atmel Rx header */ + int length = le16_to_cpu(buf->wlength) - IEEE80211_FCS_LEN; + + /* where does the data payload start in skb->data ? */ + u8 *data = i802_11_hdr->payload; + + /* length of payload, excl. the trailing FCS */ + int data_len = length - IEEE80211_3ADDR_LEN; + + int i; + struct rx_data_buf *bptr, *optr; + unsigned long oldest = ~0UL; + + at76_dbg(DBG_RX_FRAGS, + "%s: rx data frame_ctl %04x addr2 %s seq/frag %d/%d " + "length %d data %d: %s ...", priv->netdev->name, frame_ctl, + mac2str(i802_11_hdr->addr2), seqnr, fragnr, length, data_len, + hex2str(data, 32)); + + at76_dbg(DBG_RX_FRAGS_SKB, "%s: incoming skb: head %p data %p " + "tail %p end %p len %d", priv->netdev->name, skb->head, + skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), + skb->len); + + if (data_len < 0) { + /* make sure data starts in the buffer */ + printk(KERN_INFO "%s: data frame too short\n", + priv->netdev->name); + return NULL; + } + + WARN_ON(length <= AT76_RX_HDRLEN); + if (length <= AT76_RX_HDRLEN) + return NULL; + + /* remove the at76_rx_buffer header - we don't need it anymore */ + /* we need the IEEE802.11 header (for the addresses) if this packet + is the first of a chain */ + skb_pull(skb, AT76_RX_HDRLEN); + + /* remove FCS at end */ + skb_trim(skb, length); + + at76_dbg(DBG_RX_FRAGS_SKB, "%s: trimmed skb: head %p data %p tail %p " + "end %p len %d data %p data_len %d", priv->netdev->name, + skb->head, skb->data, skb_tail_pointer(skb), + skb_end_pointer(skb), skb->len, data, data_len); + + if (fragnr == 0 && !(frame_ctl & IEEE80211_FCTL_MOREFRAGS)) { + /* unfragmented packet received */ + /* Use a new skb for the next receive */ + priv->rx_skb = NULL; + at76_dbg(DBG_RX_FRAGS, "%s: unfragmented", priv->netdev->name); + return skb; + } + + /* look if we've got a chain for the sender address. + afterwards optr points to first free or the oldest entry, + or, if i < NR_RX_DATA_BUF, bptr points to the entry for the + sender address */ + /* determining the oldest entry doesn't cope with jiffies wrapping + but I don't care to delete a young entry at these rare moments ... */ + + bptr = priv->rx_data; + optr = NULL; + for (i = 0; i < NR_RX_DATA_BUF; i++, bptr++) { + if (!bptr->skb) { + optr = bptr; + oldest = 0UL; + continue; + } + + if (!compare_ether_addr(i802_11_hdr->addr2, bptr->sender)) + break; + + if (!optr) { + optr = bptr; + oldest = bptr->last_rx; + } else if (bptr->last_rx < oldest) + optr = bptr; + } + + if (i < NR_RX_DATA_BUF) { + + at76_dbg(DBG_RX_FRAGS, "%s: %d. cacheentry (seq/frag = %d/%d) " + "matched sender addr", + priv->netdev->name, i, bptr->seqnr, bptr->fragnr); + + /* bptr points to an entry for the sender address */ + if (bptr->seqnr == seqnr) { + int left; + /* the fragment has the current sequence number */ + if (((bptr->fragnr + 1) & 0xf) != fragnr) { + /* wrong fragment number -> ignore it */ + /* is & 0xf necessary above ??? */ + at76_dbg(DBG_RX_FRAGS, + "%s: frag nr mismatch: %d + 1 != %d", + priv->netdev->name, bptr->fragnr, + fragnr); + return NULL; + } + bptr->last_rx = jiffies; + /* the next following fragment number -> + add the data at the end */ + + /* for test only ??? */ + left = skb_tailroom(bptr->skb); + if (left < data_len) + printk(KERN_INFO + "%s: only %d byte free (need %d)\n", + priv->netdev->name, left, data_len); + else + memcpy(skb_put(bptr->skb, data_len), data, + data_len); + + bptr->fragnr = fragnr; + if (frame_ctl & IEEE80211_FCTL_MOREFRAGS) + return NULL; + + /* this was the last fragment - send it */ + skb = bptr->skb; + bptr->skb = NULL; /* free the entry */ + at76_dbg(DBG_RX_FRAGS, "%s: last frag of seq %d", + priv->netdev->name, seqnr); + return skb; + } + + /* got another sequence number */ + if (fragnr == 0) { + /* it's the start of a new chain - replace the + old one by this */ + /* bptr->sender has the correct value already */ + at76_dbg(DBG_RX_FRAGS, + "%s: start of new seq %d, removing old seq %d", + priv->netdev->name, seqnr, bptr->seqnr); + bptr->seqnr = seqnr; + bptr->fragnr = 0; + bptr->last_rx = jiffies; + /* swap bptr->skb and priv->rx_skb */ + skb = bptr->skb; + bptr->skb = priv->rx_skb; + priv->rx_skb = skb; + } else { + /* it from the middle of a new chain -> + delete the old entry and skip the new one */ + at76_dbg(DBG_RX_FRAGS, + "%s: middle of new seq %d (%d) " + "removing old seq %d", + priv->netdev->name, seqnr, fragnr, + bptr->seqnr); + dev_kfree_skb(bptr->skb); + bptr->skb = NULL; + } + return NULL; + } + + /* if we didn't find a chain for the sender address, optr + points either to the first free or the oldest entry */ + + if (fragnr != 0) { + /* this is not the begin of a fragment chain ... */ + at76_dbg(DBG_RX_FRAGS, + "%s: no chain for non-first fragment (%d)", + priv->netdev->name, fragnr); + return NULL; + } + + BUG_ON(!optr); + if (optr->skb) { + /* swap the skb's */ + skb = optr->skb; + optr->skb = priv->rx_skb; + priv->rx_skb = skb; + + at76_dbg(DBG_RX_FRAGS, + "%s: free old contents: sender %s seq/frag %d/%d", + priv->netdev->name, mac2str(optr->sender), + optr->seqnr, optr->fragnr); + + } else { + /* take the skb from priv->rx_skb */ + optr->skb = priv->rx_skb; + /* let at76_submit_rx_urb() allocate a new skb */ + priv->rx_skb = NULL; + + at76_dbg(DBG_RX_FRAGS, "%s: use a free entry", + priv->netdev->name); + } + memcpy(optr->sender, i802_11_hdr->addr2, ETH_ALEN); + optr->seqnr = seqnr; + optr->fragnr = 0; + optr->last_rx = jiffies; + + return NULL; +} + +/* Rx interrupt: we expect the complete data buffer in priv->rx_skb */ +static void at76_rx_data(struct at76_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct net_device_stats *stats = &priv->stats; + struct sk_buff *skb = priv->rx_skb; + struct at76_rx_buffer *buf = (struct at76_rx_buffer *)skb->data; + struct ieee80211_hdr_3addr *i802_11_hdr; + int length = le16_to_cpu(buf->wlength); + + at76_dbg(DBG_RX_DATA, "%s received data packet: %s", netdev->name, + hex2str(skb->data, AT76_RX_HDRLEN)); + + at76_dbg(DBG_RX_DATA_CONTENT, "rx packet: %s", + hex2str(skb->data + AT76_RX_HDRLEN, length)); + + skb = at76_check_for_rx_frags(priv); + if (!skb) + return; + + /* Atmel header and the FCS are already removed */ + i802_11_hdr = (struct ieee80211_hdr_3addr *)skb->data; + + skb->dev = netdev; + skb->ip_summed = CHECKSUM_NONE; /* TODO: should check CRC */ + + if (is_broadcast_ether_addr(i802_11_hdr->addr1)) { + if (!compare_ether_addr(i802_11_hdr->addr1, netdev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else if (compare_ether_addr(i802_11_hdr->addr1, netdev->dev_addr)) + skb->pkt_type = PACKET_OTHERHOST; + + at76_ieee80211_to_eth(skb, priv->iw_mode); + + netdev->last_rx = jiffies; + netif_rx(skb); + stats->rx_packets++; + stats->rx_bytes += length; + + return; +} + +static void at76_rx_monitor_mode(struct at76_priv *priv) +{ + struct at76_rx_radiotap *rt; + u8 *payload; + int skblen; + struct net_device *netdev = priv->netdev; + struct at76_rx_buffer *buf = + (struct at76_rx_buffer *)priv->rx_skb->data; + /* length including the IEEE802.11 header and the trailing FCS, + but not at76_rx_buffer */ + int length = le16_to_cpu(buf->wlength); + struct sk_buff *skb = priv->rx_skb; + struct net_device_stats *stats = &priv->stats; + + if (length < IEEE80211_FCS_LEN) { + /* buffer contains no data */ + at76_dbg(DBG_MONITOR_MODE, + "%s: MONITOR MODE: rx skb without data", + priv->netdev->name); + return; + } + + skblen = sizeof(struct at76_rx_radiotap) + length; + + skb = dev_alloc_skb(skblen); + if (!skb) { + printk(KERN_ERR "%s: MONITOR MODE: dev_alloc_skb for radiotap " + "header returned NULL\n", priv->netdev->name); + return; + } + + skb_put(skb, skblen); + + rt = (struct at76_rx_radiotap *)skb->data; + payload = skb->data + sizeof(struct at76_rx_radiotap); + + rt->rt_hdr.it_version = 0; + rt->rt_hdr.it_pad = 0; + rt->rt_hdr.it_len = cpu_to_le16(sizeof(struct at76_rx_radiotap)); + rt->rt_hdr.it_present = cpu_to_le32(AT76_RX_RADIOTAP_PRESENT); + + rt->rt_tsft = cpu_to_le64(le32_to_cpu(buf->rx_time)); + rt->rt_rate = hw_rates[buf->rx_rate] & (~0x80); + rt->rt_signal = buf->rssi; + rt->rt_noise = buf->noise_level; + rt->rt_flags = IEEE80211_RADIOTAP_F_FCS; + if (buf->fragmentation) + rt->rt_flags |= IEEE80211_RADIOTAP_F_FRAG; + + memcpy(payload, buf->packet, length); + skb->dev = netdev; + skb->ip_summed = CHECKSUM_NONE; + skb_reset_mac_header(skb); + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + + netdev->last_rx = jiffies; + netif_rx(skb); + stats->rx_packets++; + stats->rx_bytes += length; +} + +/* Check if we spy on the sender address in buf and update stats */ +static void at76_iwspy_update(struct at76_priv *priv, + struct at76_rx_buffer *buf) +{ + struct ieee80211_hdr_3addr *hdr = + (struct ieee80211_hdr_3addr *)buf->packet; + struct iw_quality qual; + + /* We can only set the level here */ + qual.updated = IW_QUAL_QUAL_INVALID | IW_QUAL_NOISE_INVALID; + qual.level = 0; + qual.noise = 0; + at76_calc_level(priv, buf, &qual); + + spin_lock_bh(&priv->spy_spinlock); + + if (priv->spy_data.spy_number > 0) + wireless_spy_update(priv->netdev, hdr->addr2, &qual); + + spin_unlock_bh(&priv->spy_spinlock); +} + static void at76_rx_tasklet(unsigned long param) { struct urb *urb = (struct urb *)param; struct at76_priv *priv = urb->context; + struct net_device *netdev = priv->netdev; struct at76_rx_buffer *buf; - struct ieee80211_rx_status rx_status = { 0 }; + struct ieee80211_hdr_3addr *i802_11_hdr; + u16 frame_ctl; if (priv->device_unplugged) { at76_dbg(DBG_DEVSTART, "device unplugged"); @@ -1625,44 +5002,63 @@ static void at76_rx_tasklet(unsigned long param) return; } - if (!priv->rx_skb || !priv->rx_skb->data) + if (!priv->rx_skb || !netdev || !priv->rx_skb->data) return; buf = (struct at76_rx_buffer *)priv->rx_skb->data; + i802_11_hdr = (struct ieee80211_hdr_3addr *)buf->packet; + + frame_ctl = le16_to_cpu(i802_11_hdr->frame_ctl); + if (urb->status != 0) { if (urb->status != -ENOENT && urb->status != -ECONNRESET) at76_dbg(DBG_URB, "%s %s: - nonzero Rx bulk status received: %d", - __func__, wiphy_name(priv->hw->wiphy), - urb->status); + __func__, netdev->name, urb->status); return; } at76_dbg(DBG_RX_ATMEL_HDR, - "%s: rx frame: rate %d rssi %d noise %d link %d", - wiphy_name(priv->hw->wiphy), buf->rx_rate, buf->rssi, - buf->noise_level, buf->link_quality); + "%s: rx frame: rate %d rssi %d noise %d link %d %s", + priv->netdev->name, buf->rx_rate, buf->rssi, buf->noise_level, + buf->link_quality, hex2str(i802_11_hdr, 48)); + if (priv->iw_mode == IW_MODE_MONITOR) { + at76_rx_monitor_mode(priv); + goto exit; + } - skb_trim(priv->rx_skb, le16_to_cpu(buf->wlength) + AT76_RX_HDRLEN); - at76_dbg_dump(DBG_RX_DATA, &priv->rx_skb->data[AT76_RX_HDRLEN], - priv->rx_skb->len, "RX: len=%d", - (int)(priv->rx_skb->len - AT76_RX_HDRLEN)); + /* there is a new bssid around, accept it: */ + if (buf->newbss && priv->iw_mode == IW_MODE_ADHOC) { + at76_dbg(DBG_PROGRESS, "%s: rx newbss", netdev->name); + schedule_work(&priv->work_new_bss); + } - rx_status.signal = buf->rssi; - /* FIXME: is rate_idx still present in structure? */ - rx_status.rate_idx = buf->rx_rate; - rx_status.flag |= RX_FLAG_DECRYPTED; - rx_status.flag |= RX_FLAG_IV_STRIPPED; + switch (frame_ctl & IEEE80211_FCTL_FTYPE) { + case IEEE80211_FTYPE_DATA: + at76_rx_data(priv); + break; - skb_pull(priv->rx_skb, AT76_RX_HDRLEN); - at76_dbg(DBG_MAC80211, "calling ieee80211_rx_irqsafe(): %d/%d", - priv->rx_skb->len, priv->rx_skb->data_len); - ieee80211_rx_irqsafe(priv->hw, priv->rx_skb, &rx_status); + case IEEE80211_FTYPE_MGMT: + /* jal: TODO: find out if we can update iwspy also on + other frames than management (might depend on the + radio chip / firmware version !) */ - /* Use a new skb for the next receive */ - priv->rx_skb = NULL; + at76_iwspy_update(priv, buf); + at76_rx_mgmt(priv, buf); + break; + + case IEEE80211_FTYPE_CTL: + at76_dbg(DBG_RX_CTRL, "%s: ignored ctrl frame: %04x", + priv->netdev->name, frame_ctl); + break; + + default: + printk(KERN_DEBUG "%s: ignoring frame with framectl 0x%04x\n", + priv->netdev->name, frame_ctl); + } +exit: at76_submit_rx_urb(priv); } @@ -1689,7 +5085,7 @@ static struct fwentry *at76_load_firmware(struct usb_device *udev, fwe->fwname); dev_printk(KERN_ERR, &udev->dev, "you may need to download the firmware from " - "http://developer.berlios.de/projects/at76c503a/\n"); + "http://developer.berlios.de/projects/at76c503a/"); goto exit; } @@ -1742,628 +5138,56 @@ exit: return NULL; } -static void at76_mac80211_tx_callback(struct urb *urb) -{ - struct at76_priv *priv = urb->context; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(priv->tx_skb); - - at76_dbg(DBG_MAC80211, "%s()", __func__); - - switch (urb->status) { - case 0: - /* success */ - /* FIXME: - * is the frame really ACKed when tx_callback is called ? */ - info->flags |= IEEE80211_TX_STAT_ACK; - break; - case -ENOENT: - case -ECONNRESET: - /* fail, urb has been unlinked */ - /* FIXME: add error message */ - break; - default: - at76_dbg(DBG_URB, "%s - nonzero tx status received: %d", - __func__, urb->status); - break; - } - - memset(&info->status, 0, sizeof(info->status)); - - ieee80211_tx_status_irqsafe(priv->hw, priv->tx_skb); - - priv->tx_skb = NULL; - - ieee80211_wake_queues(priv->hw); -} - -static int at76_mac80211_tx(struct ieee80211_hw *hw, struct sk_buff *skb) -{ - struct at76_priv *priv = hw->priv; - struct at76_tx_buffer *tx_buffer = priv->bulk_out_buffer; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int padding, submit_len, ret; - - at76_dbg(DBG_MAC80211, "%s()", __func__); - - if (priv->tx_urb->status == -EINPROGRESS) { - printk(KERN_ERR "%s: %s called while tx urb is pending\n", - wiphy_name(priv->hw->wiphy), __func__); - return NETDEV_TX_BUSY; - } - - ieee80211_stop_queues(hw); - - at76_ledtrig_tx_activity(); /* tell ledtrigger we send a packet */ - - WARN_ON(priv->tx_skb != NULL); - - priv->tx_skb = skb; - padding = at76_calc_padding(skb->len); - submit_len = AT76_TX_HDRLEN + skb->len + padding; - - /* setup 'Atmel' header */ - memset(tx_buffer, 0, sizeof(*tx_buffer)); - tx_buffer->padding = padding; - tx_buffer->wlength = cpu_to_le16(skb->len); - tx_buffer->tx_rate = ieee80211_get_tx_rate(hw, info)->hw_value; - if (FIRMWARE_IS_WPA(priv->fw_version) && info->control.hw_key) { - tx_buffer->key_id = (info->control.hw_key->keyidx); - tx_buffer->cipher_type = - priv->keys[info->control.hw_key->keyidx].cipher; - tx_buffer->cipher_length = - priv->keys[info->control.hw_key->keyidx].keylen; - tx_buffer->reserved = 0; - } else { - tx_buffer->key_id = 0; - tx_buffer->cipher_type = 0; - tx_buffer->cipher_length = 0; - tx_buffer->reserved = 0; - }; - /* memset(tx_buffer->reserved, 0, sizeof(tx_buffer->reserved)); */ - memcpy(tx_buffer->packet, skb->data, skb->len); - - at76_dbg(DBG_TX_DATA, "%s tx: wlen 0x%x pad 0x%x rate %d hdr", - wiphy_name(priv->hw->wiphy), le16_to_cpu(tx_buffer->wlength), - tx_buffer->padding, tx_buffer->tx_rate); - - /* send stuff */ - at76_dbg_dump(DBG_TX_DATA_CONTENT, tx_buffer, submit_len, - "%s(): tx_buffer %d bytes:", __func__, submit_len); - usb_fill_bulk_urb(priv->tx_urb, priv->udev, priv->tx_pipe, tx_buffer, - submit_len, at76_mac80211_tx_callback, priv); - ret = usb_submit_urb(priv->tx_urb, GFP_ATOMIC); - if (ret) { - printk(KERN_ERR "%s: error in tx submit urb: %d\n", - wiphy_name(priv->hw->wiphy), ret); - if (ret == -EINVAL) - printk(KERN_ERR - "%s: -EINVAL: tx urb %p hcpriv %p complete %p\n", - wiphy_name(priv->hw->wiphy), priv->tx_urb, - priv->tx_urb->hcpriv, priv->tx_urb->complete); - } - - return 0; -} - -static int at76_mac80211_start(struct ieee80211_hw *hw) -{ - struct at76_priv *priv = hw->priv; - int ret; - - at76_dbg(DBG_MAC80211, "%s()", __func__); - - mutex_lock(&priv->mtx); - - ret = at76_submit_rx_urb(priv); - if (ret < 0) { - printk(KERN_ERR "%s: open: submit_rx_urb failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); - goto error; - } - - at76_startup_device(priv); - - at76_start_monitor(priv); - -error: - mutex_unlock(&priv->mtx); - - return 0; -} - -static void at76_mac80211_stop(struct ieee80211_hw *hw) -{ - struct at76_priv *priv = hw->priv; - - at76_dbg(DBG_MAC80211, "%s()", __func__); - - mutex_lock(&priv->mtx); - - if (!priv->device_unplugged) { - /* We are called by "ifconfig ethX down", not because the - * device is not available anymore. */ - if (at76_set_radio(priv, 0) == 1) - at76_wait_completion(priv, CMD_RADIO_ON); - - /* We unlink rx_urb because at76_open() re-submits it. - * If unplugged, at76_delete_device() takes care of it. */ - usb_kill_urb(priv->rx_urb); - } - - mutex_unlock(&priv->mtx); -} - -static int at76_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) -{ - struct at76_priv *priv = hw->priv; - int ret = 0; - - at76_dbg(DBG_MAC80211, "%s()", __func__); - - mutex_lock(&priv->mtx); - - switch (conf->type) { - case NL80211_IFTYPE_STATION: - priv->iw_mode = IW_MODE_INFRA; - break; - default: - ret = -EOPNOTSUPP; - goto exit; - } - -exit: - mutex_unlock(&priv->mtx); - - return ret; -} - -static void at76_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) -{ - at76_dbg(DBG_MAC80211, "%s()", __func__); -} - -static int at76_join(struct at76_priv *priv) -{ - struct at76_req_join join; - int ret; - - memset(&join, 0, sizeof(struct at76_req_join)); - memcpy(join.essid, priv->essid, priv->essid_size); - join.essid_size = priv->essid_size; - memcpy(join.bssid, priv->bssid, ETH_ALEN); - join.bss_type = INFRASTRUCTURE_MODE; - join.channel = priv->channel; - join.timeout = cpu_to_le16(2000); - - at76_dbg(DBG_MAC80211, "%s: sending CMD_JOIN", __func__); - ret = at76_set_card_command(priv->udev, CMD_JOIN, &join, - sizeof(struct at76_req_join)); - - if (ret < 0) { - printk(KERN_ERR "%s: at76_set_card_command failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); - return 0; - } - - ret = at76_wait_completion(priv, CMD_JOIN); - at76_dbg(DBG_MAC80211, "%s: CMD_JOIN returned: 0x%02x", __func__, ret); - if (ret != CMD_STATUS_COMPLETE) { - printk(KERN_ERR "%s: at76_wait_completion failed: %d\n", - wiphy_name(priv->hw->wiphy), ret); - return 0; - } - - at76_set_tkip_bssid(priv, priv->bssid); - at76_set_pm_mode(priv); - - return 0; -} - -static void at76_dwork_hw_scan(struct work_struct *work) -{ - struct at76_priv *priv = container_of(work, struct at76_priv, - dwork_hw_scan.work); - int ret; - - ret = at76_get_cmd_status(priv->udev, CMD_SCAN); - at76_dbg(DBG_MAC80211, "%s: CMD_SCAN status 0x%02x", __func__, ret); - - /* FIXME: add maximum time for scan to complete */ - - if (ret != CMD_STATUS_COMPLETE) { - queue_delayed_work(priv->hw->workqueue, &priv->dwork_hw_scan, - SCAN_POLL_INTERVAL); - goto exit; - } - - ieee80211_scan_completed(priv->hw); - - if (is_valid_ether_addr(priv->bssid)) { - ieee80211_wake_queues(priv->hw); - at76_join(priv); - } - - ieee80211_wake_queues(priv->hw); - -exit: - return; -} - -static int at76_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len) -{ - struct at76_priv *priv = hw->priv; - struct at76_req_scan scan; - int ret; - - at76_dbg(DBG_MAC80211, "%s():", __func__); - at76_dbg_dump(DBG_MAC80211, ssid, len, "ssid %zd bytes:", len); - - mutex_lock(&priv->mtx); - - ieee80211_stop_queues(hw); - - memset(&scan, 0, sizeof(struct at76_req_scan)); - memset(scan.bssid, 0xFF, ETH_ALEN); - scan.scan_type = SCAN_TYPE_ACTIVE; - if (priv->essid_size > 0) { - memcpy(scan.essid, ssid, len); - scan.essid_size = len; - } - scan.min_channel_time = cpu_to_le16(priv->scan_min_time); - scan.max_channel_time = cpu_to_le16(priv->scan_max_time); - scan.probe_delay = cpu_to_le16(priv->scan_min_time * 1000); - scan.international_scan = 0; - - at76_dbg(DBG_MAC80211, "%s: sending CMD_SCAN", __func__); - ret = at76_set_card_command(priv->udev, CMD_SCAN, &scan, sizeof(scan)); - - if (ret < 0) { - err("CMD_SCAN failed: %d", ret); - goto exit; - } - - queue_delayed_work(priv->hw->workqueue, &priv->dwork_hw_scan, - SCAN_POLL_INTERVAL); - -exit: - mutex_unlock(&priv->mtx); - - return 0; -} - -static int at76_config(struct ieee80211_hw *hw, u32 changed) -{ - struct at76_priv *priv = hw->priv; - struct ieee80211_conf *conf = &hw->conf; - - at76_dbg(DBG_MAC80211, "%s(): channel %d radio %d", - __func__, conf->channel->hw_value, conf->radio_enabled); - at76_dbg_dump(DBG_MAC80211, priv->essid, priv->essid_size, "ssid:"); - at76_dbg_dump(DBG_MAC80211, priv->bssid, ETH_ALEN, "bssid:"); - - mutex_lock(&priv->mtx); - - priv->channel = conf->channel->hw_value; - - if (is_valid_ether_addr(priv->bssid)) { - at76_join(priv); - ieee80211_wake_queues(priv->hw); - } else { - ieee80211_stop_queues(priv->hw); - at76_start_monitor(priv); - }; - - mutex_unlock(&priv->mtx); - - return 0; -} - -static int at76_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct at76_priv *priv = hw->priv; - - at76_dbg_dump(DBG_MAC80211, conf->bssid, ETH_ALEN, "bssid:"); - - mutex_lock(&priv->mtx); - - memcpy(priv->bssid, conf->bssid, ETH_ALEN); -// memcpy(priv->essid, conf->ssid, conf->ssid_len); -// priv->essid_size = conf->ssid_len; - - if (is_valid_ether_addr(priv->bssid)) { - /* mac80211 is joining a bss */ - ieee80211_wake_queues(priv->hw); - at76_join(priv); - } else - ieee80211_stop_queues(priv->hw); - - mutex_unlock(&priv->mtx); - - return 0; -} - -/* must be atomic */ -static void at76_configure_filter(struct ieee80211_hw *hw, - unsigned int changed_flags, - unsigned int *total_flags, int mc_count, - struct dev_addr_list *mc_list) -{ - struct at76_priv *priv = hw->priv; - int flags; - - at76_dbg(DBG_MAC80211, "%s(): changed_flags=0x%08x " - "total_flags=0x%08x mc_count=%d", - __func__, changed_flags, *total_flags, mc_count); - - flags = changed_flags & AT76_SUPPORTED_FILTERS; - *total_flags = AT76_SUPPORTED_FILTERS; - - /* FIXME: access to priv->promisc should be protected with - * priv->mtx, but it's impossible because this function needs to be - * atomic */ - - if (flags && !priv->promisc) { - /* mac80211 wants us to enable promiscuous mode */ - priv->promisc = 1; - } else if (!flags && priv->promisc) { - /* we need to disable promiscuous mode */ - priv->promisc = 0; - } else - return; - - queue_work(hw->workqueue, &priv->work_set_promisc); -} - -static int at76_set_key_oldfw(struct ieee80211_hw *hw, enum set_key_cmd cmd, - const u8 *local_address, const u8 *address, - struct ieee80211_key_conf *key) -{ - struct at76_priv *priv = hw->priv; - - int i; - - at76_dbg(DBG_MAC80211, "%s(): cmd %d key->alg %d key->keyidx %d " - "key->keylen %d", - __func__, cmd, key->alg, key->keyidx, key->keylen); - - if (key->alg != ALG_WEP) - return -EOPNOTSUPP; - - key->hw_key_idx = key->keyidx; - - mutex_lock(&priv->mtx); - - switch (cmd) { - case SET_KEY: - memcpy(priv->wep_keys[key->keyidx], key->key, key->keylen); - priv->wep_keys_len[key->keyidx] = key->keylen; - - /* FIXME: find out how to do this properly */ - priv->wep_key_id = key->keyidx; - - break; - case DISABLE_KEY: - default: - priv->wep_keys_len[key->keyidx] = 0; - break; - } - - priv->wep_enabled = 0; - - for (i = 0; i < WEP_KEYS; i++) { - if (priv->wep_keys_len[i] != 0) - priv->wep_enabled = 1; - } - - at76_startup_device(priv); - - mutex_unlock(&priv->mtx); - - return 0; -} - -static int at76_set_key_newfw(struct ieee80211_hw *hw, enum set_key_cmd cmd, - const u8 *local_address, const u8 *address, - struct ieee80211_key_conf *key) -{ - struct at76_priv *priv = hw->priv; - int ret = -EOPNOTSUPP; - - at76_dbg(DBG_MAC80211, "%s(): cmd %d key->alg %d key->keyidx %d " - "key->keylen %d", - __func__, cmd, key->alg, key->keyidx, key->keylen); - - mutex_lock(&priv->mtx); - - priv->mib_buf.type = MIB_MAC_ENCRYPTION; - - if (cmd == DISABLE_KEY) { - priv->mib_buf.size = CIPHER_KEY_LEN; - priv->mib_buf.index = offsetof(struct mib_mac_encryption, - cipher_default_keyvalue[key->keyidx]); - memset(priv->mib_buf.data.data, 0, CIPHER_KEY_LEN); - if (at76_set_mib(priv, &priv->mib_buf) != CMD_STATUS_COMPLETE) - ret = -EOPNOTSUPP; /* -EIO would be probably better */ - else { - - priv->keys[key->keyidx].cipher = CIPHER_NONE; - priv->keys[key->keyidx].keylen = 0; - }; - if (priv->default_group_key == key->keyidx) - priv->default_group_key = 0xff; - - if (priv->default_pairwise_key == key->keyidx) - priv->default_pairwise_key = 0xff; - /* If default pairwise key is removed, fall back to - * group key? */ - ret = 0; - goto exit; - }; - - if (cmd == SET_KEY) { - /* store key into MIB */ - priv->mib_buf.size = CIPHER_KEY_LEN; - priv->mib_buf.index = offsetof(struct mib_mac_encryption, - cipher_default_keyvalue[key->keyidx]); - memset(priv->mib_buf.data.data, 0, CIPHER_KEY_LEN); - memcpy(priv->mib_buf.data.data, key->key, key->keylen); - - switch (key->alg) { - case ALG_WEP: - if (key->keylen == 5) { - priv->keys[key->keyidx].cipher = - CIPHER_WEP64; - priv->keys[key->keyidx].keylen = 8; - } else if (key->keylen == 13) { - priv->keys[key->keyidx].cipher = - CIPHER_WEP128; - /* Firmware needs this */ - priv->keys[key->keyidx].keylen = 8; - } else { - ret = -EOPNOTSUPP; - goto exit; - }; - break; - case ALG_TKIP: - key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - priv->keys[key->keyidx].cipher = CIPHER_TKIP; - priv->keys[key->keyidx].keylen = 12; - break; - - case ALG_CCMP: - if (!at76_is_505a(priv->board_type)) { - ret = -EOPNOTSUPP; - goto exit; - }; - key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - priv->keys[key->keyidx].cipher = CIPHER_CCMP; - priv->keys[key->keyidx].keylen = 16; - break; - - default: - ret = -EOPNOTSUPP; - goto exit; - }; - - priv->mib_buf.data.data[38] = priv->keys[key->keyidx].cipher; - priv->mib_buf.data.data[39] = 1; /* Taken from atmelwlandriver, - not documented */ - - if (is_valid_ether_addr(address)) - /* Pairwise key */ - priv->mib_buf.data.data[39] |= (KEY_PAIRWISE | KEY_TX); - else if (is_broadcast_ether_addr(address)) - /* Group key */ - priv->mib_buf.data.data[39] |= (KEY_TX); - else /* Key used only for transmission ??? */ - priv->mib_buf.data.data[39] |= (KEY_TX); - - if (at76_set_mib(priv, &priv->mib_buf) != - CMD_STATUS_COMPLETE) { - ret = -EOPNOTSUPP; /* -EIO would be probably better */ - goto exit; - }; - - if ((key->alg == ALG_TKIP) || (key->alg == ALG_CCMP)) - at76_reset_rsc(priv); - - key->hw_key_idx = key->keyidx; - - /* Set up default keys */ - if (is_broadcast_ether_addr(address)) - priv->default_group_key = key->keyidx; - if (is_valid_ether_addr(address)) - priv->default_pairwise_key = key->keyidx; - - /* Set up encryption MIBs */ - - /* first block of settings */ - priv->mib_buf.size = 3; - priv->mib_buf.index = offsetof(struct mib_mac_encryption, - privacy_invoked); - priv->mib_buf.data.data[0] = 1; /* privacy_invoked */ - priv->mib_buf.data.data[1] = priv->default_pairwise_key; - priv->mib_buf.data.data[2] = priv->default_group_key; - - ret = at76_set_mib(priv, &priv->mib_buf); - if (ret != CMD_STATUS_COMPLETE) - goto exit; - - /* second block of settings */ - priv->mib_buf.size = 3; - priv->mib_buf.index = offsetof(struct mib_mac_encryption, - exclude_unencrypted); - priv->mib_buf.data.data[0] = 1; /* exclude_unencrypted */ - priv->mib_buf.data.data[1] = 0; /* wep_encryption_type */ - priv->mib_buf.data.data[2] = 0; /* ckip_key_permutation */ - - ret = at76_set_mib(priv, &priv->mib_buf); - if (ret != CMD_STATUS_COMPLETE) - goto exit; - ret = 0; - }; -exit: - at76_dump_mib_mac_encryption(priv); - mutex_unlock(&priv->mtx); - return ret; -} - -static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, - const u8 *local_address, const u8 *address, - struct ieee80211_key_conf *key) -{ - struct at76_priv *priv = hw->priv; - - at76_dbg(DBG_MAC80211, "%s(): cmd %d key->alg %d key->keyidx %d " - "key->keylen %d", - __func__, cmd, key->alg, key->keyidx, key->keylen); - - if (FIRMWARE_IS_WPA(priv->fw_version)) - return at76_set_key_newfw(hw, cmd, local_address, address, key); - else - return at76_set_key_oldfw(hw, cmd, local_address, address, key); - -} - -static const struct ieee80211_ops at76_ops = { - .tx = at76_mac80211_tx, - .add_interface = at76_add_interface, - .remove_interface = at76_remove_interface, - .config = at76_config, - .config_interface = at76_config_interface, - .configure_filter = at76_configure_filter, - .start = at76_mac80211_start, - .stop = at76_mac80211_stop, - .hw_scan = at76_hw_scan, - .set_key = at76_set_key, -}; - /* Allocate network device and initialize private data */ static struct at76_priv *at76_alloc_new_device(struct usb_device *udev) { - struct ieee80211_hw *hw; + struct net_device *netdev; struct at76_priv *priv; + int i; - hw = ieee80211_alloc_hw(sizeof(struct at76_priv), &at76_ops); - if (!hw) { - printk(KERN_ERR DRIVER_NAME ": could not register" - " ieee80211_hw\n"); + /* allocate memory for our device state and initialize it */ + netdev = alloc_etherdev(sizeof(struct at76_priv)); + if (!netdev) { + dev_printk(KERN_ERR, &udev->dev, "out of memory\n"); return NULL; } - priv = hw->priv; - priv->hw = hw; + priv = netdev_priv(netdev); priv->udev = udev; + priv->netdev = netdev; mutex_init(&priv->mtx); + INIT_WORK(&priv->work_assoc_done, at76_work_assoc_done); + INIT_WORK(&priv->work_join, at76_work_join); + INIT_WORK(&priv->work_new_bss, at76_work_new_bss); + INIT_WORK(&priv->work_start_scan, at76_work_start_scan); INIT_WORK(&priv->work_set_promisc, at76_work_set_promisc); INIT_WORK(&priv->work_submit_rx, at76_work_submit_rx); - INIT_DELAYED_WORK(&priv->dwork_hw_scan, at76_dwork_hw_scan); + INIT_DELAYED_WORK(&priv->dwork_restart, at76_dwork_restart); + INIT_DELAYED_WORK(&priv->dwork_get_scan, at76_dwork_get_scan); + INIT_DELAYED_WORK(&priv->dwork_beacon, at76_dwork_beacon); + INIT_DELAYED_WORK(&priv->dwork_auth, at76_dwork_auth); + INIT_DELAYED_WORK(&priv->dwork_assoc, at76_dwork_assoc); + + spin_lock_init(&priv->mgmt_spinlock); + priv->next_mgmt_bulk = NULL; + priv->mac_state = MAC_INIT; + + /* initialize empty BSS list */ + priv->curr_bss = NULL; + INIT_LIST_HEAD(&priv->bss_list); + spin_lock_init(&priv->bss_list_spinlock); + + init_timer(&priv->bss_list_timer); + priv->bss_list_timer.data = (unsigned long)priv; + priv->bss_list_timer.function = at76_bss_list_timeout; + + spin_lock_init(&priv->spy_spinlock); + + /* mark all rx data entries as unused */ + for (i = 0; i < NR_RX_DATA_BUF; i++) + priv->rx_data[i].skb = NULL; priv->rx_tasklet.func = at76_rx_tasklet; priv->rx_tasklet.data = 0; @@ -2371,9 +5195,6 @@ static struct at76_priv *at76_alloc_new_device(struct usb_device *udev) priv->pm_mode = AT76_PM_OFF; priv->pm_period = 0; - /* unit us */ - priv->hw->channel_change_time = 100000; - return priv; } @@ -2436,42 +5257,11 @@ static int at76_alloc_urbs(struct at76_priv *priv, return 0; } -static struct ieee80211_rate at76_rates[] = { - { .bitrate = 10, .hw_value = TX_RATE_1MBIT, }, - { .bitrate = 20, .hw_value = TX_RATE_2MBIT, }, - { .bitrate = 55, .hw_value = TX_RATE_5_5MBIT, }, - { .bitrate = 110, .hw_value = TX_RATE_11MBIT, }, -}; - -static struct ieee80211_channel at76_channels[] = { - { .center_freq = 2412, .hw_value = 1 }, - { .center_freq = 2417, .hw_value = 2 }, - { .center_freq = 2422, .hw_value = 3 }, - { .center_freq = 2427, .hw_value = 4 }, - { .center_freq = 2432, .hw_value = 5 }, - { .center_freq = 2437, .hw_value = 6 }, - { .center_freq = 2442, .hw_value = 7 }, - { .center_freq = 2447, .hw_value = 8 }, - { .center_freq = 2452, .hw_value = 9 }, - { .center_freq = 2457, .hw_value = 10 }, - { .center_freq = 2462, .hw_value = 11 }, - { .center_freq = 2467, .hw_value = 12 }, - { .center_freq = 2472, .hw_value = 13 }, - { .center_freq = 2484, .hw_value = 14 } -}; - -static struct ieee80211_supported_band at76_supported_band = { - .channels = at76_channels, - .n_channels = ARRAY_SIZE(at76_channels), - .bitrates = at76_rates, - .n_bitrates = ARRAY_SIZE(at76_rates), -}; - /* Register network device and initialize the hardware */ static int at76_init_new_device(struct at76_priv *priv, struct usb_interface *interface) { - struct device *dev = &interface->dev; + struct net_device *netdev = priv->netdev; int ret; /* set up the endpoint information */ @@ -2487,11 +5277,14 @@ static int at76_init_new_device(struct at76_priv *priv, /* MAC address */ ret = at76_get_hw_config(priv); if (ret < 0) { - dev_err(dev, "cannot get MAC address\n"); + dev_printk(KERN_ERR, &interface->dev, + "cannot get MAC address\n"); goto exit; } priv->domain = at76_get_reg_domain(priv->regulatory_domain); + /* init. netdev->dev_addr */ + memcpy(netdev->dev_addr, priv->mac_addr, ETH_ALEN); priv->channel = DEF_CHANNEL; priv->iw_mode = IW_MODE_INFRA; @@ -2501,54 +5294,47 @@ static int at76_init_new_device(struct at76_priv *priv, priv->txrate = TX_RATE_AUTO; priv->preamble_type = PREAMBLE_TYPE_LONG; priv->beacon_period = 100; + priv->beacons_last_qual = jiffies; priv->auth_mode = WLAN_AUTH_OPEN; priv->scan_min_time = DEF_SCAN_MIN_TIME; priv->scan_max_time = DEF_SCAN_MAX_TIME; priv->scan_mode = SCAN_TYPE_ACTIVE; - priv->default_pairwise_key = 0xff; - priv->default_group_key = 0xff; - /* mac80211 initialisation */ - priv->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &at76_supported_band; + netdev->flags &= ~IFF_MULTICAST; /* not yet or never */ + netdev->open = at76_open; + netdev->stop = at76_stop; + netdev->get_stats = at76_get_stats; + netdev->ethtool_ops = &at76_ethtool_ops; - if (FIRMWARE_IS_WPA(priv->fw_version) && - (at76_is_503rfmd(priv->board_type) || - at76_is_505(priv->board_type))) - priv->hw->flags = IEEE80211_HW_SIGNAL_UNSPEC; - else - priv->hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_UNSPEC; + /* Add pointers to enable iwspy support. */ + priv->wireless_data.spy_data = &priv->spy_data; + netdev->wireless_data = &priv->wireless_data; - priv->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); + netdev->hard_start_xmit = at76_tx; + netdev->tx_timeout = at76_tx_timeout; + netdev->watchdog_timeo = 2 * HZ; + netdev->wireless_handlers = &at76_handler_def; + netdev->set_multicast_list = at76_set_multicast; + netdev->set_mac_address = at76_set_mac_address; + dev_alloc_name(netdev, "wlan%d"); - SET_IEEE80211_DEV(priv->hw, &interface->dev); - SET_IEEE80211_PERM_ADDR(priv->hw, priv->mac_addr); - - ret = ieee80211_register_hw(priv->hw); + ret = register_netdev(priv->netdev); if (ret) { - dev_err(dev, "cannot register mac80211 hw (status %d)!\n", ret); + dev_printk(KERN_ERR, &interface->dev, + "cannot register netdevice (status %d)!\n", ret); goto exit; } + priv->netdev_registered = 1; - priv->mac80211_registered = 1; + printk(KERN_INFO "%s: USB %s, MAC %s, firmware %d.%d.%d-%d\n", + netdev->name, dev_name(&interface->dev), mac2str(priv->mac_addr), + priv->fw_version.major, priv->fw_version.minor, + priv->fw_version.patch, priv->fw_version.build); + printk(KERN_INFO "%s: regulatory domain 0x%02x: %s\n", netdev->name, + priv->regulatory_domain, priv->domain->name); - dev_info(dev, "%s: USB %s, MAC %s, firmware %d.%d.%d-%d\n", - wiphy_name(priv->hw->wiphy), - dev_name(&interface->dev), mac2str(priv->mac_addr), - priv->fw_version.major, priv->fw_version.minor, - priv->fw_version.patch, priv->fw_version.build); - dev_info(dev, "%s: regulatory domain 0x%02x: %s\n", - wiphy_name(priv->hw->wiphy), - priv->regulatory_domain, priv->domain->name); - dev_info(dev, "%s: WPA support: ", wiphy_name(priv->hw->wiphy)); - if (!FIRMWARE_IS_WPA(priv->fw_version)) - printk("none\n"); - else { - if (!at76_is_505a(priv->board_type)) - printk("TKIP\n"); - else - printk("TKIP, AES/CCMP\n"); - }; + /* we let this timer run the whole time this driver instance lives */ + mod_timer(&priv->bss_list_timer, jiffies + BSS_LIST_TIMEOUT); exit: return ret; @@ -2556,13 +5342,15 @@ exit: static void at76_delete_device(struct at76_priv *priv) { + int i; + at76_dbg(DBG_PROC_ENTRY, "%s: ENTER", __func__); /* The device is gone, don't bother turning it off */ priv->device_unplugged = 1; - if (priv->mac80211_registered) - ieee80211_unregister_hw(priv->hw); + if (priv->netdev_registered) + unregister_netdev(priv->netdev); /* assuming we used keventd, it must quiesce too */ flush_scheduled_work(); @@ -2583,11 +5371,25 @@ static void at76_delete_device(struct at76_priv *priv) if (priv->rx_skb) kfree_skb(priv->rx_skb); + at76_free_bss_list(priv); + del_timer_sync(&priv->bss_list_timer); + cancel_delayed_work(&priv->dwork_get_scan); + cancel_delayed_work(&priv->dwork_beacon); + cancel_delayed_work(&priv->dwork_auth); + cancel_delayed_work(&priv->dwork_assoc); + + if (priv->mac_state == MAC_CONNECTED) + at76_iwevent_bss_disconnect(priv->netdev); + + for (i = 0; i < NR_RX_DATA_BUF; i++) + if (priv->rx_data[i].skb) { + dev_kfree_skb(priv->rx_data[i].skb); + priv->rx_data[i].skb = NULL; + } usb_put_dev(priv->udev); - at76_dbg(DBG_PROC_ENTRY, "%s: before freeing priv/ieee80211_hw", - __func__); - ieee80211_free_hw(priv->hw); + at76_dbg(DBG_PROC_ENTRY, "%s: before freeing priv/netdev", __func__); + free_netdev(priv->netdev); /* priv is in netdev */ at76_dbg(DBG_PROC_ENTRY, "%s: EXIT", __func__); } @@ -2621,8 +5423,8 @@ static int at76_probe(struct usb_interface *interface, we get 204 with 2.4.23, Fiberline FL-WL240u (505A+RFMD2958) ??? */ if (op_mode == OPMODE_HW_CONFIG_MODE) { - dev_err(&interface->dev, - "cannot handle a device in HW_CONFIG_MODE\n"); + dev_printk(KERN_ERR, &interface->dev, + "cannot handle a device in HW_CONFIG_MODE\n"); ret = -EBUSY; goto error; } @@ -2630,12 +5432,13 @@ static int at76_probe(struct usb_interface *interface, if (op_mode != OPMODE_NORMAL_NIC_WITH_FLASH && op_mode != OPMODE_NORMAL_NIC_WITHOUT_FLASH) { /* download internal firmware part */ - dev_dbg(&interface->dev, "downloading internal firmware\n"); + dev_printk(KERN_DEBUG, &interface->dev, + "downloading internal firmware\n"); ret = at76_load_internal_fw(udev, fwe); if (ret < 0) { - dev_err(&interface->dev, - "error %d downloading internal firmware\n", - ret); + dev_printk(KERN_ERR, &interface->dev, + "error %d downloading internal firmware\n", + ret); goto error; } usb_put_dev(udev); @@ -2660,7 +5463,8 @@ static int at76_probe(struct usb_interface *interface, need_ext_fw = 1; if (need_ext_fw) { - dev_dbg(&interface->dev, "downloading external firmware\n"); + dev_printk(KERN_DEBUG, &interface->dev, + "downloading external firmware\n"); ret = at76_load_external_fw(udev, fwe); if (ret) @@ -2669,8 +5473,8 @@ static int at76_probe(struct usb_interface *interface, /* Re-check firmware version */ ret = at76_get_mib(udev, MIB_FW_VERSION, &fwv, sizeof(fwv)); if (ret < 0) { - dev_err(&interface->dev, - "error %d getting firmware version\n", ret); + dev_printk(KERN_ERR, &interface->dev, + "error %d getting firmware version\n", ret); goto error; } } @@ -2681,6 +5485,7 @@ static int at76_probe(struct usb_interface *interface, goto error; } + SET_NETDEV_DEV(priv->netdev, &interface->dev); usb_set_intfdata(interface, priv); memcpy(&priv->fw_version, &fwv, sizeof(struct mib_fw_version)); @@ -2708,7 +5513,7 @@ static void at76_disconnect(struct usb_interface *interface) if (!priv) return; - printk(KERN_INFO "%s: disconnecting\n", wiphy_name(priv->hw->wiphy)); + printk(KERN_INFO "%s: disconnecting\n", priv->netdev->name); at76_delete_device(priv); dev_printk(KERN_INFO, &interface->dev, "disconnected\n"); } @@ -2764,8 +5569,5 @@ MODULE_AUTHOR("Alex "); MODULE_AUTHOR("Nick Jones"); MODULE_AUTHOR("Balint Seeber "); MODULE_AUTHOR("Pavel Roskin "); -MODULE_AUTHOR("Guido Guenther "); -MODULE_AUTHOR("Kalle Valo "); -MODULE_AUTHOR("Milan Plzik "); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); diff --git a/drivers/staging/at76_usb/at76_usb.h b/drivers/staging/at76_usb/at76_usb.h index 8bb352f16d45..b20be9da1fa1 100644 --- a/drivers/staging/at76_usb/at76_usb.h +++ b/drivers/staging/at76_usb/at76_usb.h @@ -34,6 +34,23 @@ enum board_type { BOARD_505AMX = 8 }; +/* our private ioctl's */ +/* preamble length (0 - long, 1 - short, 2 - auto) */ +#define AT76_SET_SHORT_PREAMBLE (SIOCIWFIRSTPRIV + 0) +#define AT76_GET_SHORT_PREAMBLE (SIOCIWFIRSTPRIV + 1) +/* which debug channels are enabled */ +#define AT76_SET_DEBUG (SIOCIWFIRSTPRIV + 2) +#define AT76_GET_DEBUG (SIOCIWFIRSTPRIV + 3) +/* power save mode (incl. the Atmel proprietary smart save mode) */ +#define AT76_SET_POWERSAVE_MODE (SIOCIWFIRSTPRIV + 4) +#define AT76_GET_POWERSAVE_MODE (SIOCIWFIRSTPRIV + 5) +/* min and max channel times for scan */ +#define AT76_SET_SCAN_TIMES (SIOCIWFIRSTPRIV + 6) +#define AT76_GET_SCAN_TIMES (SIOCIWFIRSTPRIV + 7) +/* scan mode (0 - active, 1 - passive) */ +#define AT76_SET_SCAN_MODE (SIOCIWFIRSTPRIV + 8) +#define AT76_GET_SCAN_MODE (SIOCIWFIRSTPRIV + 9) + #define CMD_STATUS_IDLE 0x00 #define CMD_STATUS_COMPLETE 0x01 #define CMD_STATUS_UNKNOWN 0x02 @@ -65,7 +82,6 @@ enum board_type { #define MIB_MAC 0x03 #define MIB_MAC_MGMT 0x05 #define MIB_MAC_WEP 0x06 -#define MIB_MAC_ENCRYPTION 0x06 #define MIB_PHY 0x07 #define MIB_FW_VERSION 0x08 #define MIB_MDOMAIN 0x09 @@ -90,26 +106,6 @@ enum board_type { #define AT76_PM_ON 2 #define AT76_PM_SMART 3 -/* cipher values for encryption keys */ -#define CIPHER_NONE 0 /* this value is only guessed */ -#define CIPHER_WEP64 1 -#define CIPHER_TKIP 2 -#define CIPHER_CCMP 3 -#define CIPHER_CCX 4 /* for consistency sake only */ -#define CIPHER_WEP128 5 - -/* bit flags key types for encryption keys */ -#define KEY_PAIRWISE 2 -#define KEY_TX 4 - -#define CIPHER_KEYS (4) -#define CIPHER_KEY_LEN (40) - -struct key_config { - u8 cipher; - u8 keylen; -}; - struct hwcfg_r505 { u8 cr39_values[14]; u8 reserved1[14]; @@ -151,9 +147,6 @@ union at76_hwcfg { #define WEP_SMALL_KEY_LEN (40 / 8) #define WEP_LARGE_KEY_LEN (104 / 8) -#define WEP_KEYS (4) - - struct at76_card_config { u8 exclude_unencrypted; @@ -168,7 +161,7 @@ struct at76_card_config { u8 privacy_invoked; u8 wep_default_key_id; /* 0..3 */ u8 current_ssid[32]; - u8 wep_default_key_value[4][WEP_LARGE_KEY_LEN]; + u8 wep_default_key_value[4][WEP_KEY_LEN]; u8 ssid_len; u8 short_preamble; __le16 beacon_period; @@ -193,7 +186,7 @@ struct at76_rx_buffer { u8 link_quality; u8 noise_level; __le32 rx_time; - u8 packet[IEEE80211_MAX_FRAG_THRESHOLD]; + u8 packet[IEEE80211_FRAME_LEN + IEEE80211_FCS_LEN]; } __attribute__((packed)); /* Length of Atmel-specific Tx header before 802.11 frame */ @@ -203,11 +196,8 @@ struct at76_tx_buffer { __le16 wlength; u8 tx_rate; u8 padding; - u8 key_id; - u8 cipher_type; - u8 cipher_length; - u8 reserved; - u8 packet[IEEE80211_MAX_FRAG_THRESHOLD]; + u8 reserved[4]; + u8 packet[IEEE80211_FRAME_LEN + IEEE80211_FCS_LEN]; } __attribute__((packed)); /* defines for scan_type below */ @@ -254,7 +244,6 @@ struct set_mib_buffer { u8 byte; __le16 word; u8 addr[ETH_ALEN]; - u8 data[256]; /* we need more space for mib_mac_encryption */ } data; } __attribute__((packed)); @@ -328,24 +317,10 @@ struct mib_mac_wep { u8 exclude_unencrypted; __le32 wep_icv_error_count; __le32 wep_excluded_count; - u8 wep_default_keyvalue[WEP_KEYS][WEP_LARGE_KEY_LEN]; + u8 wep_default_keyvalue[WEP_KEYS][WEP_KEY_LEN]; u8 encryption_level; /* 1 for 40bit, 2 for 104bit encryption */ } __attribute__((packed)); -struct mib_mac_encryption { - u8 cipher_default_keyvalue[CIPHER_KEYS][CIPHER_KEY_LEN]; - u8 tkip_bssid[6]; - u8 privacy_invoked; - u8 cipher_default_key_id; - u8 cipher_default_group_key_id; - u8 exclude_unencrypted; - u8 wep_encryption_type; - u8 ckip_key_permutation; /* bool */ - __le32 wep_icv_error_count; - __le32 wep_excluded_count; - u8 key_rsc[CIPHER_KEYS][8]; -} __attribute__((packed)); - struct mib_phy { __le32 ed_threshold; @@ -389,6 +364,16 @@ struct at76_fw_header { __le32 ext_fw_len; /* external firmware image length */ } __attribute__((packed)); +enum mac_state { + MAC_INIT, + MAC_SCANNING, + MAC_AUTH, + MAC_ASSOC, + MAC_JOINING, + MAC_CONNECTED, + MAC_OWN_IBSS +}; + /* a description of a regulatory domain and the allowed channels */ struct reg_domain { u16 code; @@ -396,6 +381,47 @@ struct reg_domain { u32 channel_map; /* if bit N is set, channel (N+1) is allowed */ }; +/* how long do we keep a (I)BSS in the bss_list in jiffies + this should be long enough for the user to retrieve the table + (by iwlist ?) after the device started, because all entries from + other channels than the one the device locks on get removed, too */ +#define BSS_LIST_TIMEOUT (120 * HZ) +/* struct to store BSS info found during scan */ +#define BSS_LIST_MAX_RATE_LEN 32 /* 32 rates should be enough ... */ + +struct bss_info { + struct list_head list; + + u8 bssid[ETH_ALEN]; /* bssid */ + u8 ssid[IW_ESSID_MAX_SIZE]; /* essid */ + u8 ssid_len; /* length of ssid above */ + u8 channel; + u16 capa; /* BSS capabilities */ + u16 beacon_interval; /* beacon interval, Kus (1024 microseconds) */ + u8 rates[BSS_LIST_MAX_RATE_LEN]; /* supported rates in units of + 500 kbps, ORed with 0x80 for + basic rates */ + u8 rates_len; + + /* quality of received beacon */ + u8 rssi; + u8 link_qual; + u8 noise_level; + + unsigned long last_rx; /* time (jiffies) of last beacon received */ +}; + +/* a rx data buffer to collect rx fragments */ +struct rx_data_buf { + u8 sender[ETH_ALEN]; /* sender address */ + u16 seqnr; /* sequence number */ + u16 fragnr; /* last fragment received */ + unsigned long last_rx; /* jiffies of last rx */ + struct sk_buff *skb; /* == NULL if entry is free */ +}; + +#define NR_RX_DATA_BUF 8 + /* Data for one loaded firmware file */ struct fwentry { const char *const fwname; @@ -412,9 +438,11 @@ struct fwentry { struct at76_priv { struct usb_device *udev; /* USB device pointer */ + struct net_device *netdev; /* net device pointer */ + struct net_device_stats stats; /* net device stats */ + struct iw_statistics wstats; /* wireless stats */ struct sk_buff *rx_skb; /* skbuff for receiving data */ - struct sk_buff *tx_skb; /* skbuff for transmitting data */ void *bulk_out_buffer; /* buffer for sending data */ struct urb *tx_urb; /* URB for sending data */ @@ -426,17 +454,26 @@ struct at76_priv { struct mutex mtx; /* locks this structure */ /* work queues */ + struct work_struct work_assoc_done; + struct work_struct work_join; + struct work_struct work_new_bss; + struct work_struct work_start_scan; struct work_struct work_set_promisc; struct work_struct work_submit_rx; - struct delayed_work dwork_hw_scan; + struct delayed_work dwork_restart; + struct delayed_work dwork_get_scan; + struct delayed_work dwork_beacon; + struct delayed_work dwork_auth; + struct delayed_work dwork_assoc; struct tasklet_struct rx_tasklet; /* the WEP stuff */ int wep_enabled; /* 1 if WEP is enabled */ int wep_key_id; /* key id to be used */ - u8 wep_keys[WEP_KEYS][WEP_LARGE_KEY_LEN]; /* WEP keys */ - u8 wep_keys_len[WEP_KEYS]; /* length of WEP keys */ + u8 wep_keys[WEP_KEYS][WEP_KEY_LEN]; /* the four WEP keys, + 5 or 13 bytes are used */ + u8 wep_keys_len[WEP_KEYS]; /* the length of the above keys */ int channel; int iw_mode; @@ -458,13 +495,44 @@ struct at76_priv { int scan_mode; /* SCAN_TYPE_ACTIVE, SCAN_TYPE_PASSIVE */ int scan_need_any; /* if set, need to scan for any ESSID */ + /* the list we got from scanning */ + spinlock_t bss_list_spinlock; /* protects bss_list operations */ + struct list_head bss_list; /* list of BSS we got beacons from */ + struct timer_list bss_list_timer; /* timer to purge old entries + from bss_list */ + struct bss_info *curr_bss; /* current BSS */ u16 assoc_id; /* current association ID, if associated */ + u8 wanted_bssid[ETH_ALEN]; + int wanted_bssid_valid; /* != 0 if wanted_bssid is to be used */ + + /* some data for infrastructure mode only */ + spinlock_t mgmt_spinlock; /* this spinlock protects access to + next_mgmt_bulk */ + + struct at76_tx_buffer *next_mgmt_bulk; /* pending management msg to + send via bulk out */ + enum mac_state mac_state; + enum { + SCAN_IDLE, + SCAN_IN_PROGRESS, + SCAN_COMPLETED + } scan_state; + time_t last_scan; + + int retries; /* remaining retries in case of timeout when + * sending AuthReq or AssocReq */ u8 pm_mode; /* power management mode */ u32 pm_period; /* power management period in microseconds */ struct reg_domain const *domain; /* reg domain description */ + /* iwspy support */ + spinlock_t spy_spinlock; + struct iw_spy_data spy_data; + + struct iw_public_data wireless_data; + /* These fields contain HW config provided by the device (not all of * these fields are used by all board types) */ u8 mac_addr[ETH_ALEN]; @@ -472,6 +540,9 @@ struct at76_priv { struct at76_card_config card_config; + /* store rx fragments until complete */ + struct rx_data_buf rx_data[NR_RX_DATA_BUF]; + enum board_type board_type; struct mib_fw_version fw_version; @@ -479,20 +550,58 @@ struct at76_priv { unsigned int netdev_registered:1; struct set_mib_buffer mib_buf; /* global buffer for set_mib calls */ + /* beacon counting */ int beacon_period; /* period of mgmt beacons, Kus */ - - struct ieee80211_hw *hw; - int mac80211_registered; - - struct key_config keys[4]; /* installed key types */ - u8 default_pairwise_key; - u8 default_group_key; + int beacons_received; + unsigned long beacons_last_qual; /* time we restarted counting + beacons */ }; -#define AT76_SUPPORTED_FILTERS FIF_PROMISC_IN_BSS +struct at76_rx_radiotap { + struct ieee80211_radiotap_header rt_hdr; + __le64 rt_tsft; + u8 rt_flags; + u8 rt_rate; + s8 rt_signal; + s8 rt_noise; +}; +#define AT76_RX_RADIOTAP_PRESENT \ + ((1 << IEEE80211_RADIOTAP_TSFT) | \ + (1 << IEEE80211_RADIOTAP_FLAGS) | \ + (1 << IEEE80211_RADIOTAP_RATE) | \ + (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) | \ + (1 << IEEE80211_RADIOTAP_DB_ANTNOISE)) + +#define BEACON_MAX_DATA_LENGTH 1500 + +/* the maximum size of an AssocReq packet */ +#define ASSOCREQ_MAX_SIZE \ + (AT76_TX_HDRLEN + sizeof(struct ieee80211_assoc_request) + \ + 1 + 1 + IW_ESSID_MAX_SIZE + 1 + 1 + 4) + +/* for shared secret auth, add the challenge text size */ +#define AUTH_FRAME_SIZE (AT76_TX_HDRLEN + sizeof(struct ieee80211_auth)) + +/* Maximal number of AuthReq retries */ +#define AUTH_RETRIES 3 + +/* Maximal number of AssocReq retries */ +#define ASSOC_RETRIES 3 + +/* Beacon timeout in managed mode when we are connected */ +#define BEACON_TIMEOUT (10 * HZ) + +/* Timeout for authentication response */ +#define AUTH_TIMEOUT (1 * HZ) + +/* Timeout for association response */ +#define ASSOC_TIMEOUT (1 * HZ) + +/* Polling interval when scan is running */ #define SCAN_POLL_INTERVAL (HZ / 4) +/* Command completion timeout */ #define CMD_COMPLETION_TIMEOUT (5 * HZ) #define DEF_RTS_THRESHOLD 1536 @@ -502,6 +611,8 @@ struct at76_priv { #define DEF_SCAN_MIN_TIME 10 #define DEF_SCAN_MAX_TIME 120 +#define MAX_RTS_THRESHOLD (MAX_FRAG_THRESHOLD + 1) + /* the max padding size for tx in bytes (see calc_padding) */ #define MAX_PADDING_SIZE 53 From ea8f9fe634da9042c01ca2f4e459a7b187056021 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 30 Jan 2009 09:05:03 -0500 Subject: [PATCH 462/566] Staging: at76_usb: fix bugs introduced by "Staging: at76_usb: cleanup dma on stack issues" Tracking down the firmware loading problem led to this commit. $ git bisect bad 0d1d1424330cc1934f2b2742f0cfa2c31e6a250b is first bad commit commit 0d1d1424330cc1934f2b2742f0cfa2c31e6a250b Author: Oliver Neukum Date: Thu Dec 18 13:16:40 2008 +0100 Staging: at76_usb: cleanup dma on stack issues - no DMA on stack - cleanup unclear endianness issue Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman :040000 040000 c4fee9ea0fef25926229d810d19dc2f89cca9401 8b165a35d16280d2413b2700a6080ef290ca1009 M drivers The "no DMA on stack" conversion was incomplete with respect to updating the arguments passed to usb_control_msg. The value 40 is hardcoded as it was prior to conversion. The driver can now load firmware, but is not fully functional. Signed-off-by: Jason Andryuk Cc: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/staging/at76_usb/at76_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/at76_usb/at76_usb.c b/drivers/staging/at76_usb/at76_usb.c index 9195ee9319ff..06ae16341337 100644 --- a/drivers/staging/at76_usb/at76_usb.c +++ b/drivers/staging/at76_usb/at76_usb.c @@ -649,7 +649,7 @@ static int at76_get_op_mode(struct usb_device *udev) return -ENOMEM; ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x33, USB_TYPE_VENDOR | USB_DIR_IN | - USB_RECIP_INTERFACE, 0x01, 0, &op_mode, 1, + USB_RECIP_INTERFACE, 0x01, 0, op_mode, 1, USB_CTRL_GET_TIMEOUT); saved = *op_mode; kfree(op_mode); @@ -782,7 +782,7 @@ static inline int at76_get_cmd_status(struct usb_device *udev, u8 cmd) ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x22, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, cmd, 0, stat_buf, - sizeof(stat_buf), USB_CTRL_GET_TIMEOUT); + 40, USB_CTRL_GET_TIMEOUT); if (ret >= 0) ret = stat_buf[5]; kfree(stat_buf); From 07f269862a2981f1512de5393e2d0ce5b2ee8305 Mon Sep 17 00:00:00 2001 From: Jamie Lentin Date: Wed, 4 Feb 2009 13:38:44 +0000 Subject: [PATCH 463/566] Staging: at76_usb: Add support for OQO Model 01+ Add USB device ID for OQO 01+'s internal wireless LAN An OQO employee mentions the chip's true identity here:- ftp://ftp.oqo.com/unsupported/linux/OQOLinux.html Signed-off-by: Jamie Lentin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/at76_usb/at76_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/at76_usb/at76_usb.c b/drivers/staging/at76_usb/at76_usb.c index 06ae16341337..c8e4d31c7df2 100644 --- a/drivers/staging/at76_usb/at76_usb.c +++ b/drivers/staging/at76_usb/at76_usb.c @@ -231,6 +231,8 @@ static struct usb_device_id dev_table[] = { {USB_DEVICE(0x03eb, 0x7617), USB_DEVICE_DATA(BOARD_505A)}, /* Siemens Gigaset USB WLAN Adapter 11 */ {USB_DEVICE(0x1690, 0x0701), USB_DEVICE_DATA(BOARD_505A)}, + /* OQO Model 01+ Internal Wi-Fi */ + {USB_DEVICE(0x1557, 0x0002), USB_DEVICE_DATA(BOARD_505A)}, /* * at76c505amx-rfmd */ From 5701c0519b7a357a602fda5c96f26197ecfc4c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 30 Jan 2009 20:21:09 -0800 Subject: [PATCH 464/566] Staging: android: ram_console: Disable ECC when early init is enabled and validate buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arve Hjønnevåg Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/Kconfig | 1 + drivers/staging/android/ram_console.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 6b996db0dd6a..604bd1e0d546 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -27,6 +27,7 @@ menuconfig ANDROID_RAM_CONSOLE_ERROR_CORRECTION bool "Android RAM Console Enable error correction" default n depends on ANDROID_RAM_CONSOLE + depends on !ANDROID_RAM_CONSOLE_EARLY_INIT select REED_SOLOMON select REED_SOLOMON_ENC8 select REED_SOLOMON_DEC8 diff --git a/drivers/staging/android/ram_console.c b/drivers/staging/android/ram_console.c index bf006857a87a..643ac5ce381d 100644 --- a/drivers/staging/android/ram_console.c +++ b/drivers/staging/android/ram_console.c @@ -224,9 +224,23 @@ static int __init ram_console_init(struct ram_console_buffer *buffer, ram_console_buffer_size = buffer_size - sizeof(struct ram_console_buffer); + if (ram_console_buffer_size > buffer_size) { + pr_err("ram_console: buffer %p, invalid size %d, datasize %d\n", + buffer, buffer_size, ram_console_buffer_size); + return 0; + } + #ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION ram_console_buffer_size -= (DIV_ROUND_UP(ram_console_buffer_size, ECC_BLOCK_SIZE) + 1) * ECC_SIZE; + + if (ram_console_buffer_size > buffer_size) { + pr_err("ram_console: buffer %p, invalid size %d, " + "non-ecc datasize %d\n", + buffer, buffer_size, ram_console_buffer_size); + return 0; + } + ram_console_par_buffer = buffer->data + ram_console_buffer_size; From d88dfb8dc4bfb66066e7c68727c219faaa541206 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 4 Feb 2009 15:05:05 -0800 Subject: [PATCH 465/566] Staging: android: fix up units in timed_gpio The last build fix I did messed up the units of the sysfs file. This puts them back to be milliseconds, like they originally were. Thanks to Juha Motorsportcom for pointing this out. Reported-by: Juha Motorsportcom Cc: Mike Lockwood Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/timed_gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/timed_gpio.c b/drivers/staging/android/timed_gpio.c index 903270cbbe02..33daff0481d2 100644 --- a/drivers/staging/android/timed_gpio.c +++ b/drivers/staging/android/timed_gpio.c @@ -50,7 +50,7 @@ static ssize_t gpio_enable_show(struct device *dev, struct device_attribute *att if (hrtimer_active(&gpio_data->timer)) { ktime_t r = hrtimer_get_remaining(&gpio_data->timer); struct timeval t = ktime_to_timeval(r); - remaining = t.tv_sec * 1000 + t.tv_usec; + remaining = t.tv_sec * 1000 + t.tv_usec / 1000; } else remaining = 0; From 6136ac86b719716694884a84cd9d403207cc52b9 Mon Sep 17 00:00:00 2001 From: "Sachin P. Sant" Date: Tue, 3 Feb 2009 21:10:58 +0530 Subject: [PATCH 466/566] Staging: panel: fix lcd panel driver build failure * Fix build break for lcd panel driver. Signed-off-by : Sachin Sant Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/staging/panel/panel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c index 5ffe269c2382..ab69c1bf36a8 100644 --- a/drivers/staging/panel/panel.c +++ b/drivers/staging/panel/panel.c @@ -622,7 +622,7 @@ static int set_ctrl_bits(void) } /* sets ctrl & data port bits according to current signals values */ -static void set_bits(void) +static void panel_set_bits(void) { set_data_bits(); set_ctrl_bits(); @@ -707,12 +707,12 @@ static void lcd_send_serial(int byte) */ for (bit = 0; bit < 8; bit++) { bits.cl = BIT_CLR; /* CLK low */ - set_bits(); + panel_set_bits(); bits.da = byte & 1; - set_bits(); + panel_set_bits(); udelay(2); /* maintain the data during 2 us before CLK up */ bits.cl = BIT_SET; /* CLK high */ - set_bits(); + panel_set_bits(); udelay(1); /* maintain the strobe during 1 us */ byte >>= 1; } @@ -727,7 +727,7 @@ static void lcd_backlight(int on) /* The backlight is activated by seting the AUTOFEED line to +5V */ spin_lock(&pprt_lock); bits.bl = on; - set_bits(); + panel_set_bits(); spin_unlock(&pprt_lock); } From 284b066af41579f62649048fdec5c5e7091703e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 9 Feb 2009 16:22:03 -0500 Subject: [PATCH 467/566] Btrfs: don't use spin_is_contended Btrfs was using spin_is_contended to see if it should drop locks before doing extent allocations during btrfs_search_slot. The idea was to avoid expensive searches in the tree unless the lock was actually contended. But, spin_is_contended is specific to the ticket spinlocks on x86, so this is causing compile errors everywhere else. In practice, the contention could easily appear some time after we started doing the extent allocation, and it makes more sense to always drop the lock instead. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 +-- fs/btrfs/locking.c | 22 ---------------------- fs/btrfs/locking.h | 2 -- 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 551177c0011a..35443cc4b9a9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1530,8 +1530,7 @@ again: * for higher level blocks, try not to allocate blocks * with the block and the parent locks held. */ - if (level > 0 && !prealloc_block.objectid && - btrfs_path_lock_waiting(p, level)) { + if (level > 0 && !prealloc_block.objectid) { u32 size = b->len; u64 hint = b->start; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 68fd9ccf1805..9ebe9385129b 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -236,25 +236,3 @@ int btrfs_tree_locked(struct extent_buffer *eb) return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || spin_is_locked(&eb->lock); } - -/* - * btrfs_search_slot uses this to decide if it should drop its locks - * before doing something expensive like allocating free blocks for cow. - */ -int btrfs_path_lock_waiting(struct btrfs_path *path, int level) -{ - int i; - struct extent_buffer *eb; - - for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { - eb = path->nodes[i]; - if (!eb) - break; - smp_mb(); - if (spin_is_contended(&eb->lock) || - waitqueue_active(&eb->lock_wq)) - return 1; - } - return 0; -} - diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index d92e707f5870..6bb0afbff928 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -26,8 +26,6 @@ int btrfs_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_lock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb); -int btrfs_path_lock_waiting(struct btrfs_path *path, int level); - void btrfs_set_lock_blocking(struct extent_buffer *eb); void btrfs_clear_lock_blocking(struct extent_buffer *eb); #endif From 3c552ac8a747d6c26d13302c54d71dae9f56f4ac Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 468/566] x86: make apic_* operations inline functions Mainly to get proper type-checking and consistency. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/apic.h | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b03711d7990b..f4835a1be360 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -139,12 +139,35 @@ struct apic_ops { extern struct apic_ops *apic_ops; -#define apic_read (apic_ops->read) -#define apic_write (apic_ops->write) -#define apic_icr_read (apic_ops->icr_read) -#define apic_icr_write (apic_ops->icr_write) -#define apic_wait_icr_idle (apic_ops->wait_icr_idle) -#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) +static inline u32 apic_read(u32 reg) +{ + return apic_ops->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic_ops->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic_ops->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic_ops->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic_ops->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic_ops->safe_wait_icr_idle(); +} extern int get_physical_broadcast(void); From 4924e228ae039029a9503ad571d91086e4042c90 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 469/566] x86: unstatic mp_find_ioapic so it can be used elsewhere Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/mpspec.h | 1 + arch/x86/kernel/acpi/boot.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 8c5620147c40..b59371a312f1 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -77,6 +77,7 @@ extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); +extern int mp_find_ioapic(int gsi); #else static inline int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c334fe75dcd6..068b900f4b06 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -872,7 +872,7 @@ static struct { DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } mp_ioapic_routing[MAX_IO_APICS]; -static int mp_find_ioapic(int gsi) +int mp_find_ioapic(int gsi) { int i = 0; From c3e137d1e882c4fab9adcce7ae2be9bf3eb64c4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 470/566] x86: add mp_find_ioapic_pin Add mp_find_ioapic_pin() to find an IO APIC's specific pin from a GSI, and use this function within acpi/boot. Make it non-static so other code can use it too. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/mpspec.h | 1 + arch/x86/kernel/acpi/boot.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index b59371a312f1..5916c8df09d9 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -78,6 +78,7 @@ extern int acpi_probe_gsi(void); extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); extern int mp_find_ioapic(int gsi); +extern int mp_find_ioapic_pin(int ioapic, int gsi); #else static inline int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 068b900f4b06..bba162c81d5b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -887,6 +887,16 @@ int mp_find_ioapic(int gsi) return -1; } +int mp_find_ioapic_pin(int ioapic, int gsi) +{ + if (WARN_ON(ioapic == -1)) + return -1; + if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) + return -1; + + return gsi - mp_ioapic_routing[ioapic].gsi_base; +} + static u8 __init uniq_ioapic_id(u8 id) { #ifdef CONFIG_X86_32 @@ -1022,7 +1032,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) ioapic = mp_find_ioapic(gsi); if (ioapic < 0) return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + pin = mp_find_ioapic_pin(ioapic, gsi); /* * TBD: This check is for faulty timer entries, where the override @@ -1142,7 +1152,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); #ifdef CONFIG_X86_32 if (ioapic_renumber_irq) @@ -1231,7 +1241,7 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); save_mp_irq(&mp_irq); #endif From ca97ab90164c7b978abf9d82dc82d6dc2cbac4a0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 471/566] x86: unstatic ioapic entry funcs Unstatic ioapic_write_entry and setup_ioapic_entry functions so that the Xen code can do its own ioapic routing setup. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/io_apic.h | 6 ++++++ arch/x86/kernel/io_apic.c | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 309d0e23193a..59cb4a1317b7 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -169,6 +169,12 @@ extern void reinit_intr_remapped_IO_APIC(int); extern void probe_nr_irqs_gsi(void); +extern int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector); +extern void ioapic_write_entry(int apic, int pin, + struct IO_APIC_route_entry e); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 56e51eb551a5..7248ca11bdcd 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -486,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) io_apic_write(apic, 0x10 + 2*pin, eu.w1); } -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); @@ -1478,10 +1478,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); } -static int setup_ioapic_entry(int apic_id, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) +int setup_ioapic_entry(int apic_id, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) { /* * add it to the IO-APIC irq-routing table: From bf56957d176c279175464f385f3eb03d65819328 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:48 -0800 Subject: [PATCH 472/566] xen: expose enable_IO_APIC for 32-bit enable_IO_APIC() is defined for both 32- and 64-bit x86, so it should be declared for both. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/hw_irq.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1b82781b898d..370e1c83bb49 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -65,9 +65,7 @@ extern void disable_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); -#ifdef CONFIG_X86_64 extern void enable_IO_APIC(void); -#endif /* Statistics */ extern atomic_t irq_err_count; From a51f42f3c940e5582c40454ece066d033bc7e24f Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 9 Feb 2009 14:33:03 -0800 Subject: [PATCH 473/566] netfilter: fix tuple inversion for Node information request The patch fixes a typo in the inverse mapping of Node Information request. Following draft-ietf-ipngwg-icmp-name-lookups-09, "Querier" sends a type 139 (ICMPV6_NI_QUERY) packet to "Responder" which answer with a type 140 (ICMPV6_NI_REPLY) packet. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index c455cf4ee756..114a92e4258d 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -49,8 +49,8 @@ static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, static const u_int8_t invmap[] = { [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, - [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, - [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 + [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1 }; static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, From 3f9007135c1dc896db9a9e35920aafc65b157230 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 9 Feb 2009 14:33:20 -0800 Subject: [PATCH 474/566] netfilter: nf_conntrack_ipv6: don't track ICMPv6 negotiation message This patch removes connection tracking handling for ICMPv6 messages related to Stateless Address Autoconfiguration, MLD, and MLDv2. They can not be tracked because they are massively using multicast (on pre-defined address). But they are not invalid and should not be detected as such. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- .../netfilter/nf_conntrack_proto_icmpv6.c | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 114a92e4258d..c323643ffcf9 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -53,6 +53,17 @@ static const u_int8_t invmap[] = { [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1 }; +static const u_int8_t noct_valid_new[] = { + [ICMPV6_MGM_QUERY - 130] = 1, + [ICMPV6_MGM_REPORT -130] = 1, + [ICMPV6_MGM_REDUCTION - 130] = 1, + [NDISC_ROUTER_SOLICITATION - 130] = 1, + [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, + [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, + [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, + [ICMPV6_MLD2_REPORT - 130] = 1 +}; + static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { @@ -178,6 +189,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; + int type; icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { @@ -194,6 +206,15 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, return -NF_ACCEPT; } + type = icmp6h->icmp6_type - 130; + if (type >= 0 && type < sizeof(noct_valid_new) && + noct_valid_new[type]) { + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + return NF_ACCEPT; + } + /* is not error message ? */ if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; From c969aa7d2cd5621ad4129dae6b6551af422944c6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Feb 2009 14:33:57 -0800 Subject: [PATCH 475/566] netfilter: ctnetlink: allow changing NAT sequence adjustment in creation This patch fixes an inconsistency in the current ctnetlink code since NAT sequence adjustment bit can only be updated but not set in the conntrack entry creation. This patch is used by conntrackd to successfully recover newly created entries that represent connections with helpers and NAT payload mangling. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c32a7e8e3a1b..9051bb4f81da 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1215,6 +1215,16 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } } +#ifdef CONFIG_NF_NAT_NEEDED + if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { + err = ctnetlink_change_nat_seq_adj(ct, cda); + if (err < 0) { + rcu_read_unlock(); + goto err; + } + } +#endif + if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) { From 1f9da256163e3ff91a12d0b861091f0e525139df Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Feb 2009 14:34:26 -0800 Subject: [PATCH 476/566] netfilter: ctnetlink: fix echo if not subscribed to any multicast group This patch fixes echoing if the socket that has sent the request to create/update/delete an entry is not subscribed to any multicast group. With the current code, ctnetlink would not send the echo message via unicast as nfnetlink_send() would be skip. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9051bb4f81da..cb78aa00399e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -434,7 +434,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, } else return NOTIFY_DONE; - if (!nfnetlink_has_listeners(group)) + if (!item->report && !nfnetlink_has_listeners(group)) return NOTIFY_DONE; skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); @@ -1502,7 +1502,8 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; - if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + if (!item->report && + !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) return NOTIFY_DONE; skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); From d4e2675a61890a84849a24affedf80d5cae8b199 Mon Sep 17 00:00:00 2001 From: Qu Haoran Date: Mon, 9 Feb 2009 14:34:56 -0800 Subject: [PATCH 477/566] netfilter: xt_sctp: sctp chunk mapping doesn't work When user tries to map all chunks given in argument, kernel works on a copy of the chunkmap, but at the end it doesn't check the copy, but the orginal one. Signed-off-by: Qu Haoran Signed-off-by: Nicolas Dichtel Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index e223cb43ae8e..a189ada9128f 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -105,7 +105,7 @@ match_packet(const struct sk_buff *skb, switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ALL: - return SCTP_CHUNKMAP_IS_CLEAR(info->chunkmap); + return SCTP_CHUNKMAP_IS_CLEAR(chunkmapcopy); case SCTP_CHUNK_MATCH_ANY: return false; case SCTP_CHUNK_MATCH_ONLY: From 8707bdd48ab705a459ac1b12014075a139d1d4f9 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 9 Feb 2009 14:59:30 -0800 Subject: [PATCH 478/566] gianfar: Fix boot hangs while bringing up gianfar ethernet Ira Snyder found that commit 8c7396aebb68994c0519e438eecdf4d5fa9c7844 "gianfar: Merge Tx and Rx interrupt for scheduling clean up ring" can cause hangs. It's because there was removed clearing of interrupts in gfar_schedule_cleanup() (which is called by an interrupt handler) in case when netif scheduling has been disabled. This patch brings back this action and a comment. Reported-by: Ira Snyder Reported-by: Peter Korsgaard Bisected-by: Ira Snyder Tested-by: Peter Korsgaard Tested-by: Ira Snyder Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- drivers/net/gianfar.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index acae2d8cd688..9b12a13a640f 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -1629,6 +1629,12 @@ static void gfar_schedule_cleanup(struct net_device *dev) if (netif_rx_schedule_prep(&priv->napi)) { gfar_write(&priv->regs->imask, IMASK_RTX_DISABLED); __netif_rx_schedule(&priv->napi); + } else { + /* + * Clear IEVENT, so interrupts aren't called again + * because of the packets that have already arrived. + */ + gfar_write(&priv->regs->ievent, IEVENT_RTX_MASK); } spin_unlock(&priv->rxlock); From 20461c1740cac5e02733221c9f653098a703f55a Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Mon, 9 Feb 2009 15:01:19 -0800 Subject: [PATCH 479/566] IPv6: fix to set device name when new IPv6 over IPv6 tunnel device is created. When the user creates IPv6 over IPv6 tunnel, the device name created by the kernel isn't set to t->parm.name, which is referred as the result of ioctl(). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 58e2b0d93758..d994c55a5b16 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -249,8 +249,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) } t = netdev_priv(dev); - ip6_tnl_dev_init(dev); t->parms = *p; + ip6_tnl_dev_init(dev); if ((err = register_netdevice(dev)) < 0) goto failed_free; From 4906f9985e310fc01f956256b0d58ac28b0dcb19 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 9 Feb 2009 15:07:18 -0800 Subject: [PATCH 480/566] bridge: Fix LRO crash with tun > Kernel BUG at drivers/net/tun.c:444 > invalid opcode: 0000 [1] SMP > last sysfs file: /class/net/lo/ifindex > CPU 0 > Modules linked in: tun ipt_MASQUERADE iptable_nat ip_nat xt_state ip_conntrack > nfnetlink ipt_REJECT xt_tcpudp iptable_filter d > Pid: 6912, comm: qemu-kvm Tainted: G 2.6.18-128.el5 #1 > RIP: 0010:[] [] > :tun:tun_chr_readv+0x2b1/0x3a6 > RSP: 0018:ffff8102202c5e48 EFLAGS: 00010246 > RAX: 0000000000000000 RBX: ffff8102202c5e98 RCX: 0000000004010000 > RDX: ffff810227063680 RSI: ffff8102202c5e9e RDI: ffff8102202c5e92 > RBP: 0000000000010ff6 R08: 0000000000000000 R09: 0000000000000001 > R10: ffff8102202c5e94 R11: 0000000000000202 R12: ffff8102275357c0 > R13: ffff81022755e500 R14: 0000000000000000 R15: ffff8102202c5ef8 > FS: 00002ae4398db980(0000) GS:ffffffff803ac000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 00002ae4ab514000 CR3: 0000000221344000 CR4: 00000000000026e0 > Process qemu-kvm (pid: 6912, threadinfo ffff8102202c4000, task > ffff81022e58d820) > Stack: 00000000498735cb ffff810229d1a3c0 0000000000000000 ffff81022e58d820 > ffffffff8008a461 ffff81022755e528 ffff81022755e528 ffffffff8009f925 > 000005ea05ea0000 ffff8102209d0000 00001051143e1600 ffffffff8003c00e > Call Trace: > [] default_wake_function+0x0/0xe > [] enqueue_hrtimer+0x55/0x70 > [] hrtimer_start+0xbc/0xce > [] :tun:tun_chr_read+0x1a/0x1f > [] vfs_read+0xcb/0x171 > [] sys_read+0x45/0x6e > [] system_call+0x7e/0x83 > > > Code: 0f 0b 68 40 62 6f 88 c2 bc 01 f6 42 0a 08 74 0c 80 4c 24 41 > RIP [] :tun:tun_chr_readv+0x2b1/0x3a6 > RSP > <0>Kernel panic - not syncing: Fatal exception This crashed when an LRO packet generated by bnx2x reached a tun device through the bridge. We're supposed to drop it at the bridge. However, because the check was placed in br_forward instead of __br_forward, it's only effective if we are sending the packet through a single port. This patch fixes it by moving the check into __br_forward. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index bdd9ccea17ce..d2c27c808d3b 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -67,6 +67,11 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { struct net_device *indev; + if (skb_warn_if_lro(skb)) { + kfree_skb(skb); + return; + } + indev = skb->dev; skb->dev = to->dev; skb_forward_csum(skb); @@ -89,7 +94,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { - if (!skb_warn_if_lro(skb) && should_deliver(to, skb)) { + if (should_deliver(to, skb)) { __br_forward(to, skb); return; } From d315760ffa261c15ff92699ac6f514112543d7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 481/566] x86: fix math_emu register frame access do_device_not_available() is the handler for #NM and it declares that it takes a unsigned long and calls math_emu(), which takes a long argument and surprisingly expects the stack frame starting at the zero argument would match struct math_emu_info, which isn't true regardless of configuration in the current code. This patch makes do_device_not_available() take struct pt_regs like other exception handlers and initialize struct math_emu_info with pointer to it and pass pointer to the math_emu_info to math_emulate() like normal C functions do. This way, unless gcc makes a copy of struct pt_regs in do_device_not_available(), the register frame is correctly accessed regardless of kernel configuration or compiler used. This doesn't fix all math_emu problems but it at least gets it somewhat working. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/math_emu.h | 4 +- arch/x86/include/asm/traps.h | 4 +- arch/x86/kernel/traps.c | 15 +++++--- arch/x86/math-emu/fpu_entry.c | 4 +- arch/x86/math-emu/fpu_proto.h | 2 +- arch/x86/math-emu/fpu_system.h | 16 +++----- arch/x86/math-emu/get_address.c | 66 ++++++++++++++++----------------- 7 files changed, 55 insertions(+), 56 deletions(-) diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 302492c77956..031f6266f425 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -11,8 +11,8 @@ struct math_emu_info { long ___orig_eip; union { - struct pt_regs regs; - struct kernel_vm86_regs vm86; + struct pt_regs *regs; + struct kernel_vm86_regs *vm86; }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2ee0a3bceedf..cf3bb053da0b 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); dotraplinkage void do_overflow(struct pt_regs *, long); dotraplinkage void do_bounds(struct pt_regs *, long); dotraplinkage void do_invalid_op(struct pt_regs *, long); -dotraplinkage void do_device_not_available(struct pt_regs *, long); +dotraplinkage void do_device_not_available(struct pt_regs); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long); @@ -77,7 +77,7 @@ extern int panic_on_unrecovered_nmi; extern int kstack_depth_to_print; void math_error(void __user *); -asmlinkage void math_emulate(long); +void math_emulate(struct math_emu_info *); #ifdef CONFIG_X86_32 unsigned long patch_espfix_desc(unsigned long, unsigned long); #else diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055284b..7932338d7cb3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -896,7 +896,7 @@ asmlinkage void math_state_restore(void) EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); @@ -906,16 +906,19 @@ asmlinkage void math_emulate(long arg) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error) +dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { - conditional_sti(regs); - math_emulate(0); + struct math_emu_info info = { }; + + conditional_sti(®s); + + info.regs = ®s; + math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(regs); + conditional_sti(®s); } #else math_state_restore(); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index c268abe72253..5d87f586f8d7 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -131,7 +131,7 @@ u_char emulating = 0; static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip, overrides * override); -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { u_char FPU_modrm, byte1; unsigned short code; @@ -161,7 +161,7 @@ asmlinkage void math_emulate(long arg) RE_ENTRANT_CHECK_ON; #endif /* RE_ENTRANT_CHECKING */ - SETUP_DATA_AREA(arg); + FPU_info = info; FPU_ORIG_EIP = FPU_EIP; diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index 51bfbb61c5b1..9779df436b7d 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h @@ -51,7 +51,7 @@ extern void ffreep(void); extern void fst_i_(void); extern void fstp_i(void); /* fpu_entry.c */ -asmlinkage extern void math_emulate(long arg); +extern void math_emulate(struct math_emu_info *info); extern void math_abort(struct math_emu_info *info, unsigned int signal); /* fpu_etc.c */ extern void FPU_etc(void); diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 6729c6a31348..50fa0ec2c8a5 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,10 +16,6 @@ #include #include -/* This sets the pointer FPU_info to point to the argument part - of the stack frame of math_emulate() */ -#define SETUP_DATA_AREA(arg) FPU_info = (struct math_emu_info *) &arg - /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ #define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) @@ -38,12 +34,12 @@ #define I387 (current->thread.xstate) #define FPU_info (I387->soft.info) -#define FPU_CS (*(unsigned short *) &(FPU_info->regs.cs)) -#define FPU_SS (*(unsigned short *) &(FPU_info->regs.ss)) -#define FPU_DS (*(unsigned short *) &(FPU_info->regs.ds)) -#define FPU_EAX (FPU_info->regs.ax) -#define FPU_EFLAGS (FPU_info->regs.flags) -#define FPU_EIP (FPU_info->regs.ip) +#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->regs->ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->regs->ds)) +#define FPU_EAX (FPU_info->regs->ax) +#define FPU_EFLAGS (FPU_info->regs->flags) +#define FPU_EIP (FPU_info->regs->ip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) #define FPU_lookahead (I387->soft.lookahead) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 62daa7fcc44c..420b3b6e3915 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -29,43 +29,43 @@ #define FPU_WRITE_BIT 0x10 static int reg_offset[] = { - offsetof(struct math_emu_info, regs.ax), - offsetof(struct math_emu_info, regs.cx), - offsetof(struct math_emu_info, regs.dx), - offsetof(struct math_emu_info, regs.bx), - offsetof(struct math_emu_info, regs.sp), - offsetof(struct math_emu_info, regs.bp), - offsetof(struct math_emu_info, regs.si), - offsetof(struct math_emu_info, regs.di) + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di) }; -#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) +#define REG_(x) (*(long *)(reg_offset[(x)] + (u_char *)FPU_info->regs)) static int reg_offset_vm86[] = { - offsetof(struct math_emu_info, regs.cs), - offsetof(struct math_emu_info, vm86.ds), - offsetof(struct math_emu_info, vm86.es), - offsetof(struct math_emu_info, vm86.fs), - offsetof(struct math_emu_info, vm86.gs), - offsetof(struct math_emu_info, regs.ss), - offsetof(struct math_emu_info, vm86.ds) + offsetof(struct pt_regs, cs), + offsetof(struct kernel_vm86_regs, ds), + offsetof(struct kernel_vm86_regs, es), + offsetof(struct kernel_vm86_regs, fs), + offsetof(struct kernel_vm86_regs, gs), + offsetof(struct pt_regs, ss), + offsetof(struct kernel_vm86_regs, ds) }; #define VM86_REG_(x) (*(unsigned short *) \ - (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) + (reg_offset_vm86[((unsigned)x)] + (u_char *)FPU_info->regs)) static int reg_offset_pm[] = { - offsetof(struct math_emu_info, regs.cs), - offsetof(struct math_emu_info, regs.ds), - offsetof(struct math_emu_info, regs.es), - offsetof(struct math_emu_info, regs.fs), - offsetof(struct math_emu_info, regs.ds), /* dummy, not saved on stack */ - offsetof(struct math_emu_info, regs.ss), - offsetof(struct math_emu_info, regs.ds) + offsetof(struct pt_regs, cs), + offsetof(struct pt_regs, ds), + offsetof(struct pt_regs, es), + offsetof(struct pt_regs, fs), + offsetof(struct pt_regs, ds), /* dummy, not saved on stack */ + offsetof(struct pt_regs, ss), + offsetof(struct pt_regs, ds) }; #define PM_REG_(x) (*(unsigned short *) \ - (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) + (reg_offset_pm[((unsigned)x)] + (u_char *)FPU_info->regs)) /* Decode the SIB byte. This function assumes mod != 0 */ static int sib(int mod, unsigned long *fpu_eip) @@ -346,34 +346,34 @@ void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, } switch (rm) { case 0: - address += FPU_info->regs.bx + FPU_info->regs.si; + address += FPU_info->regs->bx + FPU_info->regs->si; break; case 1: - address += FPU_info->regs.bx + FPU_info->regs.di; + address += FPU_info->regs->bx + FPU_info->regs->di; break; case 2: - address += FPU_info->regs.bp + FPU_info->regs.si; + address += FPU_info->regs->bp + FPU_info->regs->si; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 3: - address += FPU_info->regs.bp + FPU_info->regs.di; + address += FPU_info->regs->bp + FPU_info->regs->di; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 4: - address += FPU_info->regs.si; + address += FPU_info->regs->si; break; case 5: - address += FPU_info->regs.di; + address += FPU_info->regs->di; break; case 6: - address += FPU_info->regs.bp; + address += FPU_info->regs->bp; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 7: - address += FPU_info->regs.bx; + address += FPU_info->regs->bx; break; } From 6cd61c0baa8bce32271226198b46c67a7a05d108 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 482/566] elf: add ELF_CORE_COPY_KERNEL_REGS() ELF core dump is used for both user land core dump and kernel crash dump. Depending on architecture, register might need to be accessed differently for userland and kernel. Allow architectures to define ELF_CORE_COPY_KERNEL_REGS() and use different operation for kernel register dump. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/elfcore.h | 9 +++++++++ kernel/kexec.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 5ca54d77079f..7605c5e9589f 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re #endif } +static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +{ +#ifdef ELF_CORE_COPY_KERNEL_REGS + ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs); +#else + elf_core_copy_regs(elfregs, regs); +#endif +} + static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) { #ifdef ELF_CORE_COPY_TASK_REGS diff --git a/kernel/kexec.c b/kernel/kexec.c index 8a6d7b08864e..795e7b67a228 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); + elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, &prstatus, sizeof(prstatus)); final_note(buf); From 76397f72fb9f4c9a96dfe05462887811c81b0e17 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 483/566] x86: stackprotector.h misc update Impact: misc udpate * wrap content with CONFIG_CC_STACK_PROTECTOR so that other arch files can include it directly * add missing includes This will help future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stackprotector.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 36a700acaf2b..ee275e9f48ab 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -1,8 +1,12 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +#ifdef CONFIG_CC_STACKPROTECTOR + #include #include +#include +#include /* * Initialize the stackprotector canary value. @@ -35,4 +39,5 @@ static __always_inline void boot_init_stack_canary(void) percpu_write(irq_stack_union.stack_canary, canary); } -#endif +#endif /* CC_STACKPROTECTOR */ +#endif /* _ASM_STACKPROTECTOR_H */ From 5d707e9c8ef2a3596ed5c975c6ff05cec890c2b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 484/566] stackprotector: update make rules Impact: no default -fno-stack-protector if stackp is enabled, cleanup Stackprotector make rules had the following problems. * cc support test and warning are scattered across makefile and kernel/panic.c. * -fno-stack-protector was always added regardless of configuration. Update such that cc support test and warning are contained in makefile and -fno-stack-protector is added iff stackp is turned off. While at it, prepare for 32bit support. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- Makefile | 3 ++- arch/x86/Makefile | 17 ++++++++++------- kernel/panic.c | 4 ---- scripts/gcc-x86_64-has-stack-protector.sh | 4 +++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 681c1d23b4d4..77a006dae2da 100644 --- a/Makefile +++ b/Makefile @@ -532,8 +532,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) endif # Force gcc to behave correct even for buggy distributions -# Arch Makefiles may override this setting +ifndef CONFIG_CC_STACKPROTECTOR KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) +endif ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls diff --git a/arch/x86/Makefile b/arch/x86/Makefile index cacee981d166..ab48ab497e5a 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -70,14 +70,17 @@ else # this works around some issues with generating unwind tables in older gccs # newer gccs do it by default KBUILD_CFLAGS += -maccumulate-outgoing-args +endif - stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh - stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ - "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) - stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ - "$(CC)" -fstack-protector-all ) - - KBUILD_CFLAGS += $(stackp-y) +ifdef CONFIG_CC_STACKPROTECTOR + cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh + ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) + stackp-y := -fstack-protector + stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all + KBUILD_CFLAGS += $(stackp-y) + else + $(warning stack protector enabled but no compiler support) + endif endif # Stackpointer is addressed different for 32 bit and 64 bit x86 diff --git a/kernel/panic.c b/kernel/panic.c index 33cab3de1763..32fe4eff1b89 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -359,10 +359,6 @@ EXPORT_SYMBOL(warn_slowpath); #ifdef CONFIG_CC_STACKPROTECTOR -#ifndef GCC_HAS_SP -#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. -#endif - /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 325c0a1b03b6..2d69fcdc5609 100644 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -2,5 +2,7 @@ echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then - echo $2 + echo y +else + echo n fi From d627ded5ab2f7818154d57369e3a8cdb40db2569 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 485/566] x86: no stack protector for vdso Impact: avoid crash on vsyscall Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 4d6ef0a336d6..16a9020c8f11 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ - $(filter -g%,$(KBUILD_CFLAGS)) + $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) $(vobjs): KBUILD_CFLAGS += $(CFL) From f0d96110f9fd98a1a22e03b8adba69508843d910 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 486/566] x86: use asm .macro instead of cpp #define in entry_32.S Impact: cleanup Use .macro instead of cpp #define where approriate. This cleans up code and will ease future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 293 +++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 142 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a0b91aac72a1..c461925d3b64 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -101,121 +101,127 @@ #define resume_userspace_sig resume_userspace #endif -#define SAVE_ALL \ - cld; \ - pushl %fs; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET fs, 0;*/\ - pushl %es; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET es, 0;*/\ - pushl %ds; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET ds, 0;*/\ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET eax, 0;\ - pushl %ebp; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebp, 0;\ - pushl %edi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edi, 0;\ - pushl %esi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET esi, 0;\ - pushl %edx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edx, 0;\ - pushl %ecx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ecx, 0;\ - pushl %ebx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ - movl $(__KERNEL_PERCPU), %edx; \ +.macro SAVE_ALL + cld + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0;*/ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0;*/ + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0;*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es + movl $(__KERNEL_PERCPU), %edx movl %edx, %fs +.endm -#define RESTORE_INT_REGS \ - popl %ebx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebx;\ - popl %ecx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ecx;\ - popl %edx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edx;\ - popl %esi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE esi;\ - popl %edi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edi;\ - popl %ebp; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebp;\ - popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4;\ +.macro RESTORE_INT_REGS + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + popl %edi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edi + popl %ebp + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebp + popl %eax + CFI_ADJUST_CFA_OFFSET -4 CFI_RESTORE eax +.endm -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE ds;*/\ -2: popl %es; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE es;*/\ -3: popl %fs; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE fs;*/\ -.pushsection .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: movl $0,(%esp); \ - jmp 3b; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ +.macro RESTORE_REGS + RESTORE_INT_REGS +1: popl %ds + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE ds;*/ +2: popl %es + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE es;*/ +3: popl %fs + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE fs;*/ +.pushsection .fixup, "ax" +4: movl $0, (%esp) + jmp 1b +5: movl $0, (%esp) + jmp 2b +6: movl $0, (%esp) + jmp 3b +.section __ex_table, "a" + .align 4 + .long 1b, 4b + .long 2b, 5b + .long 3b, 6b .popsection +.endm -#define RING0_INT_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 3*4;\ - /*CFI_OFFSET cs, -2*4;*/\ +.macro RING0_INT_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 3*4 + /*CFI_OFFSET cs, -2*4;*/ CFI_OFFSET eip, -3*4 +.endm -#define RING0_EC_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 4*4;\ - /*CFI_OFFSET cs, -2*4;*/\ +.macro RING0_EC_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 4*4 + /*CFI_OFFSET cs, -2*4;*/ CFI_OFFSET eip, -3*4 +.endm -#define RING0_PTREGS_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ - CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ - CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ - CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ - CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ - CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ - CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ - CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ +.macro RING0_PTREGS_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ + CFI_OFFSET eip, PT_EIP-PT_OLDESP + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ + CFI_OFFSET eax, PT_EAX-PT_OLDESP + CFI_OFFSET ebp, PT_EBP-PT_OLDESP + CFI_OFFSET edi, PT_EDI-PT_OLDESP + CFI_OFFSET esi, PT_ESI-PT_OLDESP + CFI_OFFSET edx, PT_EDX-PT_OLDESP + CFI_OFFSET ecx, PT_ECX-PT_OLDESP CFI_OFFSET ebx, PT_EBX-PT_OLDESP +.endm ENTRY(ret_from_fork) CFI_STARTPROC @@ -595,28 +601,30 @@ syscall_badsys: END(syscall_badsys) CFI_ENDPROC -#define FIXUP_ESPFIX_STACK \ - /* since we are on a wrong stack, we cant make it a C code :( */ \ - PER_CPU(gdt_page, %ebx); \ - GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ - addl %esp, %eax; \ - pushl $__KERNEL_DS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4; \ - lss (%esp), %esp; \ - CFI_ADJUST_CFA_OFFSET -8; -#define UNWIND_ESPFIX_STACK \ - movl %ss, %eax; \ - /* see if on espfix stack */ \ - cmpw $__ESPFIX_SS, %ax; \ - jne 27f; \ - movl $__KERNEL_DS, %eax; \ - movl %eax, %ds; \ - movl %eax, %es; \ - /* switch to normal stack */ \ - FIXUP_ESPFIX_STACK; \ -27:; +.macro FIXUP_ESPFIX_STACK + /* since we are on a wrong stack, we cant make it a C code :( */ + PER_CPU(gdt_page, %ebx) + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + addl %esp, %eax + pushl $__KERNEL_DS + CFI_ADJUST_CFA_OFFSET 4 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + lss (%esp), %esp + CFI_ADJUST_CFA_OFFSET -8 +.endm +.macro UNWIND_ESPFIX_STACK + movl %ss, %eax + /* see if on espfix stack */ + cmpw $__ESPFIX_SS, %ax + jne 27f + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + /* switch to normal stack */ + FIXUP_ESPFIX_STACK +27: +.endm /* * Build the entry stubs and pointer table with some assembler magic. @@ -1136,26 +1144,27 @@ END(page_fault) * by hand onto the new stack - while updating the return eip past * the instruction that would have done it for sysenter. */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_sp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ +.macro FIX_STACK offset ok label + cmpw $__KERNEL_CS, 4(%esp) + jne \ok +\label: + movl TSS_sysenter_sp0 + \offset(%esp), %esp + CFI_DEF_CFA esp, 0 + CFI_UNDEFINED eip + pushfl + CFI_ADJUST_CFA_OFFSET 4 + pushl $__KERNEL_CS + CFI_ADJUST_CFA_OFFSET 4 + pushl $sysenter_past_esp + CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 +.endm ENTRY(debug) RING0_INT_FRAME cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) + FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 @@ -1213,7 +1222,7 @@ nmi_stack_correct: nmi_stack_fixup: RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) + FIX_STACK 12, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_debug_stack_check: @@ -1224,7 +1233,7 @@ nmi_debug_stack_check: jb nmi_stack_correct cmpl $debug_esp_fix_insn,(%esp) ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) + FIX_STACK 24, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_espfix_stack: From d9a89a26e02ef9ed03f74a755a8b4d8f3a066622 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 487/566] x86: add %gs accessors for x86_32 Impact: cleanup On x86_32, %gs is handled lazily. It's not saved and restored on kernel entry/exit but only when necessary which usually is during task switch but there are few other places. Currently, it's done by calling savesegment() and loadsegment() explicitly. Define get_user_gs(), set_user_gs() and task_user_gs() and use them instead. While at it, clean up register access macros in signal.c. This cleans up code a bit and will help future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/a.out-core.h | 2 +- arch/x86/include/asm/elf.h | 2 +- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/system.h | 9 +++++++ arch/x86/kernel/process_32.c | 6 ++--- arch/x86/kernel/ptrace.c | 14 +++++----- arch/x86/kernel/signal.c | 41 ++++++++++++------------------ arch/x86/kernel/vm86_32.c | 4 +-- arch/x86/math-emu/get_address.c | 6 ++--- 9 files changed, 41 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 3c601f8224be..bb70e397aa84 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->regs.ds = (u16)regs->ds; dump->regs.es = (u16)regs->es; dump->regs.fs = (u16)regs->fs; - savesegment(gs, dump->regs.gs); + dump->regs.gs = get_user_gs(regs); dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = (u16)regs->cs; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f51a3ddde01a..39b0aac1675c 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -124,7 +124,7 @@ do { \ pr_reg[7] = regs->ds & 0xffff; \ pr_reg[8] = regs->es & 0xffff; \ pr_reg[9] = regs->fs & 0xffff; \ - savesegment(gs, pr_reg[10]); \ + pr_reg[10] = get_user_gs(regs); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs & 0xffff; \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 52948df9cd1d..4955165682c5 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -79,7 +79,7 @@ do { \ #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - loadsegment(gs, 0); \ + set_user_gs(task_pt_regs(tsk), 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2fcc70bc85f3..70c74b8db875 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -182,6 +182,15 @@ extern void native_load_gs_index(unsigned); #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") +/* + * x86_32 user gs accessors. + */ +#ifdef CONFIG_X86_32 +#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) +#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) +#define task_user_gs(tsk) ((tsk)->thread.gs) +#endif + static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 1a1ae8edc40c..d58a340e1be3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -131,7 +131,7 @@ void __show_regs(struct pt_regs *regs, int all) if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; - savesegment(gs, gs); + gs = get_user_gs(regs); } else { sp = (unsigned long) (®s->sp); savesegment(ss, ss); @@ -304,7 +304,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; - savesegment(gs, p->thread.gs); + task_user_gs(p) = get_user_gs(regs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -342,7 +342,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { - __asm__("movl %0, %%gs" : : "r"(0)); + set_user_gs(regs, 0); regs->fs = 0; set_fs(USER_DS); regs->ds = __USER_DS; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb9..508b6b57d0c3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -90,9 +90,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) if (offset != offsetof(struct user_regs_struct, gs)) retval = *pt_regs_access(task_pt_regs(task), offset); else { - retval = task->thread.gs; if (task == current) - savesegment(gs, retval); + retval = get_user_gs(task_pt_regs(task)); + else + retval = task_user_gs(task); } return retval; } @@ -126,13 +127,10 @@ static int set_segment_reg(struct task_struct *task, break; case offsetof(struct user_regs_struct, gs): - task->thread.gs = value; if (task == current) - /* - * The user-mode %gs is not affected by - * kernel entry, so we must update the CPU. - */ - loadsegment(gs, value); + set_user_gs(task_pt_regs(task), value); + else + task_user_gs(task) = value; } return 0; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7fc78b019815..8562387c75a7 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -50,27 +50,23 @@ # define FIX_EFLAGS __FIX_EFLAGS #endif -#define COPY(x) { \ - get_user_ex(regs->x, &sc->x); \ -} +#define COPY(x) do { \ + get_user_ex(regs->x, &sc->x); \ +} while (0) -#define COPY_SEG(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp; \ -} +#define GET_SEG(seg) ({ \ + unsigned short tmp; \ + get_user_ex(tmp, &sc->seg); \ + tmp; \ +}) -#define COPY_SEG_CPL3(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp | 3; \ -} +#define COPY_SEG(seg) do { \ + regs->seg = GET_SEG(seg); \ +} while (0) -#define GET_SEG(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - loadsegment(seg, tmp); \ -} +#define COPY_SEG_CPL3(seg) do { \ + regs->seg = GET_SEG(seg) | 3; \ +} while (0) static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, @@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, get_user_try { #ifdef CONFIG_X86_32 - GET_SEG(gs); + set_user_gs(regs, GET_SEG(gs)); COPY_SEG(fs); COPY_SEG(es); COPY_SEG(ds); @@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_try { #ifdef CONFIG_X86_32 - { - unsigned int tmp; - - savesegment(gs, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->gs); - } + put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); put_user_ex(regs->es, (unsigned int __user *)&sc->es); put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 4eeb5cf9720d..55ea30d2a3d6 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) ret = KVM86->regs32; ret->fs = current->thread.saved_fs; - loadsegment(gs, current->thread.saved_gs); + set_user_gs(ret, current->thread.saved_gs); return ret; } @@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk info->regs32->ax = 0; tsk->thread.saved_sp0 = tsk->thread.sp0; tsk->thread.saved_fs = info->regs32->fs; - savesegment(gs, tsk->thread.saved_gs); + tsk->thread.saved_gs = get_user_gs(info->regs32); tss = &per_cpu(init_tss, get_cpu()); tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 420b3b6e3915..6ef5e99380f9 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment, #endif /* PARANOID */ switch (segment) { - /* gs isn't used by the kernel, so it still has its - user-space value. */ case PREFIX_GS_ - 1: - /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ - savesegment(gs, addr->selector); + /* user gs handling can be lazy, use special accessors */ + addr->selector = get_user_gs(FPU_info->regs); break; default: addr->selector = PM_REG_(segment); From ccbeed3a05908d201b47b6c3dd1a373138bba566 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 488/566] x86: make lazy %gs optional on x86_32 Impact: pt_regs changed, lazy gs handling made optional, add slight overhead to SAVE_ALL, simplifies error_code path a bit On x86_32, %gs hasn't been used by kernel and handled lazily. pt_regs doesn't have place for it and gs is saved/loaded only when necessary. In preparation for stack protector support, this patch makes lazy %gs handling optional by doing the followings. * Add CONFIG_X86_32_LAZY_GS and place for gs in pt_regs. * Save and restore %gs along with other registers in entry_32.S unless LAZY_GS. Note that this unfortunately adds "pushl $0" on SAVE_ALL even when LAZY_GS. However, it adds no overhead to common exit path and simplifies entry path with error code. * Define different user_gs accessors depending on LAZY_GS and add lazy_save_gs() and lazy_load_gs() which are noop if !LAZY_GS. The lazy_*_gs() ops are used to save, load and clear %gs lazily. * Define ELF_CORE_COPY_KERNEL_REGS() which always read %gs directly. xen and lguest changes need to be verified. Signed-off-by: Tejun Heo Cc: Jeremy Fitzhardinge Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 + arch/x86/include/asm/elf.h | 15 +++- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/ptrace.h | 4 +- arch/x86/include/asm/system.h | 12 ++- arch/x86/kernel/asm-offsets_32.c | 1 + arch/x86/kernel/entry_32.S | 132 ++++++++++++++++++++++++----- arch/x86/kernel/process_32.c | 4 +- arch/x86/kernel/ptrace.c | 5 +- arch/x86/lguest/boot.c | 2 +- arch/x86/xen/enlighten.c | 17 ++-- 11 files changed, 158 insertions(+), 40 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5c8e353c1122..5bcdede71ba4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,10 @@ config X86_TRAMPOLINE depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP) default y +config X86_32_LAZY_GS + def_bool y + depends on X86_32 + config KTIME_SCALAR def_bool X86_32 source "init/Kconfig" diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 39b0aac1675c..83c1bc8d2e8a 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -112,7 +112,7 @@ extern unsigned int vdso_enabled; * now struct_user_regs, they are different) */ -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ do { \ pr_reg[0] = regs->bx; \ pr_reg[1] = regs->cx; \ @@ -124,7 +124,6 @@ do { \ pr_reg[7] = regs->ds & 0xffff; \ pr_reg[8] = regs->es & 0xffff; \ pr_reg[9] = regs->fs & 0xffff; \ - pr_reg[10] = get_user_gs(regs); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs & 0xffff; \ @@ -133,6 +132,18 @@ do { \ pr_reg[16] = regs->ss & 0xffff; \ } while (0); +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + pr_reg[10] = get_user_gs(regs); \ +} while (0); + +#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + savesegment(gs, pr_reg[10]); \ +} while (0); + #define ELF_PLATFORM (utsname()->machine) #define set_personality_64bit() do { } while (0) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4955165682c5..f923203dc39a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -79,7 +79,7 @@ do { \ #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - set_user_gs(task_pt_regs(tsk), 0); \ + lazy_load_gs(0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6d34d954c228..e304b66abeea 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -28,7 +28,7 @@ struct pt_regs { int xds; int xes; int xfs; - /* int gs; */ + int xgs; long orig_eax; long eip; int xcs; @@ -50,7 +50,7 @@ struct pt_regs { unsigned long ds; unsigned long es; unsigned long fs; - /* int gs; */ + unsigned long gs; unsigned long orig_ax; unsigned long ip; unsigned long cs; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 70c74b8db875..79b98e5b96f4 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -186,10 +186,20 @@ extern void native_load_gs_index(unsigned); * x86_32 user gs accessors. */ #ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_32_LAZY_GS #define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) #define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) #define task_user_gs(tsk) ((tsk)->thread.gs) -#endif +#define lazy_save_gs(v) savesegment(gs, (v)) +#define lazy_load_gs(v) loadsegment(gs, (v)) +#else /* X86_32_LAZY_GS */ +#define get_user_gs(regs) (u16)((regs)->gs) +#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) +#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) +#define lazy_save_gs(v) do { } while (0) +#define lazy_load_gs(v) do { } while (0) +#endif /* X86_32_LAZY_GS */ +#endif /* X86_32 */ static inline unsigned long get_limit(unsigned long segment) { diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ee4df08feee6..fbf2f33e3080 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -75,6 +75,7 @@ void foo(void) OFFSET(PT_DS, pt_regs, ds); OFFSET(PT_ES, pt_regs, es); OFFSET(PT_FS, pt_regs, fs); + OFFSET(PT_GS, pt_regs, gs); OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); OFFSET(PT_EIP, pt_regs, ip); OFFSET(PT_CS, pt_regs, cs); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c461925d3b64..82e6868bee47 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -30,12 +30,13 @@ * 1C(%esp) - %ds * 20(%esp) - %es * 24(%esp) - %fs - * 28(%esp) - orig_eax - * 2C(%esp) - %eip - * 30(%esp) - %cs - * 34(%esp) - %eflags - * 38(%esp) - %oldesp - * 3C(%esp) - %oldss + * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS + * 2C(%esp) - orig_eax + * 30(%esp) - %eip + * 34(%esp) - %cs + * 38(%esp) - %eflags + * 3C(%esp) - %oldesp + * 40(%esp) - %oldss * * "current" is in register %ebx during any slow entries. */ @@ -101,8 +102,99 @@ #define resume_userspace_sig resume_userspace #endif +/* + * User gs save/restore + * + * %gs is used for userland TLS and kernel only uses it for stack + * canary which is required to be at %gs:20 by gcc. Read the comment + * at the top of stackprotector.h for more info. + * + * Local labels 98 and 99 are used. + */ +#ifdef CONFIG_X86_32_LAZY_GS + + /* unfortunately push/pop can't be no-op */ +.macro PUSH_GS + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +.endm +.macro POP_GS pop=0 + addl $(4 + \pop), %esp + CFI_ADJUST_CFA_OFFSET -(4 + \pop) +.endm +.macro POP_GS_EX +.endm + + /* all the rest are no-op */ +.macro PTGS_TO_GS +.endm +.macro PTGS_TO_GS_EX +.endm +.macro GS_TO_REG reg +.endm +.macro REG_TO_PTGS reg +.endm +.macro SET_KERNEL_GS reg +.endm + +#else /* CONFIG_X86_32_LAZY_GS */ + +.macro PUSH_GS + pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET gs, 0*/ +.endm + +.macro POP_GS pop=0 +98: popl %gs + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE gs*/ + .if \pop <> 0 + add $\pop, %esp + CFI_ADJUST_CFA_OFFSET -\pop + .endif +.endm +.macro POP_GS_EX +.pushsection .fixup, "ax" +99: movl $0, (%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro PTGS_TO_GS +98: mov PT_GS(%esp), %gs +.endm +.macro PTGS_TO_GS_EX +.pushsection .fixup, "ax" +99: movl $0, PT_GS(%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro GS_TO_REG reg + movl %gs, \reg + /*CFI_REGISTER gs, \reg*/ +.endm +.macro REG_TO_PTGS reg + movl \reg, PT_GS(%esp) + /*CFI_REL_OFFSET gs, PT_GS*/ +.endm +.macro SET_KERNEL_GS reg + xorl \reg, \reg + movl \reg, %gs +.endm + +#endif /* CONFIG_X86_32_LAZY_GS */ + .macro SAVE_ALL cld + PUSH_GS pushl %fs CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET fs, 0;*/ @@ -138,6 +230,7 @@ movl %edx, %es movl $(__KERNEL_PERCPU), %edx movl %edx, %fs + SET_KERNEL_GS %edx .endm .macro RESTORE_INT_REGS @@ -164,7 +257,7 @@ CFI_RESTORE eax .endm -.macro RESTORE_REGS +.macro RESTORE_REGS pop=0 RESTORE_INT_REGS 1: popl %ds CFI_ADJUST_CFA_OFFSET -4 @@ -175,6 +268,7 @@ 3: popl %fs CFI_ADJUST_CFA_OFFSET -4 /*CFI_RESTORE fs;*/ + POP_GS \pop .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -188,6 +282,7 @@ .long 2b, 5b .long 3b, 6b .popsection + POP_GS_EX .endm .macro RING0_INT_FRAME @@ -368,6 +463,7 @@ sysenter_exit: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs + PTGS_TO_GS ENABLE_INTERRUPTS_SYSEXIT #ifdef CONFIG_AUDITSYSCALL @@ -416,6 +512,7 @@ sysexit_audit: .align 4 .long 1b,2b .popsection + PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) # system call handler stub @@ -458,8 +555,7 @@ restore_all: restore_nocheck: TRACE_IRQS_IRET restore_nocheck_notrace: - RESTORE_REGS - addl $4, %esp # skip orig_eax/error_code + RESTORE_REGS 4 # skip orig_eax/error_code CFI_ADJUST_CFA_OFFSET -4 irq_return: INTERRUPT_RETURN @@ -1078,7 +1174,10 @@ ENTRY(page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: - /* the function address is in %fs's slot on the stack */ + /* the function address is in %gs's slot on the stack */ + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0*/ pushl %es CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET es, 0*/ @@ -1107,20 +1206,15 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address + GS_TO_REG %ecx + movl PT_GS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d58a340e1be3..86122fa2a1ba 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - savesegment(gs, prev->gs); + lazy_save_gs(prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) - loadsegment(gs, next->gs); + lazy_load_gs(next->gs); percpu_write(current_task, next_p); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 508b6b57d0c3..7ec39ab37a2d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); - regno >>= 2; - if (regno > FS) - --regno; - return ®s->bx + regno; + return ®s->bx + (regno >> 2); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 19e33b6cd593..da2e314f61b5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) /* There's one problem which normal hardware doesn't have: the Host * can't handle us removing entries we're currently using. So we clear * the GS register here: if it's needed it'll be reloaded anyway. */ - loadsegment(gs, 0); + lazy_load_gs(0); lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 37230342c2c4..95ff6a0e942a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone, - * it means we're in a context switch, and %gs has just been - * saved. This means we can zero it out to prevent faults on - * exit from the hypervisor if the next process has no %gs. - * Either way, it has been saved, and the new value will get - * loaded properly. This will go away as soon as Xen has been - * modified to not save/restore %gs for normal hypercalls. + * XXX sleazy hack: If we're being called in a lazy-cpu zone + * and lazy gs handling is enabled, it means we're in a + * context switch, and %gs has just been saved. This means we + * can zero it out to prevent faults on exit from the + * hypervisor if the next process has no %gs. Either way, it + * has been saved, and the new value will get loaded properly. + * This will go away as soon as Xen has been modified to not + * save/restore %gs for normal hypercalls. * * On x86_64, this hack is not used for %gs, because gs points * to KERNEL_GS_BASE (and uses it for PDA references), so we @@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) */ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { #ifdef CONFIG_X86_32 - loadsegment(gs, 0); + lazy_load_gs(0); #else loadsegment(fs, 0); #endif From 60a5317ff0f42dd313094b88f809f63041568b08 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 489/566] x86: implement x86_32 stack protector Impact: stack protector for x86_32 Implement stack protector for x86_32. GDT entry 28 is used for it. It's set to point to stack_canary-20 and have the length of 24 bytes. CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs to the stack canary segment on entry. As %gs is otherwise unused by the kernel, the canary can be anywhere. It's defined as a percpu variable. x86_32 exception handlers take register frame on stack directly as struct pt_regs. With -fstack-protector turned on, gcc copies the whole structure after the stack canary and (of course) doesn't copy back on return thus losing all changed. For now, -fno-stack-protector is added to all files which contain those functions. We definitely need something better. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/processor.h | 4 + arch/x86/include/asm/segment.h | 9 ++- arch/x86/include/asm/stackprotector.h | 91 +++++++++++++++++++++-- arch/x86/include/asm/system.h | 21 ++++++ arch/x86/kernel/Makefile | 18 +++++ arch/x86/kernel/cpu/common.c | 17 +++-- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/head_32.S | 20 ++++- arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/setup_percpu.c | 2 + scripts/gcc-x86_32-has-stack-protector.sh | 8 ++ 12 files changed, 180 insertions(+), 16 deletions(-) create mode 100644 scripts/gcc-x86_32-has-stack-protector.sh diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bcdede71ba4..f760a22f95dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -209,7 +209,7 @@ config X86_TRAMPOLINE config X86_32_LAZY_GS def_bool y - depends on X86_32 + depends on X86_32 && !CC_STACKPROTECTOR config KTIME_SCALAR def_bool X86_32 @@ -1356,7 +1356,6 @@ config CC_STACKPROTECTOR_ALL config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 select CC_STACKPROTECTOR_ALL help This option turns on the -fstack-protector GCC feature. This diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9763eb700138..5a9472104253 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -396,7 +396,11 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); +#else /* X86_64 */ +#ifdef CONFIG_CC_STACKPROTECTOR +DECLARE_PER_CPU(unsigned long, stack_canary); #endif +#endif /* X86_64 */ extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 1dc1b51ac623..14e0ed86a6f9 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -61,7 +61,7 @@ * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - unused + * 28 - stack_canary-20 [ for stack protector ] * 29 - unused * 30 - unused * 31 - TSS for double fault handler @@ -95,6 +95,13 @@ #define __KERNEL_PERCPU 0 #endif +#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16) +#ifdef CONFIG_CC_STACKPROTECTOR +#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8) +#else +#define __KERNEL_STACK_CANARY 0 +#endif + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index ee275e9f48ab..fa7e5bd6fbe8 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -1,3 +1,35 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and unfortunately gcc requires it to be at a fixed offset from %gs. + * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 + * and x86_32 use segment registers differently and thus handles this + * requirement differently. + * + * On x86_64, %gs is shared by percpu area and stack canary. All + * percpu symbols are zero based and %gs points to the base of percpu + * area. The first occupant of the percpu area is always + * irq_stack_union which contains stack_canary at offset 40. Userland + * %gs is always saved and restored on kernel entry and exit using + * swapgs, so stack protector doesn't add any complexity there. + * + * On x86_32, it's slightly more complicated. As in x86_64, %gs is + * used for userland TLS. Unfortunately, some processors are much + * slower at loading segment registers with different value when + * entering and leaving the kernel, so the kernel uses %fs for percpu + * area and manages %gs lazily so that %gs is switched only when + * necessary, usually during task switch. + * + * As gcc requires the stack canary at %gs:20, %gs can't be managed + * lazily if stack protector is enabled, so the kernel saves and + * restores userland %gs on kernel entry and exit. This behavior is + * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in + * system.h to hide the details. + */ + #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 @@ -6,8 +38,18 @@ #include #include #include +#include +#include #include +/* + * 24 byte read-only segment initializer for stack canary. Linker + * can't handle the address bit shifting. Address will be set in + * head_32 for boot CPU and setup_per_cpu_areas() for others. + */ +#define GDT_STACK_CANARY_INIT \ + [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, + /* * Initialize the stackprotector canary value. * @@ -19,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void) u64 canary; u64 tsc; - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ +#ifdef CONFIG_X86_64 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); - +#endif /* * We both use the random pool and the current TSC as a source * of randomness. The TSC only matters for very early init, @@ -36,7 +75,49 @@ static __always_inline void boot_init_stack_canary(void) canary += tsc + (tsc << 32UL); current->stack_canary = canary; +#ifdef CONFIG_X86_64 percpu_write(irq_stack_union.stack_canary, canary); +#else + percpu_write(stack_canary, canary); +#endif +} + +static inline void setup_stack_canary_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); + struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); + struct desc_struct desc; + + desc = gdt_table[GDT_ENTRY_STACK_CANARY]; + desc.base0 = canary & 0xffff; + desc.base1 = (canary >> 16) & 0xff; + desc.base2 = (canary >> 24) & 0xff; + write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); +#endif +} + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); +#endif +} + +#else /* CC_STACKPROTECTOR */ + +#define GDT_STACK_CANARY_INIT + +/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ + +static inline void setup_stack_canary_segment(int cpu) +{ } + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm volatile ("mov %0, %%gs" : : "r" (0)); +#endif } #endif /* CC_STACKPROTECTOR */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 79b98e5b96f4..2692ee8ef031 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -23,6 +23,22 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_X86_32 +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movl "__percpu_arg([current_task])",%%ebx\n\t" \ + "movl %P[task_canary](%%ebx),%%ebx\n\t" \ + "movl %%ebx,"__percpu_arg([stack_canary])"\n\t" +#define __switch_canary_oparam \ + , [stack_canary] "=m" (per_cpu_var(stack_canary)) +#define __switch_canary_iparam \ + , [current_task] "m" (per_cpu_var(current_task)) \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + /* * Saving eflags is important. It switches not only IOPL between tasks, * it also protects other tasks from NT leaking through sysenter etc. @@ -46,6 +62,7 @@ do { \ "pushl %[next_ip]\n\t" /* restore EIP */ \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ + __switch_canary \ "popl %%ebp\n\t" /* restore EBP */ \ "popfl\n" /* restore flags */ \ \ @@ -58,6 +75,8 @@ do { \ "=b" (ebx), "=c" (ecx), "=d" (edx), \ "=S" (esi), "=D" (edi) \ \ + __switch_canary_oparam \ + \ /* input parameters: */ \ : [next_sp] "m" (next->thread.sp), \ [next_ip] "m" (next->thread.ip), \ @@ -66,6 +85,8 @@ do { \ [prev] "a" (prev), \ [next] "d" (next) \ \ + __switch_canary_iparam \ + \ : /* reloaded segment registers */ \ "memory"); \ } while (0) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 37fa30bada17..b1f8be33300d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,6 +24,24 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) +# +# On x86_32, register frame is passed verbatim on stack as struct +# pt_regs. gcc considers the parameter to belong to the callee and +# with -fstack-protector it copies pt_regs to the callee's stack frame +# to put the structure after the stack canary causing changes made by +# the exception handlers to be lost. Turn off stack protector for all +# files containing functions which take struct pt_regs from register +# frame. +# +# The proper way to fix this is to teach gcc that the argument belongs +# to the caller for these functions, oh well... +# +ifdef CONFIG_X86_32 +CFLAGS_process_32.o := $(nostackp) +CFLAGS_vm86_32.o := $(nostackp) +CFLAGS_signal.o := $(nostackp) +CFLAGS_traps.o := $(nostackp) +endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 41b0de6df873..260fe4cb2c82 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "cpu.h" @@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + GDT_STACK_CANARY_INIT #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); @@ -261,6 +263,7 @@ void load_percpu_segment(int cpu) loadsegment(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif + load_stack_canary_segment(); } /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -946,16 +949,21 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); -#else +#else /* x86_64 */ -/* Make sure %fs is initialized properly in idle threads */ +#ifdef CONFIG_CC_STACKPROTECTOR +DEFINE_PER_CPU(unsigned long, stack_canary); +#endif + +/* Make sure %fs and %gs are initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; + regs->gs = __KERNEL_STACK_CANARY; return regs; } -#endif +#endif /* x86_64 */ /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -1120,9 +1128,6 @@ void __cpuinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - /* Clear all 6 debug registers: */ set_debugreg(0, 0); set_debugreg(0, 1); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 82e6868bee47..5f5bd22adcd4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -186,7 +186,7 @@ /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg - xorl \reg, \reg + movl $(__KERNEL_STACK_CANARY), \reg movl \reg, %gs .endm diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 24c0e5cd71e3..924e31615fb6 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,6 +19,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -437,8 +438,25 @@ is386: movl $2,%ecx # set MP movl $(__KERNEL_PERCPU), %eax movl %eax,%fs # set this cpu's percpu - xorl %eax,%eax # Clear GS and LDT +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * The linker can't handle this by relocation. Manually set + * base address in stack canary segment descriptor. + */ + cmpb $0,ready + jne 1f + movl $per_cpu__gdt_page,%eax + movl $per_cpu__stack_canary,%ecx + movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) + shrl $16, %ecx + movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) + movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) +1: +#endif + movl $(__KERNEL_STACK_CANARY),%eax movl %eax,%gs + + xorl %eax,%eax # Clear LDT lldt %ax cld # gcc2 wants the direction flag cleared at all times diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 86122fa2a1ba..9a62383e7c3c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -212,6 +212,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.ds = __USER_DS; regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef91747bbed5..d992e6cff730 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void) per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); + setup_stack_canary_segment(cpu); /* * Copy data used in early init routines from the * initial arrays to the per cpu data areas. These diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh new file mode 100644 index 000000000000..4fdf6ce1b062 --- /dev/null +++ b/scripts/gcc-x86_32-has-stack-protector.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +if [ "$?" -eq "0" ] ; then + echo y +else + echo n +fi From acd895795d35d7c6405f20301a846d16998795ec Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 9 Feb 2009 19:20:50 +0000 Subject: [PATCH 490/566] profiling: fix broken profiling regression Impact: fix broken /proc/profile on UP machines Commit c309b917cab55799ea489d7b5f1b77025d9f8462 "cpumask: convert kernel/profile.c" broke profiling. prof_cpu_mask was previously initialized to CPU_MASK_ALL, but left uninitialized in that commit. We need to copy cpu_possible_mask (cpu_online_mask is not enough). Signed-off-by: Hugh Dickins Signed-off-by: Ingo Molnar --- kernel/profile.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/profile.c b/kernel/profile.c index 784933acf5b8..7724e0409bae 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -114,12 +114,15 @@ int __ref profile_init(void) if (!slab_is_available()) { prof_buffer = alloc_bootmem(buffer_bytes); alloc_bootmem_cpumask_var(&prof_cpu_mask); + cpumask_copy(prof_cpu_mask, cpu_possible_mask); return 0; } if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) return -ENOMEM; + cpumask_copy(prof_cpu_mask, cpu_possible_mask); + prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL); if (prof_buffer) return 0; From 6c24b17453c8dc444a746e45b8a404498fc9fcf7 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 9 Feb 2009 21:08:07 -0600 Subject: [PATCH 491/566] powerpc/fsl-booke: Fix mapping functions to use phys_addr_t Fixed v_mapped_by_tlbcam() and p_mapped_by_tlbcam() to use phys_addr_t instead of unsigned long. In 36-bit physical mode we really need these functions to deal with phys_addr_t when trying to match a physical address or when returning one. Signed-off-by: Kumar Gala --- arch/powerpc/mm/fsl_booke_mmu.c | 4 ++-- arch/powerpc/mm/pgtable_32.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 1971e4ee3d6e..ea6e41e39d9f 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -73,7 +73,7 @@ extern unsigned int tlbcam_index; /* * Return PA for this VA if it is mapped by a CAM, or 0 */ -unsigned long v_mapped_by_tlbcam(unsigned long va) +phys_addr_t v_mapped_by_tlbcam(unsigned long va) { int b; for (b = 0; b < tlbcam_index; ++b) @@ -85,7 +85,7 @@ unsigned long v_mapped_by_tlbcam(unsigned long va) /* * Return VA for a given PA or 0 if not mapped */ -unsigned long p_mapped_by_tlbcam(unsigned long pa) +unsigned long p_mapped_by_tlbcam(phys_addr_t pa) { int b; for (b = 0; b < tlbcam_index; ++b) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 22972cd83cc9..58bcaeba728d 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -61,8 +61,8 @@ void setbat(int index, unsigned long virt, phys_addr_t phys, #ifdef HAVE_TLBCAM extern unsigned int tlbcam_index; -extern unsigned long v_mapped_by_tlbcam(unsigned long va); -extern unsigned long p_mapped_by_tlbcam(unsigned long pa); +extern phys_addr_t v_mapped_by_tlbcam(unsigned long va); +extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa); #else /* !HAVE_TLBCAM */ #define v_mapped_by_tlbcam(x) (0UL) #define p_mapped_by_tlbcam(x) (0UL) From eef336189b2b5ae68bfbef0df24176a4a152d981 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 6 Feb 2009 02:02:00 +0000 Subject: [PATCH 492/566] powerpc: Don't emulate mr. instructions Currently emulate_step() emulates mr. instructions without updating cr0 and this can be disastrous. Don't emulate mr. This bug has been around for a while, but I am not sure if its a worthy -stable candidate. I'll leave it to Ben do decide. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/sstep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 4aae0c387645..13b7d54f185b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -172,6 +172,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) } break; case 0x378: /* orx */ + if (instr & 1) + break; rs = (instr >> 21) & 0x1f; rb = (instr >> 11) & 0x1f; if (rs == rb) { /* mr */ From f25f9074c24f1451a74942c4bc089bb53e47f462 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2009 20:22:40 +0000 Subject: [PATCH 493/566] powerpc/ftrace: Fix math to calculate offset in TOC Impact: fix dynamic ftrace with large modules in PPC64 The math to calculate the offset into the TOC that is taken from reading the trampoline is incorrect. The bottom half of the offset is a signed extended short. The current code was using an OR to create the offset when it should have been using an addition. Signed-off-by: Steven Rostedt Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ftrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 5355244c99ff..60c60ccf5e3c 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -195,8 +195,9 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - offset = (unsigned)((unsigned short)jmp[0]) << 16 | - (unsigned)((unsigned short)jmp[1]); + /* The bottom half is signed extended */ + offset = ((unsigned)((unsigned short)jmp[0]) << 16) + + (int)((short)jmp[1]); DEBUGP(" %x ", offset); From d87bf76679bd37593ae4a3133f5da9395a4963ac Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 8 Feb 2009 13:04:14 +0000 Subject: [PATCH 494/566] powerpc/cell: Add missing #include for oprofile arch/powerpc/oprofile/cell/spu_profiler.c is missing a asm/time.h include which is required for ppc_proc_freq. This can cause compile failures for some config combinations. Signed-off-by: Michael Neuling Acked-by: Arnd Bergmann Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/oprofile/cell/spu_profiler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index 9305ddaac512..b129d007e7fe 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "pr_util.h" #define SCALE_SHIFT 14 From 5b11abfdb572bf9284e596dd198ac2aaf95b6616 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 8 Feb 2009 14:27:21 +0000 Subject: [PATCH 495/566] powerpc/pci: mmap anonymous memory when legacy_mem doesn't exist The new legacy_mem file in sysfs is causing problems with X on machines that don't support legacy memory access. The way I initially implemented it, we would fail with -ENXIO when trying to mmap it, thus exposing to X that we do support the API but there is no legacy memory. Unfortunately, X poor error handling is causing it to fail to start when it gets this error. This implements a workaround hack that instead maps anonymous memory instead (using shmem if VM_SHARED is set, just like /dev/zero does). Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 19b12d2cbb4b..0f4181272311 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -561,8 +561,21 @@ int pci_mmap_legacy_page_range(struct pci_bus *bus, (unsigned long long)(offset + size - 1)); if (mmap_state == pci_mmap_mem) { - if ((offset + size) > hose->isa_mem_size) - return -ENXIO; + /* Hack alert ! + * + * Because X is lame and can fail starting if it gets an error trying + * to mmap legacy_mem (instead of just moving on without legacy memory + * access) we fake it here by giving it anonymous memory, effectively + * behaving just like /dev/zero + */ + if ((offset + size) > hose->isa_mem_size) { + printk(KERN_DEBUG + "Process %s (pid:%d) mapped non-existing PCI legacy memory for 0%04x:%02x\n", + current->comm, current->pid, pci_domain_nr(bus), bus->number); + if (vma->vm_flags & VM_SHARED) + return shmem_zero_setup(vma); + return 0; + } offset += hose->isa_mem_phys; } else { unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; From 0b2f82872ff855b92e9e8356b90ef429d96d6977 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 8 Feb 2009 14:49:39 +0000 Subject: [PATCH 496/566] powerpc: Add missing sparsemem.h include arch/powerpc/platforms/pseries/hotplug-memory.c uses remove_section_mapping() but doesn't include sparsemem.h which defines it. This can cause compilation fails for some configs. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/hotplug-memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index a623ad256e9e..9b21ee68ea50 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -14,6 +14,7 @@ #include #include #include +#include static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size) { From e0fc4f97ab6b11594d0b0658ef6cf02bd68f3b4e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 9 Feb 2009 21:35:39 +0100 Subject: [PATCH 497/566] [ARM] Storage class should be before const qualifier The C99 specification states in section 6.11.5: The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature. Signed-off-by: Tobias Klauser Signed-off-by: Russell King --- arch/arm/kernel/machine_kexec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 440dc62cdc3a..598ca61e7bca 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -13,8 +13,8 @@ #include #include -const extern unsigned char relocate_new_kernel[]; -const extern unsigned int relocate_new_kernel_size; +extern const unsigned char relocate_new_kernel[]; +extern const unsigned int relocate_new_kernel_size; extern void setup_mm_for_reboot(char mode); From f6f35bbe7c6494e66590cf519e21da2dd8d59e01 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 8 Feb 2009 15:22:25 +0100 Subject: [PATCH 498/566] [ARM] AACI: timeout will reach -1 With a postfix decrement the timeout will reach -1 rather than 0, so the warning will not be issued. Signed-off-by: Roel Kluin Signed-off-by: Russell King --- sound/arm/aaci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index 89096e811a4b..772901e41ecb 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -90,7 +90,7 @@ static void aaci_ac97_write(struct snd_ac97 *ac97, unsigned short reg, */ do { v = readl(aaci->base + AACI_SLFR); - } while ((v & (SLFR_1TXB|SLFR_2TXB)) && timeout--); + } while ((v & (SLFR_1TXB|SLFR_2TXB)) && --timeout); if (!timeout) dev_err(&aaci->dev->dev, @@ -126,7 +126,7 @@ static unsigned short aaci_ac97_read(struct snd_ac97 *ac97, unsigned short reg) */ do { v = readl(aaci->base + AACI_SLFR); - } while ((v & SLFR_1TXB) && timeout--); + } while ((v & SLFR_1TXB) && --timeout); if (!timeout) { dev_err(&aaci->dev->dev, "timeout on slot 1 TX busy\n"); @@ -147,7 +147,7 @@ static unsigned short aaci_ac97_read(struct snd_ac97 *ac97, unsigned short reg) do { cond_resched(); v = readl(aaci->base + AACI_SLFR) & (SLFR_1RXV|SLFR_2RXV); - } while ((v != (SLFR_1RXV|SLFR_2RXV)) && timeout--); + } while ((v != (SLFR_1RXV|SLFR_2RXV)) && --timeout); if (!timeout) { dev_err(&aaci->dev->dev, "timeout on RX valid\n"); From b52af40923fc91a12e3c7152d833e0c0c6a508f6 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 10 Feb 2009 09:21:07 +0100 Subject: [PATCH 499/566] i8327: fix outb() parameter order In i8237A_resume(), when resetting the DMA controller, the parameters to dma_outb() were mixed up. Signed-off-by: Clemens Ladisch [ cleaned up the file a tiny bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8237.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index dbd6c1d1b638..b42ca694dc68 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev) flags = claim_dma_lock(); - dma_outb(DMA1_RESET_REG, 0); - dma_outb(DMA2_RESET_REG, 0); + dma_outb(0, DMA1_RESET_REG); + dma_outb(0, DMA2_RESET_REG); - for (i = 0;i < 8;i++) { + for (i = 0; i < 8; i++) { set_dma_addr(i, 0x000000); /* DMA count is a bit weird so this is not 0 */ set_dma_count(i, 1); @@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class i8237_sysdev_class = { - .name = "i8237", - .suspend = i8237A_suspend, - .resume = i8237A_resume, + .name = "i8237", + .suspend = i8237A_suspend, + .resume = i8237A_resume, }; static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, + .id = 0, + .cls = &i8237_sysdev_class, }; static int __init i8237A_init_sysfs(void) @@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void) error = sysdev_register(&device_i8237A); return error; } - device_initcall(i8237A_init_sysfs); From e3944bfac961cd7fc82f3b3143c55dc375748569 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 13:07:13 -0500 Subject: [PATCH 500/566] tracing, x86: fix fixup section to return to original code Impact: fix to prevent a kernel crash on fault If for some reason the pointer to the parent function on the stack takes a fault, the fix up code will not return back to the original faulting code. This can lead to unpredictable results and perhaps even a kernel panic. A fault should not happen, but if it does, we should simply disable the tracer, warn, and continue running the kernel. It should not lead to a kernel crash. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1b43086b097a..9d549e4fe880 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -491,13 +491,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) "1: " _ASM_MOV " (%[parent_old]), %[old]\n" "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" " movl $0, %[faulted]\n" + "3:\n" ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" + "4: movl $1, %[faulted]\n" + " jmp 3b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : [parent_replaced] "=r" (parent), [old] "=r" (old), [faulted] "=r" (faulted) From 5a6fe125950676015f5108fb71b2a67441755003 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Feb 2009 14:02:27 +0000 Subject: [PATCH 501/566] Do not account for the address space used by hugetlbfs using VM_ACCOUNT When overcommit is disabled, the core VM accounts for pages used by anonymous shared, private mappings and special mappings. It keeps track of VMAs that should be accounted for with VM_ACCOUNT and VMAs that never had a reserve with VM_NORESERVE. Overcommit for hugetlbfs is much riskier than overcommit for base pages due to contiguity requirements. It avoids overcommiting on both shared and private mappings using reservation counters that are checked and updated during mmap(). This ensures (within limits) that hugepages exist in the future when faults occurs or it is too easy to applications to be SIGKILLed. As hugetlbfs makes its own reservations of a different unit to the base page size, VM_ACCOUNT should never be set. Even if the units were correct, we would double account for the usage in the core VM and hugetlbfs. VM_NORESERVE may be set because an application can request no reserves be made for hugetlbfs at the risk of getting killed later. With commit fc8744adc870a8d4366908221508bb113d8b72ee, VM_NORESERVE and VM_ACCOUNT are getting unconditionally set for hugetlbfs-backed mappings. This breaks the accounting for both the core VM and hugetlbfs, can trigger an OOM storm when hugepage pools are too small lockups and corrupted counters otherwise are used. This patch brings hugetlbfs more in line with how the core VM treats VM_NORESERVE but prevents VM_ACCOUNT being set. Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 8 +++++--- include/linux/hugetlb.h | 5 +++-- include/linux/mm.h | 3 +-- ipc/shm.c | 8 +++++--- mm/fremap.c | 2 +- mm/hugetlb.c | 39 +++++++++++++++++++++++++-------------- mm/mmap.c | 38 ++++++++++++++++++++++---------------- mm/mprotect.c | 5 +++-- 8 files changed, 65 insertions(+), 43 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6903d37af037..9b800d97a687 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -108,7 +108,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), - len >> huge_page_shift(h), vma)) + len >> huge_page_shift(h), vma, + vma->vm_flags)) goto out; ret = 0; @@ -947,7 +948,7 @@ static int can_do_hugetlb_shm(void) can_do_mlock()); } -struct file *hugetlb_file_setup(const char *name, size_t size) +struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) { int error = -ENOMEM; struct file *file; @@ -981,7 +982,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size) error = -ENOMEM; if (hugetlb_reserve_pages(inode, 0, - size >> huge_page_shift(hstate_inode(inode)), NULL)) + size >> huge_page_shift(hstate_inode(inode)), NULL, + acctflag)) goto out_inode; d_instantiate(dentry, inode); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f1d2fba19ea0..af09660001c7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -33,7 +33,8 @@ unsigned long hugetlb_total_pages(void); int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access); int hugetlb_reserve_pages(struct inode *inode, long from, long to, - struct vm_area_struct *vma); + struct vm_area_struct *vma, + int acctflags); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); extern unsigned long hugepages_treat_as_movable; @@ -138,7 +139,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; -struct file *hugetlb_file_setup(const char *name, size_t); +struct file *hugetlb_file_setup(const char *name, size_t, int); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); diff --git a/include/linux/mm.h b/include/linux/mm.h index e8ddc98b8405..323561582c10 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1129,8 +1129,7 @@ extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long flag, unsigned long pgoff); extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, - unsigned int vm_flags, unsigned long pgoff, - int accountable); + unsigned int vm_flags, unsigned long pgoff); static inline unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, diff --git a/ipc/shm.c b/ipc/shm.c index f8f69fad3a27..05d51d2a792c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -340,6 +340,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) struct file * file; char name[13]; int id; + int acctflag = 0; if (size < SHMMIN || size > ns->shm_ctlmax) return -EINVAL; @@ -364,11 +365,12 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) sprintf (name, "SYSV%08x", key); if (shmflg & SHM_HUGETLB) { - /* hugetlb_file_setup takes care of mlock user accounting */ - file = hugetlb_file_setup(name, size); + /* hugetlb_file_setup applies strict accounting */ + if (shmflg & SHM_NORESERVE) + acctflag = VM_NORESERVE; + file = hugetlb_file_setup(name, size, acctflag); shp->mlock_user = current_user(); } else { - int acctflag = 0; /* * Do not allow no accounting for OVERCOMMIT_NEVER, even * if it's asked for. diff --git a/mm/fremap.c b/mm/fremap.c index 736ba7f3306a..b6ec85abbb39 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -198,7 +198,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, flags &= MAP_NONBLOCK; get_file(file); addr = mmap_region(file, start, size, - flags, vma->vm_flags, pgoff, 1); + flags, vma->vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 618e98304080..207464209546 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2269,14 +2269,12 @@ void hugetlb_change_protection(struct vm_area_struct *vma, int hugetlb_reserve_pages(struct inode *inode, long from, long to, - struct vm_area_struct *vma) + struct vm_area_struct *vma, + int acctflag) { - long ret, chg; + long ret = 0, chg; struct hstate *h = hstate_inode(inode); - if (vma && vma->vm_flags & VM_NORESERVE) - return 0; - /* * Shared mappings base their reservation on the number of pages that * are already allocated on behalf of the file. Private mappings need @@ -2285,22 +2283,25 @@ int hugetlb_reserve_pages(struct inode *inode, */ if (!vma || vma->vm_flags & VM_SHARED) chg = region_chg(&inode->i_mapping->private_list, from, to); - else { - struct resv_map *resv_map = resv_map_alloc(); - if (!resv_map) - return -ENOMEM; - + else chg = to - from; - set_vma_resv_map(vma, resv_map); - set_vma_resv_flags(vma, HPAGE_RESV_OWNER); - } - if (chg < 0) return chg; if (hugetlb_get_quota(inode->i_mapping, chg)) return -ENOSPC; + + /* + * Only apply hugepage reservation if asked. We still have to + * take the filesystem quota because it is an upper limit + * defined for the mount and not necessarily memory as a whole + */ + if (acctflag & VM_NORESERVE) { + reset_vma_resv_huge_pages(vma); + return 0; + } + ret = hugetlb_acct_memory(h, chg); if (ret < 0) { hugetlb_put_quota(inode->i_mapping, chg); @@ -2308,6 +2309,16 @@ int hugetlb_reserve_pages(struct inode *inode, } if (!vma || vma->vm_flags & VM_SHARED) region_add(&inode->i_mapping->private_list, from, to); + else { + struct resv_map *resv_map = resv_map_alloc(); + + if (!resv_map) + return -ENOMEM; + + set_vma_resv_map(vma, resv_map); + set_vma_resv_flags(vma, HPAGE_RESV_OWNER); + } + return 0; } diff --git a/mm/mmap.c b/mm/mmap.c index 214b6a258eeb..eb1270bebe67 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -918,7 +918,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, struct inode *inode; unsigned int vm_flags; int error; - int accountable = 1; unsigned long reqprot = prot; /* @@ -1019,8 +1018,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return -EPERM; vm_flags &= ~VM_MAYEXEC; } - if (is_file_hugepages(file)) - accountable = 0; if (!file->f_op || !file->f_op->mmap) return -ENODEV; @@ -1053,8 +1050,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, if (error) return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff, - accountable); + return mmap_region(file, addr, len, flags, vm_flags, pgoff); } EXPORT_SYMBOL(do_mmap_pgoff); @@ -1092,17 +1088,23 @@ int vma_wants_writenotify(struct vm_area_struct *vma) /* * We account for memory if it's a private writeable mapping, - * and VM_NORESERVE wasn't set. + * not hugepages and VM_NORESERVE wasn't set. */ -static inline int accountable_mapping(unsigned int vm_flags) +static inline int accountable_mapping(struct file *file, unsigned int vm_flags) { + /* + * hugetlb has its own accounting separate from the core VM + * VM_HUGETLB may not be set yet so we cannot check for that flag. + */ + if (file && is_file_hugepages(file)) + return 0; + return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; } unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, - unsigned int vm_flags, unsigned long pgoff, - int accountable) + unsigned int vm_flags, unsigned long pgoff) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; @@ -1128,18 +1130,22 @@ munmap_back: /* * Set 'VM_NORESERVE' if we should not account for the - * memory use of this mapping. We only honor MAP_NORESERVE - * if we're allowed to overcommit memory. + * memory use of this mapping. */ - if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) - vm_flags |= VM_NORESERVE; - if (!accountable) - vm_flags |= VM_NORESERVE; + if ((flags & MAP_NORESERVE)) { + /* We honor MAP_NORESERVE if allowed to overcommit */ + if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) + vm_flags |= VM_NORESERVE; + + /* hugetlb applies strict overcommit unless MAP_NORESERVE */ + if (file && is_file_hugepages(file)) + vm_flags |= VM_NORESERVE; + } /* * Private writable mapping: check memory availability */ - if (accountable_mapping(vm_flags)) { + if (accountable_mapping(file, vm_flags)) { charged = len >> PAGE_SHIFT; if (security_vm_enough_memory(charged)) return -ENOMEM; diff --git a/mm/mprotect.c b/mm/mprotect.c index abe2694e13f4..258197b76fb4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -151,10 +151,11 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, /* * If we make a private mapping writable we increase our commit; * but (without finer accounting) cannot reduce our commit if we - * make it unwritable again. + * make it unwritable again. hugetlb mapping were accounted for + * even if read-only so there is no need to account for them here */ if (newflags & VM_WRITE) { - if (!(oldflags & (VM_ACCOUNT|VM_WRITE| + if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| VM_SHARED|VM_NORESERVE))) { charged = nrpages; if (security_vm_enough_memory(charged)) From 1db8508cf483dc1ecf66141f90a7c03659d69512 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 10 Feb 2009 23:27:32 +0100 Subject: [PATCH 502/566] hugetlbfs: fix build failure with !CONFIG_HUGETLBFS Fix regression due to 5a6fe125950676015f5108fb71b2a67441755003, "Do not account for the address space used by hugetlbfs using VM_ACCOUNT" which added an argument to the function hugetlb_file_setup() but not to the macro hugetlb_file_setup(). Reported-by: Chris Clayton Signed-off-by: Stefan Richter Acked-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index af09660001c7..03be7f29ca01 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -159,9 +159,9 @@ static inline void set_file_hugepages(struct file *file) } #else /* !CONFIG_HUGETLBFS */ -#define is_file_hugepages(file) 0 -#define set_file_hugepages(file) BUG() -#define hugetlb_file_setup(name,size) ERR_PTR(-ENOSYS) +#define is_file_hugepages(file) 0 +#define set_file_hugepages(file) BUG() +#define hugetlb_file_setup(name,size,acctflag) ERR_PTR(-ENOSYS) #endif /* !CONFIG_HUGETLBFS */ From f99fb8a2cbf0fd9ce9b2d5d298943d0d4dc479f7 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 10 Feb 2009 10:57:46 +0000 Subject: [PATCH 503/566] powerpc/mm: Fix _PAGE_COHERENT support on classic ppc32 HW The following commit: commit 64b3d0e8122b422e879b23d42f9e0e8efbbf9744 Author: Benjamin Herrenschmidt Date: Thu Dec 18 19:13:51 2008 +0000 powerpc/mm: Rework usage of _PAGE_COHERENT/NO_CACHE/GUARDED broke setting of the _PAGE_COHERENT bit in the PPC HW PTE. Since we now actually set _PAGE_COHERENT in the Linux PTE we shouldn't be clearing it out before we propogate it to the PPC HW PTE. Reported-by: Martyn Welch Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_low_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 67850ec9feb3..14af8cedab70 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -320,7 +320,7 @@ _GLOBAL(create_hpte) and r8,r8,r0 /* writable if _RW & _DIRTY */ rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ - ori r8,r8,0xe14 /* clear out reserved bits and M */ + ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ BEGIN_FTR_SECTION rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */ From f47a454db9129d2e61b224a40f4365cdd4f83042 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 11:53:23 -0500 Subject: [PATCH 504/566] tracing, x86: fix constraint for parent variable The constraint used for retrieving and restoring the parent function pointer is incorrect. The parent variable is a pointer, and the address of the pointer is modified by the asm statement and not the pointer itself. It is incorrect to pass it in as an output constraint since the asm will never update the pointer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9d549e4fe880..231bdd3c5b1c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -488,8 +488,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) * ignore such a protection. */ asm volatile( - "1: " _ASM_MOV " (%[parent_old]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" + "1: " _ASM_MOV " (%[parent]), %[old]\n" + "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" " movl $0, %[faulted]\n" "3:\n" @@ -501,9 +501,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : [old] "=r" (old), [faulted] "=r" (faulted) + : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); From 06eb23b1ba39c61ee5d5faeb42a097635693e370 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 9 Feb 2009 02:02:33 +0100 Subject: [PATCH 505/566] ptrace, x86: fix the usage of ptrace_fork() I noticed by pure accident we have ptrace_fork() and friends. This was added by "x86, bts: add fork and exit handling", commit bf53de907dfdaac178c92d774aae7370d7b97d20. I can't test this, ds_request_bts() returns -EOPNOTSUPP, but I strongly believe this needs the fix. I think something like this program int main(void) { int pid = fork(); if (!pid) { ptrace(PTRACE_TRACEME, 0, NULL, NULL); kill(getpid(), SIGSTOP); fork(); } else { struct ptrace_bts_config bts = { .flags = PTRACE_BTS_O_ALLOC, .size = 4 * 4096, }; wait(NULL); ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACEFORK); ptrace(PTRACE_BTS_CONFIG, pid, &bts, sizeof(bts)); ptrace(PTRACE_CONT, pid, NULL, NULL); sleep(1); } return 0; } should crash the kernel. If the task is traced by its natural parent ptrace_reparented() returns 0 but we should clear ->btsxxx anyway. Signed-off-by: Oleg Nesterov Acked-by: Markus Metzger Signed-off-by: Ingo Molnar --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 242a706e7721..43c039d55e95 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1093,7 +1093,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif - if (unlikely(ptrace_reparented(current))) + if (unlikely(current->ptrace)) ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. */ From d5e842c4b79cc8e454c4fbbc1ce6a43d43184367 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 11 Feb 2009 10:37:28 +0100 Subject: [PATCH 506/566] [S390] vdso: fix per cpu vdso pointer in lowcore The vdso_per_cpu_data entry in the lowcore structure uses __u32 instead of __u64. If the data page is above 4GB the pointer is truncated and the kernel crashes. Reported-by: Mijo Safradin Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index ffdef5fe8587..f3720defdd16 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -384,8 +384,8 @@ struct _lowcore __u32 panic_magic; /* 0xe00 */ /* Per cpu primary space access list */ - __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */ - __u32 vdso_per_cpu_data; /* 0xe3c */ + __u8 pad_0xe04[0xe38-0xe04]; /* 0xe04 */ + __u64 vdso_per_cpu_data; /* 0xe38 */ __u32 paste[16]; /* 0xe40 */ __u8 pad13[0x11b8-0xe80]; /* 0xe80 */ From 0addff81513a71b279a5eca5bf7cba2052c8b737 Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Wed, 11 Feb 2009 10:37:29 +0100 Subject: [PATCH 507/566] [S390] Fix init irq proc build break. Embed init_irq_proc(s390) within CONFIG_PROC_FS to fix a build break. Signed-off-by : Sachin Sant --- arch/s390/kernel/irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index e7c5bfb7c755..026a37a94fc9 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -95,6 +95,7 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } +#ifdef CONFIG_PROC_FS void init_irq_proc(void) { struct proc_dir_entry *root_irq_dir; @@ -102,3 +103,4 @@ void init_irq_proc(void) root_irq_dir = proc_mkdir("irq", NULL); create_prof_cpu_mask(root_irq_dir); } +#endif From ca0b4b7d2cb57a2e24d7e48ce9b411b9baa3bf63 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 11 Feb 2009 10:37:30 +0100 Subject: [PATCH 508/566] [S390] dasd: bus_id -> dev_name() conversion. bus_id usage crept in again; fix it. Signed-off-by: Cornelia Huck Signed-off-by: Heiko Carstens --- drivers/s390/block/dasd_devmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 300e28a531f8..34339902efb9 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -677,7 +677,7 @@ static ssize_t dasd_ff_show(struct device *dev, struct device_attribute *attr, struct dasd_devmap *devmap; int ff_flag; - devmap = dasd_find_busid(dev->bus_id); + devmap = dasd_find_busid(dev_name(dev)); if (!IS_ERR(devmap)) ff_flag = (devmap->features & DASD_FEATURE_FAILFAST) != 0; else From 48cae885d5a896030588978f503c73c5ed5e62b1 Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Wed, 11 Feb 2009 10:37:31 +0100 Subject: [PATCH 509/566] [S390] dasd: fix race in dasd timer handling In dasd_device_set_timer and dasd_block_set_timer we interpret the return value of mod_timer in a wrong way. If the timer expires in the small window between our check of timer_pending and the call to mod_timer, then the timer will be set, mod_timer returns zero and we will call add_timer for a timer that is already pending. As del_timer and mod_timer do all the necessary checking themselves, we can simplify our code and remove the race a the same time. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 46 ++++++++++++++------------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index bd5914994142..08c23a921012 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -57,6 +57,8 @@ static void dasd_device_tasklet(struct dasd_device *); static void dasd_block_tasklet(struct dasd_block *); static void do_kick_device(struct work_struct *); static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *); +static void dasd_device_timeout(unsigned long); +static void dasd_block_timeout(unsigned long); /* * SECTION: Operations on the device structure. @@ -99,6 +101,8 @@ struct dasd_device *dasd_alloc_device(void) (unsigned long) device); INIT_LIST_HEAD(&device->ccw_queue); init_timer(&device->timer); + device->timer.function = dasd_device_timeout; + device->timer.data = (unsigned long) device; INIT_WORK(&device->kick_work, do_kick_device); device->state = DASD_STATE_NEW; device->target = DASD_STATE_NEW; @@ -138,6 +142,8 @@ struct dasd_block *dasd_alloc_block(void) INIT_LIST_HEAD(&block->ccw_queue); spin_lock_init(&block->queue_lock); init_timer(&block->timer); + block->timer.function = dasd_block_timeout; + block->timer.data = (unsigned long) block; return block; } @@ -915,19 +921,10 @@ static void dasd_device_timeout(unsigned long ptr) */ void dasd_device_set_timer(struct dasd_device *device, int expires) { - if (expires == 0) { - if (timer_pending(&device->timer)) - del_timer(&device->timer); - return; - } - if (timer_pending(&device->timer)) { - if (mod_timer(&device->timer, jiffies + expires)) - return; - } - device->timer.function = dasd_device_timeout; - device->timer.data = (unsigned long) device; - device->timer.expires = jiffies + expires; - add_timer(&device->timer); + if (expires == 0) + del_timer(&device->timer); + else + mod_timer(&device->timer, jiffies + expires); } /* @@ -935,8 +932,7 @@ void dasd_device_set_timer(struct dasd_device *device, int expires) */ void dasd_device_clear_timer(struct dasd_device *device) { - if (timer_pending(&device->timer)) - del_timer(&device->timer); + del_timer(&device->timer); } static void dasd_handle_killed_request(struct ccw_device *cdev, @@ -1586,19 +1582,10 @@ static void dasd_block_timeout(unsigned long ptr) */ void dasd_block_set_timer(struct dasd_block *block, int expires) { - if (expires == 0) { - if (timer_pending(&block->timer)) - del_timer(&block->timer); - return; - } - if (timer_pending(&block->timer)) { - if (mod_timer(&block->timer, jiffies + expires)) - return; - } - block->timer.function = dasd_block_timeout; - block->timer.data = (unsigned long) block; - block->timer.expires = jiffies + expires; - add_timer(&block->timer); + if (expires == 0) + del_timer(&block->timer); + else + mod_timer(&block->timer, jiffies + expires); } /* @@ -1606,8 +1593,7 @@ void dasd_block_set_timer(struct dasd_block *block, int expires) */ void dasd_block_clear_timer(struct dasd_block *block) { - if (timer_pending(&block->timer)) - del_timer(&block->timer); + del_timer(&block->timer); } /* From 95ec807e0a42188ec1ce29cf939816ad1e22f2d3 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 11 Feb 2009 10:37:32 +0100 Subject: [PATCH 510/566] [S390] Update default configuration. Signed-off-by: Martin Schwidefsky --- arch/s390/defconfig | 87 +++++++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/arch/s390/defconfig b/arch/s390/defconfig index a0e748da9909..31e809c77790 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28-rc6 -# Thu Nov 27 11:00:49 2008 +# Linux kernel version: 2.6.29-rc4 +# Wed Feb 11 10:07:16 2009 # CONFIG_SCHED_MC=y CONFIG_MMU=y @@ -14,12 +14,14 @@ CONFIG_RWSEM_XCHGADD_ALGORITHM=y # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_BUG=y CONFIG_NO_IOMEM=y CONFIG_NO_DMA=y CONFIG_GENERIC_LOCKBREAK=y CONFIG_PGSTE=y +CONFIG_VIRT_CPU_ACCOUNTING=y CONFIG_S390=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -39,20 +41,29 @@ CONFIG_POSIX_MQUEUE=y # CONFIG_TASKSTATS is not set CONFIG_AUDIT=y # CONFIG_AUDITSYSCALL is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +CONFIG_USER_SCHED=y +# CONFIG_CGROUP_SCHED is not set CONFIG_CGROUPS=y # CONFIG_CGROUP_DEBUG is not set CONFIG_CGROUP_NS=y # CONFIG_CGROUP_FREEZER is not set # CONFIG_CGROUP_DEVICE is not set # CONFIG_CPUSETS is not set -CONFIG_GROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_USER_SCHED=y -# CONFIG_CGROUP_SCHED is not set # CONFIG_CGROUP_CPUACCT is not set # CONFIG_RESOURCE_COUNTERS is not set CONFIG_SYSFS_DEPRECATED=y @@ -63,6 +74,7 @@ CONFIG_UTS_NS=y CONFIG_IPC_NS=y # CONFIG_USER_NS is not set # CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set @@ -91,17 +103,17 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set -# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_KPROBES=y +CONFIG_HAVE_SYSCALL_WRAPPERS=y CONFIG_KRETPROBES=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_USE_GENERIC_SMP_HELPERS=y # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -109,7 +121,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y +CONFIG_INIT_ALL_POSSIBLE=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set @@ -130,7 +142,6 @@ CONFIG_DEFAULT_DEADLINE=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="deadline" CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_CLASSIC_RCU=y # CONFIG_FREEZER is not set # @@ -161,6 +172,7 @@ CONFIG_S390_EXEC_PROTECT=y CONFIG_MARCH_Z900=y # CONFIG_MARCH_Z990 is not set # CONFIG_MARCH_Z9_109 is not set +# CONFIG_MARCH_Z10 is not set CONFIG_PACK_STACK=y # CONFIG_SMALL_STACK is not set CONFIG_CHECK_STACK=y @@ -174,7 +186,6 @@ CONFIG_ARCH_POPULATES_NODE_MAP=y # CONFIG_PREEMPT_NONE is not set # CONFIG_PREEMPT_VOLUNTARY is not set CONFIG_PREEMPT=y -# CONFIG_PREEMPT_RCU is not set CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y @@ -195,7 +206,6 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y -CONFIG_RESOURCES_64BIT=y CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y @@ -207,7 +217,6 @@ CONFIG_UNEVICTABLE_LRU=y # CONFIG_MACHCHK_WARNING=y CONFIG_QDIO=y -# CONFIG_QDIO_DEBUG is not set CONFIG_CHSC_SCH=m # @@ -227,15 +236,13 @@ CONFIG_PFAULT=y # CONFIG_SHARED_KERNEL is not set # CONFIG_CMM is not set # CONFIG_PAGE_STATES is not set -CONFIG_VIRT_TIMER=y -CONFIG_VIRT_CPU_ACCOUNTING=y # CONFIG_APPLDATA_BASE is not set CONFIG_HZ_100=y # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 -# CONFIG_SCHED_HRTICK is not set +CONFIG_SCHED_HRTICK=y CONFIG_S390_HYPFS_FS=y CONFIG_KEXEC=y # CONFIG_ZFCPDUMP is not set @@ -245,6 +252,7 @@ CONFIG_NET=y # # Networking options # +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y @@ -383,6 +391,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m CONFIG_NET_SCH_DSMARK=m # CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_INGRESS is not set # @@ -400,6 +409,7 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_FLOW=m +# CONFIG_NET_CLS_CGROUP is not set # CONFIG_NET_EMATCH is not set CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y @@ -411,6 +421,7 @@ CONFIG_NET_ACT_NAT=m # CONFIG_NET_ACT_SKBEDIT is not set # CONFIG_NET_CLS_IND is not set CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set # # Network testing @@ -428,6 +439,7 @@ CONFIG_CAN_VCAN=m # CONFIG_CAN_DEBUG_DEVICES is not set # CONFIG_AF_RXRPC is not set # CONFIG_PHONET is not set +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set # CONFIG_PCMCIA is not set @@ -475,10 +487,14 @@ CONFIG_DASD_DIAG=y CONFIG_DASD_EER=y CONFIG_VIRTIO_BLK=m CONFIG_MISC_DEVICES=y -# CONFIG_EEPROM_93CX6 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_C2PORT is not set +# +# EEPROM support +# +# CONFIG_EEPROM_93CX6 is not set + # # SCSI device support # @@ -520,6 +536,7 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set +# CONFIG_LIBFC is not set # CONFIG_SCSI_DEBUG is not set CONFIG_ZFCP=y CONFIG_SCSI_DH=m @@ -566,6 +583,10 @@ CONFIG_NET_ETHERNET=y CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y # CONFIG_TR is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # CONFIG_WAN is not set # @@ -593,9 +614,11 @@ CONFIG_VIRTIO_NET=m # CONFIG_DEVKMEM=y CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 CONFIG_HVC_DRIVER=y +CONFIG_HVC_IUCV=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=m CONFIG_HW_RANDOM_VIRTIO=m @@ -645,7 +668,6 @@ CONFIG_S390_VMUR=m # CONFIG_NEW_LEDS is not set CONFIG_ACCESSIBILITY=y # CONFIG_STAGING is not set -CONFIG_STAGING_EXCLUDE_BUILD=y # # File systems @@ -668,6 +690,7 @@ CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y @@ -703,10 +726,7 @@ CONFIG_TMPFS_POSIX_ACL=y # CONFIG_HUGETLBFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_CONFIGFS_FS=m - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set @@ -715,6 +735,7 @@ CONFIG_CONFIGFS_FS=m # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set @@ -808,6 +829,7 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_FRAME_POINTER is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set @@ -818,15 +840,19 @@ CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_HAVE_FUNCTION_TRACER=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set # CONFIG_IRQSOFF_TRACER is not set # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set # CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set # CONFIG_DYNAMIC_PRINTK_DEBUG is not set CONFIG_SAMPLES=y # CONFIG_SAMPLE_KOBJECT is not set @@ -847,11 +873,17 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=m +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_RNG=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_GF128MUL=m # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_CRYPTD is not set @@ -885,7 +917,7 @@ CONFIG_CRYPTO_HMAC=m # # Digest # -# CONFIG_CRYPTO_CRC32C is not set +CONFIG_CRYPTO_CRC32C=m # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=m # CONFIG_CRYPTO_MICHAEL_MIC is not set @@ -942,6 +974,7 @@ CONFIG_S390_PRNG=m # Library routines # CONFIG_BITREVERSE=m +CONFIG_GENERIC_FIND_LAST_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC_T10DIF=y From 0e81cb59c7120191c0243c686b591797bc79e5c6 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 10 Feb 2009 16:45:37 -0800 Subject: [PATCH 511/566] x86, apic: fix initialization of wakeup_cpu With refactoring of wake_cpu macros the 32bit code in tip doesn't execute generic_apic_probe if CONFIG_X86_32_NON_STANDARD is not set. Even on a x86 STANDARD cpu we need to execute the generic_apic_probe function, as we rely on this function to execute the update_genapic quirk which initilizes apic->wakeup_cpu. Failing to do so results in we making a call to a null function in do_boot_cpu. The stack trace without the patch goes like this. Booting processor 1 APIC 0x1 ip 0x6000 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<(null)>] (null) *pdpt = 0000000000839001 *pde = 0000000000c97067 *pte = 0000000000000163 Oops: 0000 [#1] SMP last sysfs file: Modules linked in: Pid: 1, comm: swapper Not tainted (2.6.29-rc4-tip #18) VMware Virtual Platform EIP: 0062:[<00000000>] EFLAGS: 00010293 CPU: 0 EIP is at 0x0 EAX: 00000001 EBX: 00006000 ECX: c077ed00 EDX: 00006000 ESI: 00000001 EDI: 00000001 EBP: ef04cf40 ESP: ef04cf1c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 006a Process swapper (pid: 1, ti=ef04c000 task=ef050000 task.ti=ef04c000) Stack: c0644e52 00000000 ef04cf24 ef04cf24 c064468d c0886dc0 00000000 c0702aea ef055480 00000001 00000101 dead4ead ffffffff ffffffff c08af530 00000000 c0709715 ef04cf60 ef04cf60 00000001 00000000 00000000 dead4ead ffffffff Call Trace: [] ? native_cpu_up+0x2de/0x45b [] ? do_fork_idle+0x0/0x19 [] ? _cpu_up+0x88/0xe8 [] ? cpu_up+0x42/0x4e [] ? kernel_init+0x99/0x14b [] ? kernel_init+0x0/0x14b [] ? kernel_thread_helper+0x7/0x10 Code: Bad EIP value. EIP: [<00000000>] 0x0 SS:ESP 006a:ef04cf1c I think we should call generic_apic_probe unconditionally for 32 bit now. Signed-off-by: Alok N Kataria Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8d8fa992c9a0..150e6d0a3b47 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#if defined(CONFIG_X86_32_NON_STANDARD) || defined(CONFIG_X86_BIGSMP) +#ifdef CONFIG_X86_32 generic_apic_probe(); #endif From 160d8dac12932ad6eb4a359b66521e2e3282ea7d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Feb 2009 11:27:39 +0100 Subject: [PATCH 512/566] x86, apic: make generic_apic_probe() generally available Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 6 ++++++ arch/x86/kernel/setup.c | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f4835a1be360..fba49f66228f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -33,7 +33,13 @@ } while (0) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) extern void generic_apic_probe(void); +#else +static inline void generic_apic_probe(void) +{ +} +#endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 150e6d0a3b47..8fce6c714514 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,9 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_32 generic_apic_probe(); -#endif early_quirks(); From 5c79d2a517a9905599d192db8ce77ab5f1a2faca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Feb 2009 16:31:00 +0900 Subject: [PATCH 513/566] x86: fix x86_32 stack protector bugs Impact: fix x86_32 stack protector Brian Gerst found out that %gs was being initialized to stack_canary instead of stack_canary - 20, which basically gave the same canary value for all threads. Fixing this also exposed the following bugs. * cpu_idle() didn't call boot_init_stack_canary() * stack canary switching in switch_to() was being done too late making the initial run of a new thread use the old stack canary value. Fix all of them and while at it update comment in cpu_idle() about calling boot_init_stack_canary(). Reported-by: Brian Gerst Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/include/asm/system.h | 8 +++----- arch/x86/kernel/head_32.S | 1 + arch/x86/kernel/process_32.c | 10 ++++++++++ arch/x86/kernel/process_64.c | 11 +++++------ 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index fa7e5bd6fbe8..c2d742c6e15f 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -85,7 +85,7 @@ static __always_inline void boot_init_stack_canary(void) static inline void setup_stack_canary_segment(int cpu) { #ifdef CONFIG_X86_32 - unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); struct desc_struct desc; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2692ee8ef031..7a80f72bec47 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -25,13 +25,11 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ - "movl "__percpu_arg([current_task])",%%ebx\n\t" \ - "movl %P[task_canary](%%ebx),%%ebx\n\t" \ - "movl %%ebx,"__percpu_arg([stack_canary])"\n\t" + "movl %P[task_canary](%[next]), %%ebx\n\t" \ + "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" #define __switch_canary_oparam \ , [stack_canary] "=m" (per_cpu_var(stack_canary)) #define __switch_canary_iparam \ - , [current_task] "m" (per_cpu_var(current_task)) \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ #define __switch_canary @@ -60,9 +58,9 @@ do { \ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \ + __switch_canary \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ - __switch_canary \ "popl %%ebp\n\t" /* restore EBP */ \ "popfl\n" /* restore flags */ \ \ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 924e31615fb6..cf21fd0cf6ac 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -447,6 +447,7 @@ is386: movl $2,%ecx # set MP jne 1f movl $per_cpu__gdt_page,%eax movl $per_cpu__stack_canary,%ecx + subl $20, %ecx movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9a62383e7c3c..b50604bb1e41 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -91,6 +92,15 @@ void cpu_idle(void) { int cpu = smp_processor_id(); + /* + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). + */ + boot_init_stack_canary(); + current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8eb169e45584..836ef6575f01 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -120,12 +120,11 @@ void cpu_idle(void) current_thread_info()->status |= TS_POLLING; /* - * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us - and if we are the boot CPU we have - * a 0 stack canary. This is a good place for updating - * it, as we wont ever return from this function (so the - * invalid canaries already on the stack wont ever - * trigger): + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). */ boot_init_stack_canary(); From ebd9026d9f8499abc60d82d949bd37f88fe34a41 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Feb 2009 12:17:29 +0100 Subject: [PATCH 514/566] stackprotector: fix multi-word cross-builds Stackprotector builds were failing if CROSS_COMPILER was more than a single world (such as when distcc was used) - because the check scripts used $1 instead of $*. Signed-off-by: Ingo Molnar --- scripts/gcc-x86_32-has-stack-protector.sh | 2 +- scripts/gcc-x86_64-has-stack-protector.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh index 4fdf6ce1b062..29493dc4528d 100644 --- a/scripts/gcc-x86_32-has-stack-protector.sh +++ b/scripts/gcc-x86_32-has-stack-protector.sh @@ -1,6 +1,6 @@ #!/bin/sh -echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then echo y else diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 2d69fcdc5609..afaec618b395 100644 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,6 +1,6 @@ #!/bin/sh -echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then echo y else From aa78bcfa01dec3cdbde3cda098ce32abbd9c3bf6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:45 -0500 Subject: [PATCH 515/566] x86: use pt_regs pointer in do_device_not_available() The generic exception handler (error_code) passes in the pt_regs pointer and the error code (unused in this case). The commit "x86: fix math_emu register frame access" changed this to pass by value, which doesn't work correctly with stack protector enabled. Change it back to use the pt_regs pointer. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/traps.h | 2 +- arch/x86/kernel/traps.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index cf3bb053da0b..0d5342515b86 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); dotraplinkage void do_overflow(struct pt_regs *, long); dotraplinkage void do_bounds(struct pt_regs *, long); dotraplinkage void do_invalid_op(struct pt_regs *, long); -dotraplinkage void do_device_not_available(struct pt_regs); +dotraplinkage void do_device_not_available(struct pt_regs *, long); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3b7b2e190201..71a8f871331e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) +dotraplinkage void __kprobes +do_device_not_available(struct pt_regs *regs, long error_code) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; - conditional_sti(®s); + conditional_sti(regs); - info.regs = ®s; + info.regs = regs; math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(®s); + conditional_sti(regs); } #else math_state_restore(); From 253f29a4ae9cc6cdc7b94f96517f27a93885a6ce Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:46 -0500 Subject: [PATCH 516/566] x86: pass in pt_regs pointer for syscalls that need it Some syscalls need to access the pt_regs structure, either to copy user register state or to modifiy it. This patch adds stubs to load the address of the pt_regs struct into the %eax register, and changes the syscalls to regparm(1) to receive the pt_regs pointer as the first argument. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 7 ++++++ arch/x86/include/asm/syscalls.h | 25 ++++++++++++--------- arch/x86/kernel/entry_32.S | 20 +++++++++++++++++ arch/x86/kernel/ioport.c | 4 +--- arch/x86/kernel/process_32.c | 35 ++++++++++++------------------ arch/x86/kernel/signal.c | 35 ++++++------------------------ arch/x86/kernel/syscall_table_32.S | 20 ++++++++--------- arch/x86/kernel/vm86_32.c | 15 ++++++------- 8 files changed, 81 insertions(+), 80 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5d98d0b68ffc..2fd5926fb97d 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -17,6 +17,13 @@ */ #define asmregparm __attribute__((regparm(3))) +/* + * For syscalls that need a pointer to the pt_regs struct (ie. fork). + * The regs pointer is passed in %eax as the first argument. The + * remaining function arguments remain on the stack. + */ +#define ptregscall __attribute__((regparm(1))) + /* * Make sure the compiler doesn't do anything stupid with the * arguments on the stack - they are owned by the *caller*, not diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index c0b0bda754ee..617295255a17 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -29,21 +29,26 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 /* kernel/process_32.c */ -asmlinkage int sys_fork(struct pt_regs); -asmlinkage int sys_clone(struct pt_regs); -asmlinkage int sys_vfork(struct pt_regs); -asmlinkage int sys_execve(struct pt_regs); +ptregscall int sys_fork(struct pt_regs *); +ptregscall int sys_clone(struct pt_regs *, unsigned long, + unsigned long, int __user *, + unsigned long, int __user *); +ptregscall int sys_vfork(struct pt_regs *); +ptregscall int sys_execve(struct pt_regs *, char __user *, + char __user * __user *, + char __user * __user *); /* kernel/signal_32.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -asmlinkage int sys_sigaltstack(unsigned long); -asmlinkage unsigned long sys_sigreturn(unsigned long); -asmlinkage int sys_rt_sigreturn(unsigned long); +ptregscall int sys_sigaltstack(struct pt_regs *, const stack_t __user *, + stack_t __user *); +ptregscall unsigned long sys_sigreturn(struct pt_regs *); +ptregscall int sys_rt_sigreturn(struct pt_regs *); /* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned long); +ptregscall long sys_iopl(struct pt_regs *, unsigned int); /* kernel/sys_i386_32.c */ asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, @@ -59,8 +64,8 @@ struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ -asmlinkage int sys_vm86old(struct pt_regs); -asmlinkage int sys_vm86(struct pt_regs); +ptregscall int sys_vm86old(struct pt_regs *, struct vm86_struct __user *); +ptregscall int sys_vm86(struct pt_regs *, unsigned long, unsigned long); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5f5bd22adcd4..3de7b5710dc8 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -697,6 +697,26 @@ syscall_badsys: END(syscall_badsys) CFI_ENDPROC +/* + * System calls that need a pt_regs pointer. + */ +#define PTREGSCALL(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%eax; \ + jmp sys_##name; + +PTREGSCALL(iopl) +PTREGSCALL(fork) +PTREGSCALL(clone) +PTREGSCALL(vfork) +PTREGSCALL(execve) +PTREGSCALL(sigaltstack) +PTREGSCALL(sigreturn) +PTREGSCALL(rt_sigreturn) +PTREGSCALL(vm86) +PTREGSCALL(vm86old) + .macro FIXUP_ESPFIX_STACK /* since we are on a wrong stack, we cant make it a C code :( */ PER_CPU(gdt_page, %ebx) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b12208f4dfee..7ec148646312 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -131,10 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) } #ifdef CONFIG_X86_32 -asmlinkage long sys_iopl(unsigned long regsp) +ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level) { - struct pt_regs *regs = (struct pt_regs *)®sp; - unsigned int level = regs->bx; struct thread_struct *t = ¤t->thread; int rc; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b50604bb1e41..5a9dcfb01f71 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -603,24 +603,18 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -asmlinkage int sys_fork(struct pt_regs regs) +ptregscall int sys_fork(struct pt_regs *regs) { - return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } -asmlinkage int sys_clone(struct pt_regs regs) +ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, + unsigned long newsp, int __user *parent_tidptr, + unsigned long unused, int __user *child_tidptr) { - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs.bx; - newsp = regs.cx; - parent_tidptr = (int __user *)regs.dx; - child_tidptr = (int __user *)regs.di; if (!newsp) - newsp = regs.sp; - return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); } /* @@ -633,27 +627,26 @@ asmlinkage int sys_clone(struct pt_regs regs) * do not have enough call-clobbered registers to hold all * the information you need. */ -asmlinkage int sys_vfork(struct pt_regs regs) +ptregscall int sys_vfork(struct pt_regs *regs) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(struct pt_regs regs) +ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename, + char __user * __user *argv, + char __user * __user *envp) { int error; char *filename; - filename = getname((char __user *) regs.bx); + filename = getname(u_filename); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, - (char __user * __user *) regs.cx, - (char __user * __user *) regs.dx, - ®s); + error = do_execve(filename, argv, envp, regs); if (error == 0) { /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 8562387c75a7..d7a158367e38 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -549,39 +549,28 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, #endif /* CONFIG_X86_32 */ #ifdef CONFIG_X86_32 -asmlinkage int sys_sigaltstack(unsigned long bx) -{ - /* - * This is needed to make gcc realize it doesn't own the - * "struct pt_regs" - */ - struct pt_regs *regs = (struct pt_regs *)&bx; - const stack_t __user *uss = (const stack_t __user *)bx; - stack_t __user *uoss = (stack_t __user *)regs->cx; - - return do_sigaltstack(uss, uoss, regs->sp); -} +ptregscall int +sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss, + stack_t __user *uoss) #else /* !CONFIG_X86_32 */ asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) +#endif /* CONFIG_X86_32 */ { return do_sigaltstack(uss, uoss, regs->sp); } -#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -asmlinkage unsigned long sys_sigreturn(unsigned long __unused) +ptregscall unsigned long sys_sigreturn(struct pt_regs *regs) { struct sigframe __user *frame; - struct pt_regs *regs; unsigned long ax; sigset_t set; - regs = (struct pt_regs *) &__unused; frame = (struct sigframe __user *)(regs->sp - 8); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) @@ -640,23 +629,13 @@ badframe: } #ifdef CONFIG_X86_32 -/* - * Note: do not pass in pt_regs directly as with tail-call optimization - * GCC will incorrectly stomp on the caller's frame and corrupt user-space - * register state: - */ -asmlinkage int sys_rt_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *)&__unused; - - return do_rt_sigreturn(regs); -} +ptregscall int sys_rt_sigreturn(struct pt_regs *regs) #else /* !CONFIG_X86_32 */ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +#endif /* CONFIG_X86_32 */ { return do_rt_sigreturn(regs); } -#endif /* CONFIG_X86_32 */ /* * OK, we're invoking a handler: diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index e2e86a08f31d..3bdb64829b82 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -1,7 +1,7 @@ ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit - .long sys_fork + .long ptregs_fork .long sys_read .long sys_write .long sys_open /* 5 */ @@ -10,7 +10,7 @@ ENTRY(sys_call_table) .long sys_creat .long sys_link .long sys_unlink /* 10 */ - .long sys_execve + .long ptregs_execve .long sys_chdir .long sys_time .long sys_mknod @@ -109,17 +109,17 @@ ENTRY(sys_call_table) .long sys_newlstat .long sys_newfstat .long sys_uname - .long sys_iopl /* 110 */ + .long ptregs_iopl /* 110 */ .long sys_vhangup .long sys_ni_syscall /* old "idle" system call */ - .long sys_vm86old + .long ptregs_vm86old .long sys_wait4 .long sys_swapoff /* 115 */ .long sys_sysinfo .long sys_ipc .long sys_fsync - .long sys_sigreturn - .long sys_clone /* 120 */ + .long ptregs_sigreturn + .long ptregs_clone /* 120 */ .long sys_setdomainname .long sys_newuname .long sys_modify_ldt @@ -165,14 +165,14 @@ ENTRY(sys_call_table) .long sys_mremap .long sys_setresuid16 .long sys_getresuid16 /* 165 */ - .long sys_vm86 + .long ptregs_vm86 .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll .long sys_nfsservctl .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl - .long sys_rt_sigreturn + .long ptregs_rt_sigreturn .long sys_rt_sigaction .long sys_rt_sigprocmask /* 175 */ .long sys_rt_sigpending @@ -185,11 +185,11 @@ ENTRY(sys_call_table) .long sys_getcwd .long sys_capget .long sys_capset /* 185 */ - .long sys_sigaltstack + .long ptregs_sigaltstack .long sys_sendfile .long sys_ni_syscall /* reserved for streams1 */ .long sys_ni_syscall /* reserved for streams2 */ - .long sys_vfork /* 190 */ + .long ptregs_vfork /* 190 */ .long sys_getrlimit .long sys_mmap2 .long sys_truncate64 diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 55ea30d2a3d6..8fa6ba7c9233 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,9 +197,8 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -asmlinkage int sys_vm86old(struct pt_regs regs) +ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86) { - struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -218,7 +217,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) if (tmp) goto out; memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = ®s; + info.regs32 = regs; tsk->thread.vm86_info = v86; do_sys_vm86(&info, tsk); ret = 0; /* we never return here */ @@ -227,7 +226,7 @@ out: } -asmlinkage int sys_vm86(struct pt_regs regs) +ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -239,12 +238,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) struct vm86plus_struct __user *v86; tsk = current; - switch (regs.bx) { + switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); + ret = do_vm86_irq_handling(cmd, (int)arg); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -261,14 +260,14 @@ asmlinkage int sys_vm86(struct pt_regs regs) ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)regs.cx; + v86 = (struct vm86plus_struct __user *)arg; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); ret = -EFAULT; if (tmp) goto out; - info.regs32 = ®s; + info.regs32 = regs; info.vm86plus.is_vm86pus = 1; tsk->thread.vm86_info = (struct vm86_struct __user *)v86; do_sys_vm86(&info, tsk); From 9c8bb6b534d1c89a20bf9bb45e1471cf8f4747c0 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:47 -0500 Subject: [PATCH 517/566] x86: drop -fno-stack-protector annotations after pt_regs fixes Now that no functions rely on struct pt_regs being passed by value, various "no stack protector" annotations can be dropped. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1f8be33300d..37fa30bada17 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,24 +24,6 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) -# -# On x86_32, register frame is passed verbatim on stack as struct -# pt_regs. gcc considers the parameter to belong to the callee and -# with -fstack-protector it copies pt_regs to the callee's stack frame -# to put the structure after the stack canary causing changes made by -# the exception handlers to be lost. Turn off stack protector for all -# files containing functions which take struct pt_regs from register -# frame. -# -# The proper way to fix this is to teach gcc that the argument belongs -# to the caller for these functions, oh well... -# -ifdef CONFIG_X86_32 -CFLAGS_process_32.o := $(nostackp) -CFLAGS_vm86_32.o := $(nostackp) -CFLAGS_signal.o := $(nostackp) -CFLAGS_traps.o := $(nostackp) -endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o From 1c0040047d5499599cc231ca3f105be3ceff8562 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Mon, 9 Feb 2009 10:25:20 -0600 Subject: [PATCH 518/566] SGI IA64 UV: fix ia64 build error in the linux-next tree Fix the ia64 build error that occurs in the linux-next tree by introducing an ia64 version of uv.h. Additionally, clean up the usage of is_uv_system(). Signed-off-by: Dean Nelson Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/uv/uv.h | 13 +++++++++++++ drivers/misc/sgi-gru/gru.h | 2 -- drivers/misc/sgi-gru/grufile.c | 18 +++--------------- drivers/misc/sgi-xp/xp.h | 22 ++++++++-------------- 4 files changed, 24 insertions(+), 31 deletions(-) create mode 100644 arch/ia64/include/asm/uv/uv.h diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h new file mode 100644 index 000000000000..61b5bdfd980e --- /dev/null +++ b/arch/ia64/include/asm/uv/uv.h @@ -0,0 +1,13 @@ +#ifndef _ASM_IA64_UV_UV_H +#define _ASM_IA64_UV_UV_H + +#include +#include + +static inline int is_uv_system(void) +{ + /* temporary support for running on hardware simulator */ + return IS_MEDUSA() || ia64_platform_is("uv"); +} + +#endif /* _ASM_IA64_UV_UV_H */ diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index 1b5f579df15f..f93f03a9e6e9 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -19,8 +19,6 @@ #ifndef __GRU_H__ #define __GRU_H__ -#include - /* * GRU architectural definitions */ diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index 650983806392..c67e4e8bd62c 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -36,23 +36,11 @@ #include #include #include +#include #include "gru.h" #include "grulib.h" #include "grutables.h" -#if defined CONFIG_X86_64 -#include -#include -#define IS_UV() is_uv_system() -#elif defined CONFIG_IA64 -#include -#include -/* temp support for running on hardware simulator */ -#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv") -#else -#define IS_UV() 0 -#endif - #include #include @@ -381,7 +369,7 @@ static int __init gru_init(void) char id[10]; void *gru_start_vaddr; - if (!IS_UV()) + if (!is_uv_system()) return 0; #if defined CONFIG_IA64 @@ -451,7 +439,7 @@ static void __exit gru_exit(void) int order = get_order(sizeof(struct gru_state) * GRU_CHIPLETS_PER_BLADE); - if (!IS_UV()) + if (!is_uv_system()) return; for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index 069ad3a1c2ac..2275126cb334 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -15,21 +15,19 @@ #include +#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV #include +#define is_uv() is_uv_system() +#endif -#ifdef CONFIG_IA64 +#ifndef is_uv +#define is_uv() 0 +#endif + +#if defined CONFIG_IA64 #include #include /* defines is_shub1() and is_shub2() */ #define is_shub() ia64_platform_is("sn2") -#ifdef CONFIG_IA64_SGI_UV -#define is_uv() ia64_platform_is("uv") -#else -#define is_uv() 0 -#endif -#endif -#ifdef CONFIG_X86_64 -#include -#define is_uv() is_uv_system() #endif #ifndef is_shub1 @@ -44,10 +42,6 @@ #define is_shub() 0 #endif -#ifndef is_uv -#define is_uv() 0 -#endif - #ifdef USE_DBUG_ON #define DBUG_ON(condition) BUG_ON(condition) #else From 3fccfd67df79c6351a156eb25a7a514e5f39c4d9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Feb 2009 16:37:31 +0100 Subject: [PATCH 519/566] timers: split process wide cpu clocks/timers, fix To decrease the chance of a missed enable, always enable the timer when we sample it, we'll always disable it when we find that there are no active timers in the jiffy tick. This fixes a flood of warnings reported by Mike Galbraith. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/posix-cpu-timers.c | 42 +++++++++++++-------------------------- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 79392916d6c9..5d10fa0b6002 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2209,6 +2209,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) unsigned long flags; spin_lock_irqsave(&cputimer->lock, flags); + cputimer->running = 1; *times = cputimer->cputime; spin_unlock_irqrestore(&cputimer->lock, flags); } diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index db107c9bbc05..e5d7bfdfa7d4 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -488,7 +488,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) { struct task_cputime cputime; - thread_group_cputime(tsk, &cputime); + thread_group_cputimer(tsk, &cputime); cleanup_timers(tsk->signal->cpu_timers, cputime.utime, cputime.stime, cputime.sum_exec_runtime); } @@ -506,29 +506,6 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) now); } -/* - * Enable the process wide cpu timer accounting. - * - * serialized using ->sighand->siglock - */ -static void start_process_timers(struct task_struct *tsk) -{ - tsk->signal->cputimer.running = 1; - barrier(); -} - -/* - * Release the process wide timer accounting -- timer stops ticking when - * nobody cares about it. - * - * serialized using ->sighand->siglock - */ -static void stop_process_timers(struct task_struct *tsk) -{ - tsk->signal->cputimer.running = 0; - barrier(); -} - /* * Insert the timer on the appropriate list before any timers that * expire later. This must be called with the tasklist_lock held @@ -549,9 +526,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) BUG_ON(!irqs_disabled()); spin_lock(&p->sighand->siglock); - if (!CPUCLOCK_PERTHREAD(timer->it_clock)) - start_process_timers(p); - listpos = head; if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { list_for_each_entry(next, head, entry) { @@ -1021,6 +995,19 @@ static void check_thread_timers(struct task_struct *tsk, } } +static void stop_process_timers(struct task_struct *tsk) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + unsigned long flags; + + if (!cputimer->running) + return; + + spin_lock_irqsave(&cputimer->lock, flags); + cputimer->running = 0; + spin_unlock_irqrestore(&cputimer->lock, flags); +} + /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers @@ -1427,7 +1414,6 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, struct list_head *head; BUG_ON(clock_idx == CPUCLOCK_SCHED); - start_process_timers(tsk); cpu_timer_sample_group(clock_idx, tsk, &now); if (oldval) { From 4da94d49b2ecb0a26e716a8811c3ecc542c2a65d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Feb 2009 11:30:27 +0100 Subject: [PATCH 520/566] timers: fix TIMER_ABSTIME for process wide cpu timers The POSIX timer interface allows for absolute time expiry values through the TIMER_ABSTIME flag, therefore we have to synchronize the timer to the clock every time we start it. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 +------------ kernel/posix-cpu-timers.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5d10fa0b6002..8981e52c714f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2201,18 +2201,7 @@ static inline int spin_needbreak(spinlock_t *lock) * Thread group CPU time accounting. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); - -static inline -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) -{ - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - unsigned long flags; - - spin_lock_irqsave(&cputimer->lock, flags); - cputimer->running = 1; - *times = cputimer->cputime; - spin_unlock_irqrestore(&cputimer->lock, flags); -} +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); static inline void thread_group_cputime_init(struct signal_struct *sig) { diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index e5d7bfdfa7d4..2313a4cc14ea 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -261,6 +261,40 @@ out: rcu_read_unlock(); } +static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b) +{ + if (cputime_gt(b->utime, a->utime)) + a->utime = b->utime; + + if (cputime_gt(b->stime, a->stime)) + a->stime = b->stime; + + if (b->sum_exec_runtime > a->sum_exec_runtime) + a->sum_exec_runtime = b->sum_exec_runtime; +} + +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct task_cputime sum; + unsigned long flags; + + spin_lock_irqsave(&cputimer->lock, flags); + if (!cputimer->running) { + cputimer->running = 1; + /* + * The POSIX timer interface allows for absolute time expiry + * values through the TIMER_ABSTIME flag, therefore we have + * to synchronize the timer to the clock every time we start + * it. + */ + thread_group_cputime(tsk, &sum); + update_gt_cputime(&cputimer->cputime, &sum); + } + *times = cputimer->cputime; + spin_unlock_irqrestore(&cputimer->lock, flags); +} + /* * Sample a process (thread group) clock for the given group_leader task. * Must be called with tasklist_lock held for reading. From c5c606d9dce6e0852a401c57a8b0542acdbb6796 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Mon, 9 Feb 2009 18:18:14 -0800 Subject: [PATCH 521/566] x86: cleanup, rename CONFIG_X86_NON_STANDARD to CONFIG_X86_EXTENDED_PLATFORM Patch to rename the CONFIG_X86_NON_STANDARD to CONFIG_X86_EXTENDED_PLATFORM. The new name represents the subarches better. Also, default this to 'y' so that many of the sub architectures that were not easily visible now become visible. Also re-organize the extended architecture platform and non standard platform list alphabetically as suggested by Ingo. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 87 ++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 270ecf90bdb4..4a27aa41f6df 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -268,65 +268,48 @@ config X86_BIGSMP ---help--- This option is needed for the systems that have more than 8 CPUs -config X86_NON_STANDARD - bool "Support for non-standard x86 platforms" +config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y ---help--- If you disable this option then the kernel will only support standard PC platforms. (which covers the vast majority of systems out there.) If you enable this option then you'll be able to select a number - of less common non-PC x86 platforms: VisWS, RDC321, SGI/UV. + of non-PC x86 platforms. If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. -config X86_VISWS - bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT - depends on X86_NON_STANDARD - ---help--- - The SGI Visual Workstation series is an IA32-based workstation - based on SGI systems chips with some legacy PC hardware attached. - - Say Y here to create a kernel to run on the SGI 320 or 540. - - A kernel compiled for the Visual Workstation will run on general - PCs as well. See for details. - -config X86_RDC321X - bool "RDC R-321x SoC" - depends on X86_32 - depends on X86_NON_STANDARD - select M486 - select X86_REBOOTFIXUPS - ---help--- - This option is needed for RDC R-321x system-on-chip, also known - as R-8610-(G). - If you don't have one of these chips, you should say N here. - -config X86_UV - bool "SGI Ultraviolet" - depends on X86_64 - depends on X86_NON_STANDARD - ---help--- - This option is needed in order to support SGI Ultraviolet systems. - If you don't have one of these, you should say N here. +# This is an alphabetically sorted list of 64 bit extended platforms +# Please maintain the alphabetic order if and when there are additions config X86_VSMP - bool "Support for ScaleMP vSMP" + bool "ScaleMP vSMP" select PARAVIRT depends on X86_64 && PCI - depends on X86_NON_STANDARD + depends on X86_EXTENDED_PLATFORM ---help--- Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option if you have one of these machines. +config X86_UV + bool "SGI Ultraviolet" + depends on X86_64 + depends on X86_EXTENDED_PLATFORM + ---help--- + This option is needed in order to support SGI Ultraviolet systems. + If you don't have one of these, you should say N here. + +# Following is an alphabetically sorted list of 32 bit extended platforms +# Please maintain the alphabetic order if and when there are additions + config X86_ELAN bool "AMD Elan" depends on X86_32 - depends on X86_NON_STANDARD + depends on X86_EXTENDED_PLATFORM ---help--- Select this for an AMD Elan processor. @@ -334,16 +317,29 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. +config X86_RDC321X + bool "RDC R-321x SoC" + depends on X86_32 + depends on X86_EXTENDED_PLATFORM + select M486 + select X86_REBOOTFIXUPS + ---help--- + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP - depends on X86_NON_STANDARD + depends on X86_EXTENDED_PLATFORM ---help--- This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default subarchitectures. It is intended for a generic binary kernel. if you select them all, kernel will probe it one by one. and will fallback to default. +# Alphabetically sorted list of Non standard 32 bit platforms + config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" depends on X86_32_NON_STANDARD @@ -356,6 +352,19 @@ config X86_NUMAQ of Flat Logical. You will need a new lynxer.elf file to flash your firmware with - send email to . +config X86_VISWS + bool "SGI 320/540 (Visual Workstation)" + depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT + depends on X86_32_NON_STANDARD + ---help--- + The SGI Visual Workstation series is an IA32-based workstation + based on SGI systems chips with some legacy PC hardware attached. + + Say Y here to create a kernel to run on the SGI 320 or 540. + + A kernel compiled for the Visual Workstation will run on general + PCs as well. See for details. + config X86_SUMMIT bool "Summit/EXA (IBM x440)" depends on X86_32_NON_STANDARD @@ -364,7 +373,7 @@ config X86_SUMMIT In particular, it is needed for the x440. config X86_ES7000 - bool "Support for Unisys ES7000 IA32 series" + bool "Unisys ES7000 IA32 series" depends on X86_32_NON_STANDARD && X86_BIGSMP ---help--- Support for Unisys ES7000 systems. Say 'Y' here if this kernel is From 8e1568f3500287d0b36c9776132cb53a42d5651d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Feb 2009 01:06:59 -0800 Subject: [PATCH 522/566] pci, x86, acpi: fix early_ioremap() leak Pawel reported: ------------[ cut here ]------------ WARNING: at arch/x86/mm/ioremap.c:616 check_early_ioremap_leak+0x52/0x67() Hardware name: Debug warning: early ioremap leak of 1 areas detected. Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.29-rc4-tip #2 ... Reported-by: Pawel Dziekonski Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f5a662a50acb..519f5f91e765 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -42,6 +42,7 @@ LIST_HEAD(dmar_drhd_units); static struct acpi_table_header * __initdata dmar_tbl; +static acpi_size dmar_tbl_size; static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { @@ -288,8 +289,9 @@ static int __init dmar_table_detect(void) acpi_status status = AE_OK; /* if we could find DMAR table, then there are DMAR devices */ - status = acpi_get_table(ACPI_SIG_DMAR, 0, - (struct acpi_table_header **)&dmar_tbl); + status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0, + (struct acpi_table_header **)&dmar_tbl, + &dmar_tbl_size); if (ACPI_SUCCESS(status) && !dmar_tbl) { printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); @@ -481,6 +483,7 @@ void __init detect_intel_iommu(void) iommu_detected = 1; #endif } + early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; } From fc631c82e1734d718ff0832558f64c8f5d185f26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Feb 2009 14:27:17 +0100 Subject: [PATCH 523/566] sched: revert recent sync wakeup changes Intel reported a 10% regression (mysql+sysbench) on a 16-way machine with these patches: 1596e29: sched: symmetric sync vs avg_overlap d942fb6: sched: fix sync wakeups Revert them. Reported-by: "Zhang, Yanmin" Bisected-by: Lin Ming Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 10 ---------- kernel/sched_fair.c | 11 +++++++++-- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index e1fc67d0674c..f11c02b86c73 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2266,16 +2266,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) if (!sched_feat(SYNC_WAKEUPS)) sync = 0; - if (!sync) { - if (current->se.avg_overlap < sysctl_sched_migration_cost && - p->se.avg_overlap < sysctl_sched_migration_cost) - sync = 1; - } else { - if (current->se.avg_overlap >= sysctl_sched_migration_cost || - p->se.avg_overlap >= sysctl_sched_migration_cost) - sync = 0; - } - #ifdef CONFIG_SMP if (sched_feat(LB_WAKEUP_UPDATE)) { struct sched_domain *sd; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a7e50ba185ac..0566f2a03c42 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1191,15 +1191,20 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, int idx, unsigned long load, unsigned long this_load, unsigned int imbalance) { + struct task_struct *curr = this_rq->curr; + struct task_group *tg; unsigned long tl = this_load; unsigned long tl_per_task; - struct task_group *tg; unsigned long weight; int balanced; if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) return 0; + if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || + p->se.avg_overlap > sysctl_sched_migration_cost)) + sync = 0; + /* * If sync wakeup then subtract the (maximum possible) * effect of the currently running task from the load @@ -1426,7 +1431,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) if (!sched_feat(WAKEUP_PREEMPT)) return; - if (sched_feat(WAKEUP_OVERLAP) && sync) { + if (sched_feat(WAKEUP_OVERLAP) && (sync || + (se->avg_overlap < sysctl_sched_migration_cost && + pse->avg_overlap < sysctl_sched_migration_cost))) { resched_task(curr); return; } From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 11 Feb 2009 15:10:27 +0100 Subject: [PATCH 524/566] x86, ptrace, mm: fix double-free on race Ptrace_detach() races with __ptrace_unlink() if the traced task is reaped while detaching. This might cause a double-free of the BTS buffer. Change the ptrace_detach() path to only do the memory accounting in ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace() which will be called from __ptrace_unlink(). The fix follows a proposal from Oleg Nesterov. Reported-by: Oleg Nesterov Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 16 ++++++++++------ include/linux/mm.h | 1 + mm/mlock.c | 7 ++++++- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb9..5a4c23d89892 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index e8ddc98b8405..3d7fb44d7d7e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1305,5 +1305,6 @@ void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); extern void free_locked_buffer(void *buffer, size_t size); +extern void release_locked_buffer(void *buffer, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 028ec482fdd4..2b57f7e60390 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -657,7 +657,7 @@ void *alloc_locked_buffer(size_t size) return buffer; } -void free_locked_buffer(void *buffer, size_t size) +void release_locked_buffer(void *buffer, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -667,6 +667,11 @@ void free_locked_buffer(void *buffer, size_t size) current->mm->locked_vm -= pgsz; up_write(¤t->mm->mmap_sem); +} + +void free_locked_buffer(void *buffer, size_t size) +{ + release_locked_buffer(buffer, size); kfree(buffer); } From 17993b49b1f540aace8e9b4242530d0b3376eb2a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Feb 2009 17:20:51 +0100 Subject: [PATCH 525/566] x86: make hibernation always-possible This commit: aced3ce: x86/Voyager: remove HIBERNATION Kconfig quirk Made hibernation only available on UP - instead of making it available on all of x86. Fix it. Reported-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4a27aa41f6df..148c112c9ca4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -140,7 +140,6 @@ config HAVE_CPUMASK_OF_CPU_MAP config ARCH_HIBERNATION_POSSIBLE def_bool y - depends on !SMP config ARCH_SUSPEND_POSSIBLE def_bool y From ba1511bf7fbda452138e4096bf10d5a382710f4f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Feb 2009 23:38:25 +0530 Subject: [PATCH 526/566] x86: kernel/mpparse.c fix compilation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/mpparse.c: In function ‘smp_scan_config’: arch/x86/kernel/mpparse.c:696: warning: format ‘%08lx’ expects type ‘long unsigned int’, but argument 3 has type ‘phys_addr_t’ arch/x86/kernel/mpparse.c: In function ‘update_mp_table’: arch/x86/kernel/mpparse.c:1014: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 2 has type ‘phys_addr_t’ Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 66ebb823f390..200764453195 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -692,8 +692,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, #endif mpf_found = mpf; - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", - mpf, virt_to_phys(mpf)); + printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", + mpf, (u64)virt_to_phys(mpf)); if (!reserve) return 1; @@ -1011,7 +1011,7 @@ static int __init update_mp_table(void) if (!smp_check_mpc(mpc, oem, str)) return 0; - printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); + printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { From 17c9d12e126cb0de8d535dc1908c4819d712bc68 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 11 Feb 2009 16:34:16 +0000 Subject: [PATCH 527/566] Do not account for hugetlbfs quota at mmap() time if mapping [SHM|MAP]_NORESERVE Commit 5a6fe125950676015f5108fb71b2a67441755003 brought hugetlbfs more in line with the core VM by obeying VM_NORESERVE and not reserving hugepages for both shared and private mappings when [SHM|MAP]_NORESERVE are specified. However, it is still taking filesystem quota unconditionally. At fault time, if there are no reserves and attempt is made to allocate the page and account for filesystem quota. If either fail, the fault fails. The impact is that quota is getting accounted for twice. This patch partially reverts 5a6fe125950676015f5108fb71b2a67441755003. To help prevent this mistake happening again, it improves the documentation of hugetlb_reserve_pages() Reported-by: Andy Whitcroft Signed-off-by: Mel Gorman Acked-by: Andy Whitcroft Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 69 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 207464209546..107da3d809a8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2272,9 +2272,17 @@ int hugetlb_reserve_pages(struct inode *inode, struct vm_area_struct *vma, int acctflag) { - long ret = 0, chg; + long ret, chg; struct hstate *h = hstate_inode(inode); + /* + * Only apply hugepage reservation if asked. At fault time, an + * attempt will be made for VM_NORESERVE to allocate a page + * and filesystem quota without using reserves + */ + if (acctflag & VM_NORESERVE) + return 0; + /* * Shared mappings base their reservation on the number of pages that * are already allocated on behalf of the file. Private mappings need @@ -2283,42 +2291,47 @@ int hugetlb_reserve_pages(struct inode *inode, */ if (!vma || vma->vm_flags & VM_SHARED) chg = region_chg(&inode->i_mapping->private_list, from, to); - else - chg = to - from; - - if (chg < 0) - return chg; - - if (hugetlb_get_quota(inode->i_mapping, chg)) - return -ENOSPC; - - /* - * Only apply hugepage reservation if asked. We still have to - * take the filesystem quota because it is an upper limit - * defined for the mount and not necessarily memory as a whole - */ - if (acctflag & VM_NORESERVE) { - reset_vma_resv_huge_pages(vma); - return 0; - } - - ret = hugetlb_acct_memory(h, chg); - if (ret < 0) { - hugetlb_put_quota(inode->i_mapping, chg); - return ret; - } - if (!vma || vma->vm_flags & VM_SHARED) - region_add(&inode->i_mapping->private_list, from, to); else { struct resv_map *resv_map = resv_map_alloc(); - if (!resv_map) return -ENOMEM; + chg = to - from; + set_vma_resv_map(vma, resv_map); set_vma_resv_flags(vma, HPAGE_RESV_OWNER); } + if (chg < 0) + return chg; + + /* There must be enough filesystem quota for the mapping */ + if (hugetlb_get_quota(inode->i_mapping, chg)) + return -ENOSPC; + + /* + * Check enough hugepages are available for the reservation. + * Hand back the quota if there are not + */ + ret = hugetlb_acct_memory(h, chg); + if (ret < 0) { + hugetlb_put_quota(inode->i_mapping, chg); + return ret; + } + + /* + * Account for the reservations made. Shared mappings record regions + * that have reservations as they are shared by multiple VMAs. + * When the last VMA disappears, the region map says how much + * the reservation was and the page cache tells how much of + * the reservation was consumed. Private mappings are per-VMA and + * only the consumed reservations are tracked. When the VMA + * disappears, the original reservation is the VMA size and the + * consumed reservations are stored in the map. Hence, nothing + * else has to be done for private mappings here + */ + if (!vma || vma->vm_flags & VM_SHARED) + region_add(&inode->i_mapping->private_list, from, to); return 0; } From b12bdaf11f935d7be030207e3c77faeaeab8ded3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 11 Feb 2009 16:43:58 -0500 Subject: [PATCH 528/566] x86: use regparm(3) for passed-in pt_regs pointer Some syscalls need to access the pt_regs structure, either to copy user register state or to modifiy it. This patch adds stubs to load the address of the pt_regs struct into the %eax register, and changes the syscalls to take the pointer as an argument instead of relying on the assumption that the pt_regs structure overlaps the function arguments. Drop the use of regparm(1) due to concern about gcc bugs, and to move in the direction of the eventual removal of regparm(0) for asmlinkage. Signed-off-by: Brian Gerst Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/linkage.h | 7 ------- arch/x86/include/asm/syscalls.h | 25 ++++++++++--------------- arch/x86/kernel/ioport.c | 3 ++- arch/x86/kernel/process_32.c | 27 +++++++++++++++++---------- arch/x86/kernel/signal.c | 25 ++++++++++++++++--------- arch/x86/kernel/vm86_32.c | 11 ++++++----- 6 files changed, 51 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 2fd5926fb97d..5d98d0b68ffc 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -17,13 +17,6 @@ */ #define asmregparm __attribute__((regparm(3))) -/* - * For syscalls that need a pointer to the pt_regs struct (ie. fork). - * The regs pointer is passed in %eax as the first argument. The - * remaining function arguments remain on the stack. - */ -#define ptregscall __attribute__((regparm(1))) - /* * Make sure the compiler doesn't do anything stupid with the * arguments on the stack - they are owned by the *caller*, not diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 617295255a17..77bb31a88ba8 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -29,26 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 /* kernel/process_32.c */ -ptregscall int sys_fork(struct pt_regs *); -ptregscall int sys_clone(struct pt_regs *, unsigned long, - unsigned long, int __user *, - unsigned long, int __user *); -ptregscall int sys_vfork(struct pt_regs *); -ptregscall int sys_execve(struct pt_regs *, char __user *, - char __user * __user *, - char __user * __user *); +int sys_fork(struct pt_regs *); +int sys_clone(struct pt_regs *); +int sys_vfork(struct pt_regs *); +int sys_execve(struct pt_regs *); /* kernel/signal_32.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -ptregscall int sys_sigaltstack(struct pt_regs *, const stack_t __user *, - stack_t __user *); -ptregscall unsigned long sys_sigreturn(struct pt_regs *); -ptregscall int sys_rt_sigreturn(struct pt_regs *); +int sys_sigaltstack(struct pt_regs *); +unsigned long sys_sigreturn(struct pt_regs *); +int sys_rt_sigreturn(struct pt_regs *); /* kernel/ioport.c */ -ptregscall long sys_iopl(struct pt_regs *, unsigned int); +long sys_iopl(struct pt_regs *); /* kernel/sys_i386_32.c */ asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, @@ -64,8 +59,8 @@ struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ -ptregscall int sys_vm86old(struct pt_regs *, struct vm86_struct __user *); -ptregscall int sys_vm86(struct pt_regs *, unsigned long, unsigned long); +int sys_vm86old(struct pt_regs *); +int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 7ec148646312..e41980a373ab 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -131,8 +131,9 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) } #ifdef CONFIG_X86_32 -ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level) +long sys_iopl(struct pt_regs *regs) { + unsigned int level = regs->bx; struct thread_struct *t = ¤t->thread; int rc; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5a9dcfb01f71..fec79ad85dc6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -603,15 +603,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -ptregscall int sys_fork(struct pt_regs *regs) +int sys_fork(struct pt_regs *regs) { return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } -ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, - unsigned long newsp, int __user *parent_tidptr, - unsigned long unused, int __user *child_tidptr) +int sys_clone(struct pt_regs *regs) { + unsigned long clone_flags; + unsigned long newsp; + int __user *parent_tidptr, *child_tidptr; + + clone_flags = regs->bx; + newsp = regs->cx; + parent_tidptr = (int __user *)regs->dx; + child_tidptr = (int __user *)regs->di; if (!newsp) newsp = regs->sp; return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); @@ -627,7 +633,7 @@ ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, * do not have enough call-clobbered registers to hold all * the information you need. */ -ptregscall int sys_vfork(struct pt_regs *regs) +int sys_vfork(struct pt_regs *regs) { return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } @@ -635,18 +641,19 @@ ptregscall int sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. */ -ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename, - char __user * __user *argv, - char __user * __user *envp) +int sys_execve(struct pt_regs *regs) { int error; char *filename; - filename = getname(u_filename); + filename = getname((char __user *) regs->bx); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, argv, envp, regs); + error = do_execve(filename, + (char __user * __user *) regs->cx, + (char __user * __user *) regs->dx, + regs); if (error == 0) { /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d7a158367e38..ccfb27412f0f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -549,23 +549,27 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, #endif /* CONFIG_X86_32 */ #ifdef CONFIG_X86_32 -ptregscall int -sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss, - stack_t __user *uoss) +int sys_sigaltstack(struct pt_regs *regs) +{ + const stack_t __user *uss = (const stack_t __user *)regs->bx; + stack_t __user *uoss = (stack_t __user *)regs->cx; + + return do_sigaltstack(uss, uoss, regs->sp); +} #else /* !CONFIG_X86_32 */ asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) -#endif /* CONFIG_X86_32 */ { return do_sigaltstack(uss, uoss, regs->sp); } +#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -ptregscall unsigned long sys_sigreturn(struct pt_regs *regs) +unsigned long sys_sigreturn(struct pt_regs *regs) { struct sigframe __user *frame; unsigned long ax; @@ -629,13 +633,16 @@ badframe: } #ifdef CONFIG_X86_32 -ptregscall int sys_rt_sigreturn(struct pt_regs *regs) -#else /* !CONFIG_X86_32 */ -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) -#endif /* CONFIG_X86_32 */ +int sys_rt_sigreturn(struct pt_regs *regs) { return do_rt_sigreturn(regs); } +#else /* !CONFIG_X86_32 */ +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +{ + return do_rt_sigreturn(regs); +} +#endif /* CONFIG_X86_32 */ /* * OK, we're invoking a handler: diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 8fa6ba7c9233..d7ac84e7fc1c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,8 +197,9 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86) +int sys_vm86old(struct pt_regs *regs) { + struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -226,7 +227,7 @@ out: } -ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg) +int sys_vm86(struct pt_regs *regs) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -238,12 +239,12 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a struct vm86plus_struct __user *v86; tsk = current; - switch (cmd) { + switch (regs->bx) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(cmd, (int)arg); + ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -260,7 +261,7 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)arg; + v86 = (struct vm86plus_struct __user *)regs->cx; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); From 1001c9fb8721ab395e21f571ed2aaa523cdd1e29 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Wed, 11 Feb 2009 13:04:18 -0800 Subject: [PATCH 529/566] migration: migrate_vmas should check "vma" migrate_vmas() should check "vma" not "vma->vm_next" for for-loop condition. Signed-off-by: Daisuke Nishimura Cc: Christoph Lameter Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 2bb4e1d63520..a9eff3f092f6 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1129,7 +1129,7 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to, struct vm_area_struct *vma; int err = 0; - for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) { + for (vma = mm->mmap; vma && !err; vma = vma->vm_next) { if (vma->vm_ops && vma->vm_ops->migrate) { err = vma->vm_ops->migrate(vma, to, from, flags); if (err) From 57f63bc8fe79e6598e7253f10f53f58c9fdc57be Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Wed, 11 Feb 2009 13:04:19 -0800 Subject: [PATCH 530/566] rtc: update maintainership of pxa rtc driver Signed-off-by: Robert Jarzmik Signed-off-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 ++++++ drivers/rtc/rtc-pxa.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0ea3a6d98714..6d5f728f989a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3537,6 +3537,12 @@ S: Maintained PXA MMCI DRIVER S: Orphan +PXA RTC DRIVER +P: Robert Jarzmik +M: robert.jarzmik@free.fr +L: rtc-linux@googlegroups.com +S: Maintained + QLOGIC QLA2XXX FC-SCSI DRIVER P: Andrew Vasquez M: linux-driver@qlogic.com diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index bd56a033bfd0..bb8cc05605ac 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -485,7 +485,7 @@ static void __exit pxa_rtc_exit(void) module_init(pxa_rtc_init); module_exit(pxa_rtc_exit); -MODULE_AUTHOR("Robert Jarzmik"); +MODULE_AUTHOR("Robert Jarzmik "); MODULE_DESCRIPTION("PXA27x/PXA3xx Realtime Clock Driver (RTC)"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:pxa-rtc"); From 067f1293cc5916f8d88b602beeb8787d58515608 Mon Sep 17 00:00:00 2001 From: Marco La Porta Date: Wed, 11 Feb 2009 13:04:20 -0800 Subject: [PATCH 531/566] lxfb: properly alloc cmap in all cases and don't leak the memory We weren't properly allocating the cmap for depths greater than 8bpp, which caused pain for things like DirectFB. Also, we never freed the cmap memory upon module unload.. [dilinger@debian.org: dropped unnecessary code and clean up patch] [dilinger@debian.org: add error checking and handling] Signed-off-by: Andres Salomon Cc: Jordan Crouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/geode/lxfb_core.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/video/geode/lxfb_core.c b/drivers/video/geode/lxfb_core.c index b965ecdbc604..889cbe39e580 100644 --- a/drivers/video/geode/lxfb_core.c +++ b/drivers/video/geode/lxfb_core.c @@ -278,13 +278,10 @@ static int lxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) static int lxfb_set_par(struct fb_info *info) { - if (info->var.bits_per_pixel > 8) { + if (info->var.bits_per_pixel > 8) info->fix.visual = FB_VISUAL_TRUECOLOR; - fb_dealloc_cmap(&info->cmap); - } else { + else info->fix.visual = FB_VISUAL_PSEUDOCOLOR; - fb_alloc_cmap(&info->cmap, 1<var.bits_per_pixel, 0); - } info->fix.line_length = lx_get_pitch(info->var.xres, info->var.bits_per_pixel); @@ -451,6 +448,11 @@ static struct fb_info * __init lxfb_init_fbinfo(struct device *dev) info->pseudo_palette = (void *)par + sizeof(struct lxfb_par); + if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) { + framebuffer_release(info); + return NULL; + } + info->var.grayscale = 0; return info; @@ -579,8 +581,10 @@ err: pci_release_region(pdev, 3); } - if (info) + if (info) { + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); + } return ret; } @@ -604,6 +608,7 @@ static void lxfb_remove(struct pci_dev *pdev) iounmap(par->vp_regs); pci_release_region(pdev, 3); + fb_dealloc_cmap(&info->cmap); pci_set_drvdata(pdev, NULL); framebuffer_release(info); } From b14caecdbe7730bf82c8510f1ba52e00273e15c4 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Wed, 11 Feb 2009 13:04:22 -0800 Subject: [PATCH 532/566] gxfb: properly alloc cmap and plug cmap leak We weren't properly allocating the cmap for depths greater than 8bpp, which caused pain for things like DirectFB. Also, we never freed the cmap memory upon module unload.. Signed-off-by: Andres Salomon Cc: Marco La Porta Cc: Jordan Crouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/geode/gxfb_core.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c index 484118926318..2552cac39e1c 100644 --- a/drivers/video/geode/gxfb_core.c +++ b/drivers/video/geode/gxfb_core.c @@ -171,13 +171,10 @@ static int gxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) static int gxfb_set_par(struct fb_info *info) { - if (info->var.bits_per_pixel > 8) { + if (info->var.bits_per_pixel > 8) info->fix.visual = FB_VISUAL_TRUECOLOR; - fb_dealloc_cmap(&info->cmap); - } else { + else info->fix.visual = FB_VISUAL_PSEUDOCOLOR; - fb_alloc_cmap(&info->cmap, 1<var.bits_per_pixel, 0); - } info->fix.line_length = gx_line_delta(info->var.xres, info->var.bits_per_pixel); @@ -331,6 +328,11 @@ static struct fb_info * __init gxfb_init_fbinfo(struct device *dev) info->var.grayscale = 0; + if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) { + framebuffer_release(info); + return NULL; + } + return info; } @@ -443,8 +445,10 @@ static int __init gxfb_probe(struct pci_dev *pdev, const struct pci_device_id *i pci_release_region(pdev, 1); } - if (info) + if (info) { + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); + } return ret; } @@ -467,6 +471,7 @@ static void gxfb_remove(struct pci_dev *pdev) iounmap(par->gp_regs); pci_release_region(pdev, 1); + fb_dealloc_cmap(&info->cmap); pci_set_drvdata(pdev, NULL); framebuffer_release(info); From 35887b1cf74dc751dd0574b26515142d3cea9376 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Wed, 11 Feb 2009 13:04:23 -0800 Subject: [PATCH 533/566] gx1fb: properly alloc cmap and plug cmap leak We weren't properly allocating the cmap for depths greater than 8bpp, which caused pain for things like DirectFB. Also, we never freed the cmap memory upon module unload.. Signed-off-by: Andres Salomon Cc: Marco La Porta Cc: Jordan Crouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/geode/gx1fb_core.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/video/geode/gx1fb_core.c b/drivers/video/geode/gx1fb_core.c index 751e491ca8c8..f20eff8c4a81 100644 --- a/drivers/video/geode/gx1fb_core.c +++ b/drivers/video/geode/gx1fb_core.c @@ -136,13 +136,10 @@ static int gx1fb_set_par(struct fb_info *info) { struct geodefb_par *par = info->par; - if (info->var.bits_per_pixel == 16) { + if (info->var.bits_per_pixel == 16) info->fix.visual = FB_VISUAL_TRUECOLOR; - fb_dealloc_cmap(&info->cmap); - } else { + else info->fix.visual = FB_VISUAL_PSEUDOCOLOR; - fb_alloc_cmap(&info->cmap, 1<var.bits_per_pixel, 0); - } info->fix.line_length = gx1_line_delta(info->var.xres, info->var.bits_per_pixel); @@ -315,6 +312,10 @@ static struct fb_info * __init gx1fb_init_fbinfo(struct device *dev) if (!par->panel_x) par->enable_crt = 1; /* fall back to CRT if no panel is specified */ + if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) { + framebuffer_release(info); + return NULL; + } return info; } @@ -374,8 +375,11 @@ static int __init gx1fb_probe(struct pci_dev *pdev, const struct pci_device_id * release_mem_region(gx1_gx_base() + 0x8300, 0x100); } - if (info) + if (info) { + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); + } + return ret; } @@ -395,6 +399,7 @@ static void gx1fb_remove(struct pci_dev *pdev) iounmap(par->dc_regs); release_mem_region(gx1_gx_base() + 0x8300, 0x100); + fb_dealloc_cmap(&info->cmap); pci_set_drvdata(pdev, NULL); framebuffer_release(info); From fc3501d411d34823fb9be248a95a0c44f945866f Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Wed, 11 Feb 2009 13:04:23 -0800 Subject: [PATCH 534/566] mm: fix dirty_bytes/dirty_background_bytes sysctls on 64bit arches We need to pass an unsigned long as the minimum, because it gets casted to an unsigned long in the sysctl handler. If we pass an int, we'll access four more bytes on 64bit arches, resulting in a random minimum value. [rientjes@google.com: fix type of `old_bytes'] Signed-off-by: Sven Wegener Cc: Peter Zijlstra Cc: Dave Chinner Cc: Christoph Lameter Cc: David Rientjes Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 5 +++-- mm/page-writeback.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 790f9d785663..c5ef44ff850f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -101,6 +101,7 @@ static int two = 2; static int zero; static int one = 1; +static unsigned long one_ul = 1; static int one_hundred = 100; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -974,7 +975,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &dirty_background_bytes_handler, .strategy = &sysctl_intvec, - .extra1 = &one, + .extra1 = &one_ul, }, { .ctl_name = VM_DIRTY_RATIO, @@ -995,7 +996,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &dirty_bytes_handler, .strategy = &sysctl_intvec, - .extra1 = &one, + .extra1 = &one_ul, }, { .procname = "dirty_writeback_centisecs", diff --git a/mm/page-writeback.c b/mm/page-writeback.c index dc32dae01e5f..c17005e73974 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -209,7 +209,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int old_bytes = vm_dirty_bytes; + unsigned long old_bytes = vm_dirty_bytes; int ret; ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); From 8fe4cd0dc5ea43760c59eb256404188272cc95dd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Feb 2009 13:04:25 -0800 Subject: [PATCH 535/566] jbd: fix return value of journal_start_commit() journal_start_commit() returns 1 if either a transaction is committing or the function has queued a transaction commit. But it returns 0 if we raced with somebody queueing the transaction commit as well. This resulted in ext3_sync_fs() not functioning correctly (description from Arthur Jones): In the case of a data=ordered umount with pending long symlinks which are delayed due to a long list of other I/O on the backing block device, this causes the buffer associated with the long symlinks to not be moved to the inode dirty list in the second phase of fsync_super. Then, before they can be dirtied again, kjournald exits, seeing the UMOUNT flag and the dirty pages are never written to the backing block device, causing long symlink corruption and exposing new or previously freed block data to userspace. This can be reproduced with a script created by Eric Sandeen : #!/bin/bash umount /mnt/test2 mount /dev/sdb4 /mnt/test2 rm -f /mnt/test2/* dd if=/dev/zero of=/mnt/test2/bigfile bs=1M count=512 touch /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename ln -s /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename /mnt/test2/link umount /mnt/test2 mount /dev/sdb4 /mnt/test2 ls /mnt/test2/ This patch fixes journal_start_commit() to always return 1 when there's a transaction committing or queued for commit. Cc: Eric Sandeen Cc: Mike Snitzer Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/journal.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 9e4fa52d7dc8..e79c07812afa 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -427,7 +427,7 @@ int __log_space_left(journal_t *journal) } /* - * Called under j_state_lock. Returns true if a transaction was started. + * Called under j_state_lock. Returns true if a transaction commit was started. */ int __log_start_commit(journal_t *journal, tid_t target) { @@ -495,7 +495,8 @@ int journal_force_commit_nested(journal_t *journal) /* * Start a commit of the current running transaction (if any). Returns true - * if a transaction was started, and fills its tid in at *ptid + * if a transaction is going to be committed (or is currently already + * committing), and fills its tid in at *ptid */ int journal_start_commit(journal_t *journal, tid_t *ptid) { @@ -505,15 +506,19 @@ int journal_start_commit(journal_t *journal, tid_t *ptid) if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; - ret = __log_start_commit(journal, tid); - if (ret && ptid) + __log_start_commit(journal, tid); + /* There's a running transaction and we've just made sure + * it's commit has been scheduled. */ + if (ptid) *ptid = tid; - } else if (journal->j_committing_transaction && ptid) { + ret = 1; + } else if (journal->j_committing_transaction) { /* * If ext3_write_super() recently started a commit, then we * have to wait for completion of that transaction */ - *ptid = journal->j_committing_transaction->t_tid; + if (ptid) + *ptid = journal->j_committing_transaction->t_tid; ret = 1; } spin_unlock(&journal->j_state_lock); From 02ac597c9b86af49b2016aa98aee20ab59dbf0d2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Feb 2009 13:04:26 -0800 Subject: [PATCH 536/566] ext3: revert "ext3: wait on all pending commits in ext3_sync_fs" This reverts commit c87591b719737b4e91eb1a9fa8fd55a4ff1886d6. Since journal_start_commit() is now fixed to return 1 when we started a transaction commit, there's some transaction waiting to be committed or there's a transaction already committing, we don't need to call ext3_force_commit() in ext3_sync_fs(). Furthermore ext3_force_commit() can unnecessarily create sync transaction which is expensive so it's worthwhile to remove it when we can. Cc: Eric Sandeen Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index b70d90e08a3c..4a970411a458 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2428,12 +2428,13 @@ static void ext3_write_super (struct super_block * sb) static int ext3_sync_fs(struct super_block *sb, int wait) { - sb->s_dirt = 0; - if (wait) - ext3_force_commit(sb); - else - journal_start_commit(EXT3_SB(sb)->s_journal, NULL); + tid_t target; + sb->s_dirt = 0; + if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { + if (wait) + log_wait_commit(EXT3_SB(sb)->s_journal, target); + } return 0; } From 508b9f8efdad123b202b228f71f59feba51e4fb5 Mon Sep 17 00:00:00 2001 From: MinChan Kim Date: Wed, 11 Feb 2009 13:04:27 -0800 Subject: [PATCH 537/566] mm: fix mlocked page counter mismatch When I tested following program, I found that the mlocked counter is strange. It cannot free some mlocked pages. It is because try_to_unmap_file() doesn't check real page mappings in vmas. That is because the goal of an address_space for a file is to find all processes into which the file's specific interval is mapped. It is related to the file's interval, not to pages. Even if the page isn't really mapped by the vma, it returns SWAP_MLOCK since the vma has VM_LOCKED, then calls try_to_mlock_page. After this the mlocked counter is increased again. COWed anon page in a file-backed vma could be a such case. This patch resolves it. -- my test program -- int main() { mlockall(MCL_CURRENT); return 0; } -- before -- root@barrios-target-linux:~# cat /proc/meminfo | egrep 'Mlo|Unev' Unevictable: 0 kB Mlocked: 0 kB -- after -- root@barrios-target-linux:~# cat /proc/meminfo | egrep 'Mlo|Unev' Unevictable: 8 kB Mlocked: 8 kB Signed-off-by: MinChan Kim Acked-by: Lee Schermerhorn Acked-by: KOSAKI Motohiro Tested-by: Lee Schermerhorn Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index ac4af8cffbf9..16521664010d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1072,7 +1072,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) spin_lock(&mapping->i_mmap_lock); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { if (MLOCK_PAGES && unlikely(unlock)) { - if (!(vma->vm_flags & VM_LOCKED)) + if (!((vma->vm_flags & VM_LOCKED) && + page_mapped_in_vma(page, vma))) continue; /* must visit all vmas */ ret = SWAP_MLOCK; } else { From 7dcce1334fa5879dc12bee001962e8f74bce60f1 Mon Sep 17 00:00:00 2001 From: Marcel Selhorst Date: Wed, 11 Feb 2009 13:04:27 -0800 Subject: [PATCH 538/566] tpm: correct email address for tpm_infineon-driver Update my email address. Signed-off-by: Marcel Selhorst Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- drivers/char/tpm/tpm_infineon.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6d5f728f989a..8ee281518c77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4291,8 +4291,8 @@ P: Rajiv Andrade M: srajiv@linux.vnet.ibm.com W: http://tpmdd.sourceforge.net P: Marcel Selhorst -M: tpm@selhorst.net -W: http://www.prosec.rub.de/tpm/ +M: m.selhorst@sirrix.com +W: http://www.sirrix.com L: tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers) S: Maintained diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index 726ee8a0277f..ecba4942fc8e 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -4,7 +4,7 @@ * SLD 9630 TT 1.1 and SLB 9635 TT 1.2 Trusted Platform Module * Specifications at www.trustedcomputinggroup.org * - * Copyright (C) 2005, Marcel Selhorst + * Copyright (C) 2005, Marcel Selhorst * Sirrix AG - security technologies, http://www.sirrix.com and * Applied Data Security Group, Ruhr-University Bochum, Germany * Project-Homepage: http://www.prosec.rub.de/tpm @@ -636,7 +636,7 @@ static void __exit cleanup_inf(void) module_init(init_inf); module_exit(cleanup_inf); -MODULE_AUTHOR("Marcel Selhorst "); +MODULE_AUTHOR("Marcel Selhorst "); MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2"); MODULE_VERSION("1.9"); MODULE_LICENSE("GPL"); From d4097456cd1d9285e876fc5d08a789462804cc28 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-Koenig Date: Wed, 11 Feb 2009 13:04:28 -0800 Subject: [PATCH 539/566] video/framebuffer: move the probe func into .devinit.text in Blackfin LCD driver Signed-off-by: Uwe Kleine-Koenig Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/bfin-t350mcqb-fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c index 2a423d3a2a8e..90cfddabf1f7 100644 --- a/drivers/video/bfin-t350mcqb-fb.c +++ b/drivers/video/bfin-t350mcqb-fb.c @@ -447,7 +447,7 @@ static irqreturn_t bfin_t350mcqb_irq_error(int irq, void *dev_id) return IRQ_HANDLED; } -static int __init bfin_t350mcqb_probe(struct platform_device *pdev) +static int __devinit bfin_t350mcqb_probe(struct platform_device *pdev) { struct bfin_t350mcqbfb_info *info; struct fb_info *fbinfo; From 2e9c23724328ae4e56c42a35a717a956d7d3001d Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 11 Feb 2009 13:04:29 -0800 Subject: [PATCH 540/566] memcg: use __GFP_NOWARN in page cgroup allocation page_cgroup's page allocation at init/memory hotplug uses kmalloc() and vmalloc(). If kmalloc() failes, vmalloc() is used. This is because vmalloc() is very limited resource on 32bit systems. We want to use kmalloc() first. But in this kind of call, __GFP_NOWARN should be specified. Reported-by: Heiko Carstens Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 7006a11350c8..ceecfbb143fa 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -114,7 +114,8 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; if (slab_is_available()) { - base = kmalloc_node(table_size, GFP_KERNEL, nid); + base = kmalloc_node(table_size, + GFP_KERNEL | __GFP_NOWARN, nid); if (!base) base = vmalloc_node(table_size, nid); } else { From f40b45a2e45b0f02aeedfcfbb28d8e2d4b8b86b1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Feb 2009 13:04:31 -0800 Subject: [PATCH 541/566] kernel-doc: preferred ending marker and examples Fix kernel-doc-nano-HOWTO.txt to use */ as the ending marker in kernel-doc examples and state that */ is the preferred ending marker. Signed-off-by: Randy Dunlap Reported-by: Robert Love Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-doc-nano-HOWTO.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index d73fbd2b2b45..026ec7d57384 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -43,7 +43,8 @@ Only comments so marked will be considered by the kernel-doc scripts, and any comment so marked must be in kernel-doc format. Do not use "/**" to be begin a comment block unless the comment block contains kernel-doc formatted comments. The closing comment marker for -kernel-doc comments can be either "*/" or "**/". +kernel-doc comments can be either "*/" or "**/", but "*/" is +preferred in the Linux kernel tree. Kernel-doc comments should be placed just before the function or data structure being described. @@ -63,7 +64,7 @@ Example kernel-doc function comment: * comment lines. * * The longer description can have multiple paragraphs. - **/ + */ The first line, with the short description, must be on a single line. @@ -85,7 +86,7 @@ Example kernel-doc data structure comment. * perhaps with more lines and words. * * Longer description of this structure. - **/ + */ The kernel-doc function comments describe each parameter to the function, in order, with the @name lines. From b4870bc5ee8c7a37541a3eb1208b5c76c13a078a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Feb 2009 13:04:33 -0800 Subject: [PATCH 542/566] kernel-doc: fix syscall wrapper processing Fix kernel-doc processing of SYSCALL wrappers. The SYSCALL wrapper patches played havoc with kernel-doc for syscalls. Syscalls that were scanned for DocBook processing reported warnings like this one, for sys_tgkill: Warning(kernel/signal.c:2285): No description found for parameter 'tgkill' Warning(kernel/signal.c:2285): No description found for parameter 'pid_t' Warning(kernel/signal.c:2285): No description found for parameter 'int' because the macro parameters all "look like" function parameters, although they are not: /** * sys_tgkill - send signal to one specific thread * @tgid: the thread group ID of the thread * @pid: the PID of the thread * @sig: signal to be sent * * This syscall also checks the @tgid and returns -ESRCH even if the PID * exists but it's not belonging to the target process anymore. This * method solves the problem of threads exiting and PIDs getting reused. */ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) { ... This patch special-cases the handling SYSCALL_DEFINE* function prototypes by expanding them to long sys_foobar(type1 arg1, type1 arg2, ...) Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/kernel-doc | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 8bb83a100edb..0f11870116dc 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1827,6 +1827,40 @@ sub reset_state { $state = 0; } +sub syscall_munge() { + my $void = 0; + + $prototype =~ s@[\r\n\t]+@ @gos; # strip newlines/CR's/tabs +## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) { + if ($prototype =~ m/SYSCALL_DEFINE0/) { + $void = 1; +## $prototype = "long sys_$1(void)"; + } + + $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name + if ($prototype =~ m/long (sys_.*?),/) { + $prototype =~ s/,/\(/; + } elsif ($void) { + $prototype =~ s/\)/\(void\)/; + } + + # now delete all of the odd-number commas in $prototype + # so that arg types & arg names don't have a comma between them + my $count = 0; + my $len = length($prototype); + if ($void) { + $len = 0; # skip the for-loop + } + for (my $ix = 0; $ix < $len; $ix++) { + if (substr($prototype, $ix, 1) eq ',') { + $count++; + if ($count % 2 == 1) { + substr($prototype, $ix, 1) = ' '; + } + } + } +} + sub process_state3_function($$) { my $x = shift; my $file = shift; @@ -1839,11 +1873,15 @@ sub process_state3_function($$) { elsif ($x =~ /([^\{]*)/) { $prototype .= $1; } + if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) { $prototype =~ s@/\*.*?\*/@@gos; # strip comments. $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's. $prototype =~ s@^\s+@@gos; # strip leading spaces - dump_function($prototype,$file); + if ($prototype =~ /SYSCALL_DEFINE/) { + syscall_munge(); + } + dump_function($prototype, $file); reset_state(); } } From c318c7ac49f9139f55da619bbace6137e1509390 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 11 Feb 2009 13:04:34 -0800 Subject: [PATCH 543/566] rtc: t reaches -1, tested 0 With a postfix decrement t will reach -1 rather than 0, so neither the warning nor the `goto error_out' will occur. Signed-off-by: Roel Kluin Acked-by: Manuel Lauss Acked-by: Alessandro Zummo Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-au1xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c index 8906a688e6a6..979ed0406ce9 100644 --- a/drivers/rtc/rtc-au1xxx.c +++ b/drivers/rtc/rtc-au1xxx.c @@ -81,7 +81,7 @@ static int __devinit au1xtoy_rtc_probe(struct platform_device *pdev) if (au_readl(SYS_TOYTRIM) != 32767) { /* wait until hardware gives access to TRIM register */ t = 0x00100000; - while ((au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_T0S) && t--) + while ((au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_T0S) && --t) msleep(1); if (!t) { From 01c4a4283137d24c9cc3785f1f312e895a18f273 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 11 Feb 2009 13:04:35 -0800 Subject: [PATCH 544/566] cgroups: add Li Zefan as a maintainer Add Li Zefan as co-maintainer. Acked-by: Paul Menage Acked-by: Li Zefan Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8ee281518c77..db65b4e6d132 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1202,6 +1202,8 @@ S: Supported CONTROL GROUPS (CGROUPS) P: Paul Menage M: menage@google.com +P: Li Zefan +M: lizf@cn.fujitsu.com L: containers@lists.linux-foundation.org S: Maintained From cfebe563bd0a3ff97e1bc167123120d59c7a84db Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 11 Feb 2009 13:04:36 -0800 Subject: [PATCH 545/566] cgroups: fix lockdep subclasses overflow I enabled all cgroup subsystems when compiling kernel, and then: # mount -t cgroup -o net_cls xxx /mnt # mkdir /mnt/0 This showed up immediately: BUG: MAX_LOCKDEP_SUBCLASSES too low! turning off the locking correctness validator. It's caused by the cgroup hierarchy lock: for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss->root == root) mutex_lock_nested(&ss->hierarchy_mutex, i); } Now we have 9 cgroup subsystems, and the above 'i' for net_cls is 8, but MAX_LOCKDEP_SUBCLASSES is 8. This patch uses different lockdep keys for different subsystems. Signed-off-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 1 + kernel/cgroup.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e4e8e117d27d..499900d0cee7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -378,6 +378,7 @@ struct cgroup_subsys { * - initiating hotplug events */ struct mutex hierarchy_mutex; + struct lock_class_key subsys_key; /* * Link to parent, and list entry in parent's children. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5a54ff42874e..e14db9c089b9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2351,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root) for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss->root == root) - mutex_lock_nested(&ss->hierarchy_mutex, i); + mutex_lock(&ss->hierarchy_mutex); } } @@ -2637,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) BUG_ON(!list_empty(&init_task.tasks)); mutex_init(&ss->hierarchy_mutex); + lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key); ss->active = 1; } From 0e4a9b59282914fe057ab17027f55123964bc2e2 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 11 Feb 2009 13:04:37 -0800 Subject: [PATCH 546/566] ext2/xip: refuse to change xip flag during remount with busy inodes For a reason that I was unable to understand in three months of debugging, mount ext2 -o remount stopped working properly when remounting from regular operation to xip, or the other way around. According to a git bisect search, the problem was introduced with the VM_MIXEDMAP/PTE_SPECIAL rework in the vm: commit 70688e4dd1647f0ceb502bbd5964fa344c5eb411 Author: Nick Piggin Date: Mon Apr 28 02:13:02 2008 -0700 xip: support non-struct page backed memory In the failing scenario, the filesystem is mounted read only via root= kernel parameter on s390x. During remount (in rc.sysinit), the inodes of the bash binary and its libraries are busy and cannot be invalidated (the bash which is running rc.sysinit resides on subject filesystem). Afterwards, another bash process (running ifup-eth) recurses into a subshell, runs dup_mm (via fork). Some of the mappings in this bash process were created from inodes that could not be invalidated during remount. Both parent and child process crash some time later due to inconsistencies in their address spaces. The issue seems to be timing sensitive, various attempts to recreate it have failed. This patch refuses to change the xip flag during remount in case some inodes cannot be invalidated. This patch keeps users from running into that issue. [akpm@linux-foundation.org: cleanup] Signed-off-by: Carsten Otte Cc: Nick Piggin Cc: Jared Hulbert Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/super.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index da8bdeaa2e6d..7c6e3606f0ec 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1185,9 +1185,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) es = sbi->s_es; if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != (old_mount_opt & EXT2_MOUNT_XIP)) && - invalidate_inodes(sb)) - ext2_warning(sb, __func__, "busy inodes while remounting "\ - "xip remain in cache (no functional problem)"); + invalidate_inodes(sb)) { + ext2_warning(sb, __func__, "refusing change of xip flag " + "with busy inodes while remounting"); + sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; + sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; + } if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; if (*flags & MS_RDONLY) { From 6c5979631b4b03c9288776562c18036765e398c1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 11 Feb 2009 13:04:38 -0800 Subject: [PATCH 547/566] syscall define: fix uml compile bug With the new system call defines we get this on uml: arch/um/sys-i386/built-in.o: In function `sys_call_table': (.rodata+0x308): undefined reference to `sys_sigprocmask' Reason for this is that uml passes the preprocessor option -Dsigprocmask=kernel_sigprocmask to gcc when compiling the kernel. This causes SYSCALL_DEFINE3(sigprocmask, ...) to be expanded to SYSCALL_DEFINEx(3, kernel_sigprocmask, ...) and finally to a system call named sys_kernel_sigprocmask. However sys_sigprocmask is missing because of this. To avoid macro expansion for the system call name just concatenate the name at first define instead of carrying it through severel levels. This was pointed out by Al Viro. Signed-off-by: Heiko Carstens Cc: Geert Uytterhoeven Cc: Al Viro Reviewed-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0eda02ff2414..f9f900cfd066 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -95,13 +95,13 @@ struct old_linux_dirent; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) -#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) -#define SYSCALL_DEFINE1(...) SYSCALL_DEFINEx(1, __VA_ARGS__) -#define SYSCALL_DEFINE2(...) SYSCALL_DEFINEx(2, __VA_ARGS__) -#define SYSCALL_DEFINE3(...) SYSCALL_DEFINEx(3, __VA_ARGS__) -#define SYSCALL_DEFINE4(...) SYSCALL_DEFINEx(4, __VA_ARGS__) -#define SYSCALL_DEFINE5(...) SYSCALL_DEFINEx(5, __VA_ARGS__) -#define SYSCALL_DEFINE6(...) SYSCALL_DEFINEx(6, __VA_ARGS__) +#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) #ifdef CONFIG_PPC64 #define SYSCALL_ALIAS(alias, name) \ @@ -121,21 +121,21 @@ struct old_linux_dirent; #define SYSCALL_DEFINE(name) static inline long SYSC_##name #define SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)); \ - static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)); \ - asmlinkage long SyS_##name(__SC_LONG##x(__VA_ARGS__)) \ + asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ + static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ + asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ { \ __SC_TEST##x(__VA_ARGS__); \ - return (long) SYSC_##name(__SC_CAST##x(__VA_ARGS__)); \ + return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__)); \ } \ - SYSCALL_ALIAS(sys_##name, SyS_##name); \ - static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)) + SYSCALL_ALIAS(sys##name, SyS##name); \ + static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)) #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ #define SYSCALL_DEFINE(name) asmlinkage long sys_##name #define SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)) + asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ From 89e1219004b3657cc014521663eeef0744f1c99d Mon Sep 17 00:00:00 2001 From: Federico Cuello Date: Wed, 11 Feb 2009 13:04:39 -0800 Subject: [PATCH 548/566] writeback: fix break condition Commit dcf6a79dda5cc2a2bec183e50d829030c0972aaa ("write-back: fix nr_to_write counter") fixed nr_to_write counter, but didn't set the break condition properly. If nr_to_write == 0 after being decremented it will loop one more time before setting done = 1 and breaking the loop. [akpm@linux-foundation.org: coding-style fixes] Cc: Artem Bityutskiy Acked-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c17005e73974..6106a5c7ed44 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1051,20 +1051,23 @@ continue_unlock: } } - if (nr_to_write > 0) + if (nr_to_write > 0) { nr_to_write--; - else if (wbc->sync_mode == WB_SYNC_NONE) { - /* - * We stop writing back only if we are not - * doing integrity sync. In case of integrity - * sync we have to keep going because someone - * may be concurrently dirtying pages, and we - * might have synced a lot of newly appeared - * dirty pages, but have not synced all of the - * old dirty pages. - */ - done = 1; - break; + if (nr_to_write == 0 && + wbc->sync_mode == WB_SYNC_NONE) { + /* + * We stop writing back only if we are + * not doing integrity sync. In case of + * integrity sync we have to keep going + * because someone may be concurrently + * dirtying pages, and we might have + * synced a lot of newly appeared dirty + * pages, but have not synced all of the + * old dirty pages. + */ + done = 1; + break; + } } if (wbc->nonblocking && bdi_write_congested(bdi)) { From 3abdbf90a3ffb006108c831c56b092e35483b6ec Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 11 Feb 2009 13:04:40 -0800 Subject: [PATCH 549/566] parport: parport_serial, don't bind netmos ibm 0299 Since netmos 9835 with subids 0x1014(IBM):0x0299 is now bound with serial/8250_pci, because it has no parallel ports and subdevice id isn't in the expected form, return -ENODEV from probe function. This is performed in netmos preinit_hook. Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/parport/parport_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 101ed49a2d15..032db815b0f9 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -64,6 +64,11 @@ struct parport_pc_pci { static int __devinit netmos_parallel_init(struct pci_dev *dev, struct parport_pc_pci *card, int autoirq, int autodma) { + /* the rule described below doesn't hold for this device */ + if (dev->device == PCI_DEVICE_ID_NETMOS_9835 && + dev->subsystem_vendor == PCI_VENDOR_ID_IBM && + dev->subsystem_device == 0x0299) + return -ENODEV; /* * Netmos uses the subdevice ID to indicate the number of parallel * and serial ports. The form is 0x00PS, where

is the number of From 9480c53e9b2aa13a06283ffb96bb8f1873ac4e9a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 13:04:41 -0800 Subject: [PATCH 550/566] mm: rearrange exit_mmap() to unlock before arch_exit_mmap Christophe Saout reported [in precursor to: http://marc.info/?l=linux-kernel&m=123209902707347&w=4]: > Note that I also some a different issue with CONFIG_UNEVICTABLE_LRU. > Seems like Xen tears down current->mm early on process termination, so > that __get_user_pages in exit_mmap causes nasty messages when the > process had any mlocked pages. (in fact, it somehow manages to get into > the swapping code and produces a null pointer dereference trying to get > a swap token) Jeremy explained: Yes. In the normal case under Xen, an in-use pagetable is "pinned", meaning that it is RO to the kernel, and all updates must go via hypercall (or writes are trapped and emulated, which is much the same thing). An unpinned pagetable is not currently in use by any process, and can be directly accessed as normal RW pages. As an optimisation at process exit time, we unpin the pagetable as early as possible (switching the process to init_mm), so that all the normal pagetable teardown can happen with direct memory accesses. This happens in exit_mmap() -> arch_exit_mmap(). The munlocking happens a few lines below. The obvious thing to do would be to move arch_exit_mmap() to below the munlock code, but I think we'd want to call it even if mm->mmap is NULL, just to be on the safe side. Thus, this patch: exit_mmap() needs to unlock any locked vmas before calling arch_exit_mmap, as the latter may switch the current mm to init_mm, which would cause the former to fail. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Lee Schermerhorn Cc: Christophe Saout Cc: Keir Fraser Cc: Christophe Saout Cc: Alex Williamson Cc: [2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index eb1270bebe67..00ced3ee49a8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2084,12 +2084,8 @@ void exit_mmap(struct mm_struct *mm) unsigned long end; /* mm's last user has gone, and its about to be pulled down */ - arch_exit_mmap(mm); mmu_notifier_release(mm); - if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */ - return; - if (mm->locked_vm) { vma = mm->mmap; while (vma) { @@ -2098,7 +2094,13 @@ void exit_mmap(struct mm_struct *mm) vma = vma->vm_next; } } + + arch_exit_mmap(mm); + vma = mm->mmap; + if (!vma) /* Can happen if dup_mmap() received an OOM */ + return; + lru_add_drain(); flush_cache_mm(mm); tlb = tlb_gather_mmu(mm, 1); From 4d48a542b42747c36a5937447d9c3de7c897ea50 Mon Sep 17 00:00:00 2001 From: Paul Clements Date: Wed, 11 Feb 2009 13:04:45 -0800 Subject: [PATCH 551/566] nbd: fix I/O hang on disconnected nbds Fix a problem that causes I/O to a disconnected (or partially initialized) nbd device to hang indefinitely. To reproduce: # ioctl NBD_SET_SIZE_BLOCKS /dev/nbd23 514048 # dd if=/dev/nbd23 of=/dev/null bs=4096 count=1 ...hangs... This can also occur when an nbd device loses its nbd-client/server connection. Although we clear the queue of any outstanding I/Os after the client/server connection fails, any additional I/Os that get queued later will hang. This bug may also be the problem reported in this bug report: http://bugzilla.kernel.org/show_bug.cgi?id=12277 Testing would need to be performed to determine if the two issues are the same. This problem was introduced by the new request handling thread code ("NBD: allow nbd to be used locally", 3/2008), which entered into mainline around 2.6.25. The fix, which is fairly simple, is to restore the check for lo->sock being NULL in do_nbd_request. This causes I/O to an uninitialized nbd to immediately fail with an I/O error, as it did prior to the introduction of this bug. Signed-off-by: Paul Clements Reported-by: Jon Nelson Acked-by: Pavel Machek Cc: [2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 34f80fa6fed1..8299e2d3b611 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -549,6 +549,15 @@ static void do_nbd_request(struct request_queue * q) BUG_ON(lo->magic != LO_MAGIC); + if (unlikely(!lo->sock)) { + printk(KERN_ERR "%s: Attempted send on closed socket\n", + lo->disk->disk_name); + req->errors++; + nbd_end_request(req); + spin_lock_irq(q->queue_lock); + continue; + } + spin_lock_irq(&lo->queue_lock); list_add_tail(&req->queuelist, &lo->waiting_queue); spin_unlock_irq(&lo->queue_lock); From 507e2fbaaacb6f164b4125b87c5002f95143174b Mon Sep 17 00:00:00 2001 From: Ian Dall Date: Wed, 11 Feb 2009 13:04:46 -0800 Subject: [PATCH 552/566] w1: w1 temp calculation overflow fix Addresses http://bugzilla.kernel.org/show_bug.cgi?id=12646 When the temperature exceeds 32767 milli-degrees the temperature overflows to -32768 millidegrees. These are bothe well within the -55 - +125 degree range for the sensor. Fix overflow in left-shift of a u8. Signed-off-by: Ian Dall Signed-off-by: Evgeniy Polyakov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/slaves/w1_therm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 2c8dff9f77da..1ed3d554e372 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -115,7 +115,7 @@ static struct w1_therm_family_converter w1_therm_families[] = { static inline int w1_DS18B20_convert_temp(u8 rom[9]) { - s16 t = (rom[1] << 8) | rom[0]; + int t = ((s16)rom[1] << 8) | rom[0]; t = t*1000/16; return t; } From 744525092727827a9cf0044074db3e22dcf354fd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 11 Feb 2009 16:31:40 -0800 Subject: [PATCH 553/566] x86: merge sys_rt_sigreturn between 32 and 64 bits Impact: cleanup With the recent changes in the 32-bit code to make system calls which use struct pt_regs take a pointer, sys_rt_sigreturn() have become identical between 32 and 64 bits, and both are empty wrappers around do_rt_sigreturn(). Remove both wrappers and rename both to sys_rt_sigreturn(). Cc: Brian Gerst Cc: Tejun Heo Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 2 +- arch/x86/kernel/signal.c | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 77bb31a88ba8..68b1be10cfad 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -40,7 +40,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); int sys_sigaltstack(struct pt_regs *); unsigned long sys_sigreturn(struct pt_regs *); -int sys_rt_sigreturn(struct pt_regs *); +long sys_rt_sigreturn(struct pt_regs *); /* kernel/ioport.c */ long sys_iopl(struct pt_regs *); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ccfb27412f0f..7cdcd16885ed 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -601,7 +601,7 @@ badframe: } #endif /* CONFIG_X86_32 */ -static long do_rt_sigreturn(struct pt_regs *regs) +long sys_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; unsigned long ax; @@ -632,18 +632,6 @@ badframe: return 0; } -#ifdef CONFIG_X86_32 -int sys_rt_sigreturn(struct pt_regs *regs) -{ - return do_rt_sigreturn(regs); -} -#else /* !CONFIG_X86_32 */ -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) -{ - return do_rt_sigreturn(regs); -} -#endif /* CONFIG_X86_32 */ - /* * OK, we're invoking a handler: */ From 58105ef1857112a186696c9b8957020090226a28 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 31 Jan 2009 12:32:26 -0800 Subject: [PATCH 554/566] x86: UV: fix header struct usage Impact: Fixes warning Fix uv.h struct usage: arch/x86/include/asm/uv/uv.h:16: warning: 'struct mm_struct' declared inside parameter list arch/x86/include/asm/uv/uv.h:16: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Randy Dunlap Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 8ac1d7e312f3..8242bf965812 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -3,6 +3,9 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; +struct cpumask; +struct mm_struct; + #ifdef CONFIG_X86_UV extern enum uv_system_type get_uv_system_type(void); From 4f06b0436b2ddbd3b67b10e77098a6862787b3eb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 09:32:19 -0800 Subject: [PATCH 555/566] x86/cpa: make sure cpa is safe to call in lazy mmu mode Impact: fix race leading to crash under KVM and Xen The CPA code may be called while we're in lazy mmu update mode - for example, when using DEBUG_PAGE_ALLOC and doing a slab allocation in an interrupt handler which interrupted a lazy mmu update. In this case, the in-memory pagetable state may be out of date due to pending queued updates. We need to flush any pending updates before inspecting the page table. Similarly, we must explicitly flush any modifications CPA may have made (which comes down to flushing queued operations when flushing the TLB). Signed-off-by: Jeremy Fitzhardinge Acked-by: Marcelo Tosatti Cc: Stable Kernel Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 84ba74820ad6..f664bc1c4093 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -576,6 +576,13 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) else address = *cpa->vaddr; + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -854,6 +861,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); + /* + * If we've been called with lazy mmu updates enabled, then + * make sure that everything gets flushed out before we + * return. + */ + arch_flush_lazy_mmu_mode(); + out: return ret; } From be03d9e8022030c16abf534e33e185bfc3d40eef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 11 Feb 2009 11:20:23 -0800 Subject: [PATCH 556/566] x86, pat: fix warn_on_once() while mapping 0-1MB range with /dev/mem Jeff Mahoney reported: > With Suse's hwinfo tool, on -tip: > WARNING: at arch/x86/mm/pat.c:637 reserve_pfn_range+0x5b/0x26d() reserve_pfn_range() is not tracking the memory range below 1MB as non-RAM and as such is inconsistent with similar checks in reserve_memtype() and free_memtype() Rename the pagerange_is_ram() to pat_pagerange_is_ram() and add the "track legacy 1MB region as non RAM" condition. And also, fix reserve_pfn_range() to return -EINVAL, when the pfn range is RAM. This is to be consistent with this API design. Reported-and-tested-by: Jeff Mahoney Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 1 - arch/x86/mm/ioremap.c | 19 --------- arch/x86/mm/pat.c | 83 ++++++++++++++++++++----------------- 3 files changed, 45 insertions(+), 58 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e8695..776579119a00 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; extern int page_is_ram(unsigned long pagenr); -extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index af750ab973b6..f45d5e29a72e 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr) return 0; } -int pagerange_is_ram(unsigned long start, unsigned long end) -{ - int ram_page = 0, not_rampage = 0; - unsigned long page_nr; - - for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); - ++page_nr) { - if (page_is_ram(page_nr)) - ram_page = 1; - else - not_rampage = 1; - - if (ram_page == not_rampage) - return -1; - } - - return ram_page; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7b61036427df..aebbf67a79d0 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) static struct memtype *cached_entry; static u64 cached_start; +static int pat_pagerange_is_ram(unsigned long start, unsigned long end) +{ + int ram_page = 0, not_rampage = 0; + unsigned long page_nr; + + for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); + ++page_nr) { + /* + * For legacy reasons, physical address range in the legacy ISA + * region is tracked as non-RAM. This will allow users of + * /dev/mem to map portions of legacy ISA region, even when + * some of those portions are listed(or not even listed) with + * different e820 types(RAM/reserved/..) + */ + if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && + page_is_ram(page_nr)) + ram_page = 1; + else + not_rampage = 1; + + if (ram_page == not_rampage) + return -1; + } + + return ram_page; +} + /* * For RAM pages, mark the pages as non WB memory type using * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or @@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) *new_type = actual_type; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { @@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); - if (is_ram != 0) { - /* - * For mapping RAM pages, drivers need to call - * set_memory_[uc|wc|wb] directly, for reserve and free, before - * setting up the PTE. - */ - WARN_ON_ONCE(1); - return 0; - } + /* + * reserve_pfn_range() doesn't support RAM pages. + */ + if (is_ram != 0) + return -EINVAL; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) @@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) { int is_ram; - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) free_memtype(paddr, paddr + size); } From bc8bd002b8371ece81c9adbdce49d7e68f0a0cbc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:43:52 +0100 Subject: [PATCH 557/566] x86, defconfig: update the 32-bit defconfig Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 406 +++++++++++++++++++++++++------- 1 file changed, 324 insertions(+), 82 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edba00d98ac3..c3186ccc9369 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.27-rc5 -# Wed Sep 3 17:23:09 2008 +# Linux kernel version: 2.6.29-rc4 +# Thu Feb 12 12:42:50 2009 # # CONFIG_64BIT is not set CONFIG_X86_32=y # CONFIG_X86_64 is not set CONFIG_X86=y CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" -# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_CLOCKSOURCE_WATCHDOG=y @@ -24,16 +23,14 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_HWEIGHT=y -# CONFIG_GENERIC_GPIO is not set CONFIG_ARCH_MAY_HAVE_PC_FDC=y # CONFIG_RWSEM_GENERIC_SPINLOCK is not set CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y CONFIG_GENERIC_CALIBRATE_DELAY=y # CONFIG_GENERIC_TIME_VSYSCALL is not set CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_DEFAULT_IDLE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set @@ -42,12 +39,12 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y # CONFIG_ZONE_DMA32 is not set CONFIG_ARCH_POPULATES_NODE_MAP=y # CONFIG_AUDIT_ARCH is not set -CONFIG_ARCH_SUPPORTS_AOUT=y CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y CONFIG_X86_SMP=y +CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_X86_32_SMP=y CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y @@ -76,30 +73,44 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_AUDIT_TREE=y + +# +# RCU Subsystem +# +# CONFIG_CLASSIC_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=18 -CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_CGROUP_NS=y -# CONFIG_CGROUP_DEVICE is not set -CONFIG_CPUSETS=y CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set # CONFIG_USER_SCHED is not set CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +CONFIG_CGROUP_FREEZER=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y # CONFIG_CGROUP_MEM_RES_CTLR is not set # CONFIG_SYSFS_DEPRECATED_V2 is not set -CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y +CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -124,12 +135,15 @@ CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y +CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y CONFIG_MARKERS=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y @@ -139,15 +153,10 @@ CONFIG_KRETPROBES=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y -# CONFIG_HAVE_ARCH_TRACEHOOK is not set -# CONFIG_HAVE_DMA_ATTRS is not set -CONFIG_USE_GENERIC_SMP_HELPERS=y -# CONFIG_HAVE_CLK is not set -CONFIG_PROC_PAGE_MONITOR=y +CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -155,12 +164,10 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_LBD is not set CONFIG_BLK_DEV_IO_TRACE=y -# CONFIG_LSF is not set CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set @@ -176,7 +183,7 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_CLASSIC_RCU=y +CONFIG_FREEZER=y # # Processor type and features @@ -186,6 +193,7 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y +CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y CONFIG_X86_PC=y @@ -194,7 +202,7 @@ CONFIG_X86_PC=y # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set # CONFIG_X86_RDC321X is not set -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y # CONFIG_PARAVIRT_GUEST is not set # CONFIG_MEMTEST is not set # CONFIG_M386 is not set @@ -238,10 +246,19 @@ CONFIG_X86_TSC=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=4 CONFIG_X86_DEBUGCTLMSR=y +CONFIG_CPU_SUP_INTEL=y +CONFIG_CPU_SUP_CYRIX_32=y +CONFIG_CPU_SUP_AMD=y +CONFIG_CPU_SUP_CENTAUR_32=y +CONFIG_CPU_SUP_TRANSMETA_32=y +CONFIG_CPU_SUP_UMC_32=y +CONFIG_X86_DS=y +CONFIG_X86_PTRACE_BTS=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_DMI=y # CONFIG_IOMMU_HELPER is not set +# CONFIG_IOMMU_API is not set CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y @@ -250,12 +267,15 @@ CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y # CONFIG_X86_MCE is not set CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set CONFIG_X86_REBOOTFIXUPS=y CONFIG_MICROCODE=y +CONFIG_MICROCODE_INTEL=y +CONFIG_MICROCODE_AMD=y CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y @@ -264,6 +284,7 @@ CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y +# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y @@ -274,14 +295,17 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPARSEMEM_STATIC=y -# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_RESOURCES_64BIT=y +# CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y CONFIG_HIGHPTE=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y +CONFIG_X86_RESERVE_LOW_64K=y # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set @@ -302,10 +326,11 @@ CONFIG_PHYSICAL_START=0x1000000 CONFIG_PHYSICAL_ALIGN=0x200000 CONFIG_HOTPLUG_CPU=y # CONFIG_COMPAT_VDSO is not set +# CONFIG_CMDLINE_BOOL is not set CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # -# Power management options +# Power management and ACPI options # CONFIG_PM=y CONFIG_PM_DEBUG=y @@ -331,19 +356,13 @@ CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y CONFIG_ACPI_DOCK=y -# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y -# CONFIG_ACPI_WMI is not set -# CONFIG_ACPI_ASUS is not set -# CONFIG_ACPI_TOSHIBA is not set # CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_EC=y # CONFIG_ACPI_PCI_SLOT is not set -CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y @@ -388,7 +407,6 @@ CONFIG_X86_ACPI_CPUFREQ=y # # shared options # -# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y @@ -415,6 +433,7 @@ CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set @@ -452,13 +471,17 @@ CONFIG_HOTPLUG_PCI=y # Executable file formats / Emulations # CONFIG_BINFMT_ELF=y +CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +CONFIG_HAVE_AOUT=y # CONFIG_BINFMT_AOUT is not set CONFIG_BINFMT_MISC=y +CONFIG_HAVE_ATOMIC_IOMAP=y CONFIG_NET=y # # Networking options # +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -519,7 +542,6 @@ CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -557,19 +579,21 @@ CONFIG_NF_CONNTRACK_IRC=y CONFIG_NF_CONNTRACK_SIP=y CONFIG_NF_CT_NETLINK=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_SECMARK=y -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_TCPMSS=y CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y CONFIG_NETFILTER_XT_MATCH_MARK=y CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_STATE=y +# CONFIG_IP_VS is not set # # IP: Netfilter Configuration # +CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_CONNTRACK_PROC_COMPAT=y CONFIG_IP_NF_IPTABLES=y @@ -595,8 +619,8 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set @@ -604,6 +628,7 @@ CONFIG_IP6_NF_MANGLE=y # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set CONFIG_LLC=y @@ -623,6 +648,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set @@ -630,6 +656,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_DSMARK is not set # CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_INGRESS is not set # @@ -644,6 +671,7 @@ CONFIG_NET_CLS=y # CONFIG_NET_CLS_RSVP is not set # CONFIG_NET_CLS_RSVP6 is not set # CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set @@ -659,7 +687,9 @@ CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set # # Network testing @@ -676,29 +706,33 @@ CONFIG_HAMRADIO=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +# CONFIG_PHONET is not set CONFIG_FIB_RULES=y - -# -# Wireless -# +CONFIG_WIRELESS=y CONFIG_CFG80211=y +# CONFIG_CFG80211_REG_DEBUG is not set CONFIG_NL80211=y +CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set CONFIG_MAC80211=y # # Rate control algorithm selection # -CONFIG_MAC80211_RC_PID=y -CONFIG_MAC80211_RC_DEFAULT_PID=y -CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_MINSTREL=y +# CONFIG_MAC80211_RC_DEFAULT_PID is not set +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel" # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set # CONFIG_MAC80211_DEBUG_MENU is not set -# CONFIG_IEEE80211 is not set -# CONFIG_RFKILL is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +# CONFIG_RFKILL_INPUT is not set +CONFIG_RFKILL_LEDS=y # CONFIG_NET_9P is not set # @@ -722,7 +756,7 @@ CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set +CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols @@ -750,20 +784,19 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_MISC_DEVICES=y # CONFIG_IBM_ASM is not set # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set -# CONFIG_ACER_WMI is not set -# CONFIG_ASUS_LAPTOP is not set -# CONFIG_FUJITSU_LAPTOP is not set -# CONFIG_TC1100_WMI is not set -# CONFIG_MSI_LAPTOP is not set -# CONFIG_COMPAL_LAPTOP is not set -# CONFIG_SONY_LAPTOP is not set -# CONFIG_THINKPAD_ACPI is not set -# CONFIG_INTEL_MENLOW is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y # CONFIG_IDE is not set @@ -875,6 +908,7 @@ CONFIG_PATA_OLDPIIX=y CONFIG_PATA_SCH=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_AUTODETECT=y # CONFIG_MD_LINEAR is not set # CONFIG_MD_RAID0 is not set # CONFIG_MD_RAID1 is not set @@ -930,6 +964,9 @@ CONFIG_PHYLIB=y # CONFIG_BROADCOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set # CONFIG_FIXED_PHY is not set # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y @@ -953,6 +990,9 @@ CONFIG_NET_TULIP=y # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set # CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set @@ -960,7 +1000,6 @@ CONFIG_NET_PCI=y # CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set @@ -974,15 +1013,16 @@ CONFIG_8139TOO=y # CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set +# CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set CONFIG_E1000E=y # CONFIG_IP1000 is not set # CONFIG_IGB is not set @@ -1000,18 +1040,23 @@ CONFIG_BNX2=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_CHELSIO_T3 is not set +# CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set +# CONFIG_MLX4_EN is not set # CONFIG_MLX4_CORE is not set # CONFIG_TEHUTI is not set # CONFIG_BNX2X is not set +# CONFIG_QLGE is not set # CONFIG_SFC is not set CONFIG_TR=y # CONFIG_IBMOL is not set @@ -1025,9 +1070,8 @@ CONFIG_TR=y # CONFIG_WLAN_PRE80211 is not set CONFIG_WLAN_80211=y # CONFIG_PCMCIA_RAYCS is not set -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set # CONFIG_LIBERTAS is not set +# CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set # CONFIG_HERMES is not set # CONFIG_ATMEL is not set @@ -1044,6 +1088,8 @@ CONFIG_WLAN_80211=y CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set # CONFIG_ATH9K is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set # CONFIG_IWLCORE is not set # CONFIG_IWLWIFI_LEDS is not set # CONFIG_IWLAGN is not set @@ -1054,6 +1100,10 @@ CONFIG_ATH5K=y # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + # # USB Network Adapters # @@ -1062,6 +1112,7 @@ CONFIG_ATH5K=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +# CONFIG_USB_HSO is not set CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_3C589 is not set # CONFIG_PCMCIA_3C574 is not set @@ -1123,6 +1174,7 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y CONFIG_MOUSE_PS2_SYNAPTICS=y CONFIG_MOUSE_PS2_LIFEBOOK=y CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_ELANTECH is not set # CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set @@ -1160,15 +1212,16 @@ CONFIG_INPUT_TOUCHSCREEN=y # CONFIG_TOUCHSCREEN_FUJITSU is not set # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set # CONFIG_TOUCHSCREEN_MTOUCH is not set # CONFIG_TOUCHSCREEN_INEXIO is not set # CONFIG_TOUCHSCREEN_MK712 is not set # CONFIG_TOUCHSCREEN_PENMOUNT is not set # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set # CONFIG_TOUCHSCREEN_TOUCHWIN is not set -# CONFIG_TOUCHSCREEN_UCB1400 is not set # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_APANEL is not set @@ -1179,6 +1232,7 @@ CONFIG_INPUT_MISC=y # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set # CONFIG_INPUT_UINPUT is not set # @@ -1245,6 +1299,7 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y @@ -1279,6 +1334,7 @@ CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y # CONFIG_I2C_CHARDEV is not set CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_ALGOBIT=y # # I2C Hardware Bus support @@ -1331,8 +1387,6 @@ CONFIG_I2C_I801=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -1351,8 +1405,78 @@ CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_BATTERY_DS2760 is not set -# CONFIG_HWMON is not set +# CONFIG_BATTERY_BQ27x00 is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_FSCHMD is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_CORETEMP is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_SENSORS_LIS3LV02D is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_HWMON_DEBUG_CHIP is not set CONFIG_THERMAL=y +# CONFIG_THERMAL_HWMON is not set CONFIG_WATCHDOG=y # CONFIG_WATCHDOG_NOWAYOUT is not set @@ -1372,6 +1496,7 @@ CONFIG_WATCHDOG=y # CONFIG_I6300ESB_WDT is not set # CONFIG_ITCO_WDT is not set # CONFIG_IT8712F_WDT is not set +# CONFIG_IT87_WDT is not set # CONFIG_HP_WATCHDOG is not set # CONFIG_SC1200_WDT is not set # CONFIG_PC87413_WDT is not set @@ -1379,9 +1504,11 @@ CONFIG_WATCHDOG=y # CONFIG_SBC8360_WDT is not set # CONFIG_SBC7240_WDT is not set # CONFIG_CPU5_WDT is not set +# CONFIG_SMSC_SCH311X_WDT is not set # CONFIG_SMSC37B787_WDT is not set # CONFIG_W83627HF_WDT is not set # CONFIG_W83697HF_WDT is not set +# CONFIG_W83697UG_WDT is not set # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set @@ -1397,11 +1524,11 @@ CONFIG_WATCHDOG=y # USB-based Watchdog Cards # # CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set # @@ -1410,7 +1537,13 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_TWL4030_CORE is not set # CONFIG_MFD_TMIO is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_REGULATOR is not set # # Multimedia devices @@ -1450,6 +1583,7 @@ CONFIG_DRM=y # CONFIG_DRM_I810 is not set # CONFIG_DRM_I830 is not set CONFIG_DRM_I915=y +# CONFIG_DRM_I915_KMS is not set # CONFIG_DRM_MGA is not set # CONFIG_DRM_SIS is not set # CONFIG_DRM_VIA is not set @@ -1459,6 +1593,7 @@ CONFIG_DRM_I915=y CONFIG_FB=y # CONFIG_FIRMWARE_EDID is not set # CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y @@ -1487,7 +1622,6 @@ CONFIG_FB_TILEBLITTING=y # CONFIG_FB_UVESA is not set # CONFIG_FB_VESA is not set CONFIG_FB_EFI=y -# CONFIG_FB_IMAC is not set # CONFIG_FB_N411 is not set # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set @@ -1503,6 +1637,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_S3 is not set # CONFIG_FB_SAVAGE is not set # CONFIG_FB_SIS is not set +# CONFIG_FB_VIA is not set # CONFIG_FB_NEOMAGIC is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set @@ -1515,12 +1650,15 @@ CONFIG_FB_EFI=y # CONFIG_FB_CARMINE is not set # CONFIG_FB_GEODE is not set # CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_CORGI is not set +CONFIG_BACKLIGHT_GENERIC=y # CONFIG_BACKLIGHT_PROGEAR is not set # CONFIG_BACKLIGHT_MBP_NVIDIA is not set +# CONFIG_BACKLIGHT_SAHARA is not set # # Display device support @@ -1540,10 +1678,12 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_LOGO_LINUX_CLUT224=y CONFIG_SOUND=y +CONFIG_SOUND_OSS_CORE=y CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y CONFIG_SND_HWDEP=y +CONFIG_SND_JACK=y CONFIG_SND_SEQUENCER=y CONFIG_SND_SEQ_DUMMY=y CONFIG_SND_OSSEMUL=y @@ -1551,6 +1691,8 @@ CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_SEQ_HRTIMER_DEFAULT=y CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y @@ -1605,11 +1747,16 @@ CONFIG_SND_PCI=y # CONFIG_SND_FM801 is not set CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y +# CONFIG_SND_HDA_RECONFIG is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set CONFIG_SND_HDA_CODEC_REALTEK=y CONFIG_SND_HDA_CODEC_ANALOG=y CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_VIA=y CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_NVHDMI=y +CONFIG_SND_HDA_CODEC_INTELHDMI=y +CONFIG_SND_HDA_ELD=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_HDA_CODEC_CMEDIA=y CONFIG_SND_HDA_CODEC_SI3054=y @@ -1643,6 +1790,7 @@ CONFIG_SND_USB=y # CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_USX2Y is not set # CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_US122L is not set CONFIG_SND_PCMCIA=y # CONFIG_SND_VXPOCKET is not set # CONFIG_SND_PDAUDIOCF is not set @@ -1657,15 +1805,37 @@ CONFIG_HIDRAW=y # USB Input Devices # CONFIG_USB_HID=y -CONFIG_USB_HIDINPUT_POWERBOOK=y -CONFIG_HID_FF=y CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y + +# +# Special HID drivers +# +CONFIG_HID_COMPAT=y +CONFIG_HID_A4TECH=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GYRATION=y +CONFIG_HID_LOGITECH=y CONFIG_LOGITECH_FF=y # CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +# CONFIG_GREENASIA_FF is not set +CONFIG_HID_TOPSEED=y CONFIG_THRUSTMASTER_FF=y CONFIG_ZEROPLUS_FF=y -CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1683,6 +1853,8 @@ CONFIG_USB_DEVICEFS=y CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set CONFIG_USB_MON=y +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set # # USB Host Controller Drivers @@ -1691,6 +1863,7 @@ CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_OXU210HP_HCD is not set # CONFIG_USB_ISP116X_HCD is not set # CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y @@ -1700,6 +1873,8 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set # CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set # # USB Device Class drivers @@ -1707,20 +1882,20 @@ CONFIG_USB_UHCI_HCD=y # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y # CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; # # -# may also be needed; see USB_STORAGE Help for more information +# see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set @@ -1728,7 +1903,6 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_ALAUDA is not set # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_SIERRA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set CONFIG_USB_LIBUSUAL=y @@ -1749,6 +1923,7 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -1766,7 +1941,13 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set # CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y @@ -1775,6 +1956,7 @@ CONFIG_LEDS_CLASS=y # # LED drivers # +# CONFIG_LEDS_ALIX2 is not set # CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_CLEVO_MAIL is not set # CONFIG_LEDS_PCA955X is not set @@ -1785,6 +1967,7 @@ CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y # CONFIG_LEDS_TRIGGER_TIMER is not set # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set @@ -1824,6 +2007,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_M41T80 is not set # CONFIG_RTC_DRV_S35390A is not set # CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set # # SPI RTC drivers @@ -1833,12 +2017,15 @@ CONFIG_RTC_INTF_DEV=y # Platform RTC drivers # CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1286 is not set # CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set # CONFIG_RTC_DRV_DS1742 is not set # CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set # CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_BQ4802 is not set # CONFIG_RTC_DRV_V3020 is not set # @@ -1851,6 +2038,22 @@ CONFIG_DMADEVICES=y # # CONFIG_INTEL_IOATDMA is not set # CONFIG_UIO is not set +# CONFIG_STAGING is not set +CONFIG_X86_PLATFORM_DEVICES=y +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_TC1100_WMI is not set +# CONFIG_MSI_LAPTOP is not set +# CONFIG_PANASONIC_LAPTOP is not set +# CONFIG_COMPAL_LAPTOP is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +# CONFIG_INTEL_MENLOW is not set +CONFIG_EEEPC_LAPTOP=y +# CONFIG_ACPI_WMI is not set +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_TOSHIBA is not set # # Firmware Drivers @@ -1872,21 +2075,24 @@ CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4DEV_FS is not set +# CONFIG_EXT4_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QUOTA_TREE=y # CONFIG_QFMT_V1 is not set CONFIG_QFMT_V2=y CONFIG_QUOTACTL=y @@ -1920,16 +2126,14 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y # CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set @@ -1939,6 +2143,7 @@ CONFIG_HUGETLB_PAGE=y # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set @@ -1960,6 +2165,7 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y +# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -2066,33 +2272,54 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_VIRTUAL is not set # CONFIG_DEBUG_WRITECOUNT is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_KPROBES_SANITY_TEST is not set # CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_HAVE_FTRACE=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y -# CONFIG_FTRACE is not set +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_HW_BRANCH_TRACER=y + +# +# Tracers +# +# CONFIG_FUNCTION_TRACER is not set # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SYSPROF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_POWER_TRACER is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_HW_BRANCH_TRACER is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set # CONFIG_STRICT_DEVMEM is not set CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y +CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set @@ -2123,8 +2350,10 @@ CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set CONFIG_SECURITY_NETWORK=y # CONFIG_SECURITY_NETWORK_XFRM is not set +# CONFIG_SECURITY_PATH is not set CONFIG_SECURITY_FILE_CAPABILITIES=y # CONFIG_SECURITY_ROOTPLUG is not set CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 @@ -2135,7 +2364,6 @@ CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 -# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set # CONFIG_SECURITY_SMACK is not set CONFIG_CRYPTO=y @@ -2143,11 +2371,18 @@ CONFIG_CRYPTO=y # # Crypto core or helper # +# CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_CRYPTD is not set @@ -2182,6 +2417,7 @@ CONFIG_CRYPTO_HMAC=y # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CRC32C_INTEL is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set @@ -2222,6 +2458,11 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_DEFLATE is not set # CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_PADLOCK is not set # CONFIG_CRYPTO_DEV_GEODE is not set @@ -2239,6 +2480,7 @@ CONFIG_VIRTUALIZATION=y CONFIG_BITREVERSE=y CONFIG_GENERIC_FIND_FIRST_BIT=y CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_FIND_LAST_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC_T10DIF=y From dd5fc55449c5e7e4f008d7d26cda9e6e2bc94980 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:48:48 +0100 Subject: [PATCH 558/566] x86, defconfig: update the 64-bit defconfig Signed-off-by: Ingo Molnar --- arch/x86/configs/x86_64_defconfig | 412 ++++++++++++++++++++++++------ 1 file changed, 332 insertions(+), 80 deletions(-) diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 322dd2748fc9..4c85bfa58e19 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.27-rc5 -# Wed Sep 3 17:13:39 2008 +# Linux kernel version: 2.6.29-rc4 +# Thu Feb 12 12:48:13 2009 # CONFIG_64BIT=y # CONFIG_X86_32 is not set CONFIG_X86_64=y CONFIG_X86=y CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" -# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_CLOCKSOURCE_WATCHDOG=y @@ -23,17 +22,16 @@ CONFIG_ZONE_DMA=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_GENERIC_HWEIGHT=y -# CONFIG_GENERIC_GPIO is not set CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_RWSEM_GENERIC_SPINLOCK=y # CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_DEFAULT_IDLE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y @@ -42,12 +40,12 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ZONE_DMA32=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_AOUT=y CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y CONFIG_X86_SMP=y +CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_X86_64_SMP=y CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y @@ -76,30 +74,44 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_AUDIT_TREE=y + +# +# RCU Subsystem +# +# CONFIG_CLASSIC_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=64 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=18 -CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_CGROUP_NS=y -# CONFIG_CGROUP_DEVICE is not set -CONFIG_CPUSETS=y CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set # CONFIG_USER_SCHED is not set CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +CONFIG_CGROUP_FREEZER=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y # CONFIG_CGROUP_MEM_RES_CTLR is not set # CONFIG_SYSFS_DEPRECATED_V2 is not set -CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y +CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -124,12 +136,15 @@ CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y +CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y CONFIG_MARKERS=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y @@ -139,15 +154,10 @@ CONFIG_KRETPROBES=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y -# CONFIG_HAVE_ARCH_TRACEHOOK is not set -# CONFIG_HAVE_DMA_ATTRS is not set -CONFIG_USE_GENERIC_SMP_HELPERS=y -# CONFIG_HAVE_CLK is not set -CONFIG_PROC_PAGE_MONITOR=y +CONFIG_HAVE_ARCH_TRACEHOOK=y # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -155,7 +165,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y CONFIG_BLK_DEV_IO_TRACE=y @@ -175,7 +184,7 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_CLASSIC_RCU=y +CONFIG_FREEZER=y # # Processor type and features @@ -185,6 +194,8 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y +CONFIG_SPARSE_IRQ=y +# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y CONFIG_X86_PC=y @@ -192,6 +203,7 @@ CONFIG_X86_PC=y # CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set +CONFIG_SCHED_OMIT_FRAME_POINTER=y # CONFIG_PARAVIRT_GUEST is not set # CONFIG_MEMTEST is not set # CONFIG_M386 is not set @@ -230,6 +242,11 @@ CONFIG_X86_CMPXCHG64=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_DEBUGCTLMSR=y +CONFIG_CPU_SUP_INTEL=y +CONFIG_CPU_SUP_AMD=y +CONFIG_CPU_SUP_CENTAUR_64=y +CONFIG_X86_DS=y +CONFIG_X86_PTRACE_BTS=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_DMI=y @@ -237,8 +254,11 @@ CONFIG_GART_IOMMU=y CONFIG_CALGARY_IOMMU=y CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y CONFIG_AMD_IOMMU=y +CONFIG_AMD_IOMMU_STATS=y CONFIG_SWIOTLB=y CONFIG_IOMMU_HELPER=y +CONFIG_IOMMU_API=y +# CONFIG_MAXSMP is not set CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y @@ -247,12 +267,17 @@ CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y # CONFIG_X86_MCE is not set # CONFIG_I8K is not set CONFIG_MICROCODE=y +CONFIG_MICROCODE_INTEL=y +CONFIG_MICROCODE_AMD=y CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y +CONFIG_ARCH_PHYS_ADDR_T_64BIT=y +CONFIG_DIRECT_GBPAGES=y CONFIG_NUMA=y CONFIG_K8_NUMA=y CONFIG_X86_64_ACPI_NUMA=y @@ -269,7 +294,6 @@ CONFIG_SPARSEMEM_MANUAL=y CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y CONFIG_HAVE_MEMORY_PRESENT=y -# CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y @@ -280,10 +304,14 @@ CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y -CONFIG_RESOURCES_64BIT=y +CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y +CONFIG_X86_RESERVE_LOW_64K=y CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set CONFIG_X86_PAT=y @@ -302,11 +330,12 @@ CONFIG_PHYSICAL_START=0x1000000 CONFIG_PHYSICAL_ALIGN=0x200000 CONFIG_HOTPLUG_CPU=y # CONFIG_COMPAT_VDSO is not set +# CONFIG_CMDLINE_BOOL is not set CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y # -# Power management options +# Power management and ACPI options # CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_PM=y @@ -333,20 +362,14 @@ CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y CONFIG_ACPI_DOCK=y -# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y CONFIG_ACPI_NUMA=y -# CONFIG_ACPI_WMI is not set -# CONFIG_ACPI_ASUS is not set -# CONFIG_ACPI_TOSHIBA is not set # CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_EC=y # CONFIG_ACPI_PCI_SLOT is not set -CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y @@ -381,12 +404,16 @@ CONFIG_X86_ACPI_CPUFREQ=y # # shared options # -# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPU_IDLE_GOV_MENU=y +# +# Memory power savings +# +# CONFIG_I7300_IDLE is not set + # # Bus options (PCI etc.) # @@ -395,8 +422,10 @@ CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCI_DOMAINS=y CONFIG_DMAR=y +# CONFIG_DMAR_DEFAULT_ON is not set CONFIG_DMAR_GFX_WA=y CONFIG_DMAR_FLOPPY_WA=y +# CONFIG_INTR_REMAP is not set CONFIG_PCIEPORTBUS=y # CONFIG_HOTPLUG_PCI_PCIE is not set CONFIG_PCIEAER=y @@ -405,6 +434,7 @@ CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y CONFIG_K8_NB=y @@ -438,6 +468,8 @@ CONFIG_HOTPLUG_PCI=y # CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y +CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +# CONFIG_HAVE_AOUT is not set CONFIG_BINFMT_MISC=y CONFIG_IA32_EMULATION=y # CONFIG_IA32_AOUT is not set @@ -449,6 +481,7 @@ CONFIG_NET=y # # Networking options # +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -509,7 +542,6 @@ CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -547,19 +579,21 @@ CONFIG_NF_CONNTRACK_IRC=y CONFIG_NF_CONNTRACK_SIP=y CONFIG_NF_CT_NETLINK=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_SECMARK=y -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_TCPMSS=y CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y CONFIG_NETFILTER_XT_MATCH_MARK=y CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_STATE=y +# CONFIG_IP_VS is not set # # IP: Netfilter Configuration # +CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_CONNTRACK_PROC_COMPAT=y CONFIG_IP_NF_IPTABLES=y @@ -585,8 +619,8 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set @@ -594,6 +628,7 @@ CONFIG_IP6_NF_MANGLE=y # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set CONFIG_LLC=y @@ -613,6 +648,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set @@ -620,6 +656,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_DSMARK is not set # CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_INGRESS is not set # @@ -634,6 +671,7 @@ CONFIG_NET_CLS=y # CONFIG_NET_CLS_RSVP is not set # CONFIG_NET_CLS_RSVP6 is not set # CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set @@ -649,7 +687,9 @@ CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set # # Network testing @@ -666,29 +706,33 @@ CONFIG_HAMRADIO=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +# CONFIG_PHONET is not set CONFIG_FIB_RULES=y - -# -# Wireless -# +CONFIG_WIRELESS=y CONFIG_CFG80211=y +# CONFIG_CFG80211_REG_DEBUG is not set CONFIG_NL80211=y +CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set CONFIG_MAC80211=y # # Rate control algorithm selection # -CONFIG_MAC80211_RC_PID=y -CONFIG_MAC80211_RC_DEFAULT_PID=y -CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_MINSTREL=y +# CONFIG_MAC80211_RC_DEFAULT_PID is not set +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel" # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set # CONFIG_MAC80211_DEBUG_MENU is not set -# CONFIG_IEEE80211 is not set -# CONFIG_RFKILL is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +# CONFIG_RFKILL_INPUT is not set +CONFIG_RFKILL_LEDS=y # CONFIG_NET_9P is not set # @@ -712,7 +756,7 @@ CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set +CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols @@ -740,21 +784,21 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_MISC_DEVICES=y # CONFIG_IBM_ASM is not set # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set -# CONFIG_ACER_WMI is not set -# CONFIG_ASUS_LAPTOP is not set -# CONFIG_FUJITSU_LAPTOP is not set -# CONFIG_MSI_LAPTOP is not set -# CONFIG_COMPAL_LAPTOP is not set -# CONFIG_SONY_LAPTOP is not set -# CONFIG_THINKPAD_ACPI is not set -# CONFIG_INTEL_MENLOW is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_SGI_XP is not set # CONFIG_HP_ILO is not set # CONFIG_SGI_GRU is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y # CONFIG_IDE is not set @@ -864,6 +908,7 @@ CONFIG_PATA_OLDPIIX=y CONFIG_PATA_SCH=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_AUTODETECT=y # CONFIG_MD_LINEAR is not set # CONFIG_MD_RAID0 is not set # CONFIG_MD_RAID1 is not set @@ -919,6 +964,9 @@ CONFIG_PHYLIB=y # CONFIG_BROADCOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set # CONFIG_FIXED_PHY is not set # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y @@ -942,6 +990,9 @@ CONFIG_NET_TULIP=y # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set # CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set @@ -949,7 +1000,6 @@ CONFIG_NET_PCI=y # CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set @@ -963,15 +1013,16 @@ CONFIG_8139TOO_PIO=y # CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set +# CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set @@ -989,18 +1040,23 @@ CONFIG_TIGON3=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_CHELSIO_T3 is not set +# CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set +# CONFIG_MLX4_EN is not set # CONFIG_MLX4_CORE is not set # CONFIG_TEHUTI is not set # CONFIG_BNX2X is not set +# CONFIG_QLGE is not set # CONFIG_SFC is not set CONFIG_TR=y # CONFIG_IBMOL is not set @@ -1013,9 +1069,8 @@ CONFIG_TR=y # CONFIG_WLAN_PRE80211 is not set CONFIG_WLAN_80211=y # CONFIG_PCMCIA_RAYCS is not set -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set # CONFIG_LIBERTAS is not set +# CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set # CONFIG_HERMES is not set # CONFIG_ATMEL is not set @@ -1032,6 +1087,8 @@ CONFIG_WLAN_80211=y CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set # CONFIG_ATH9K is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set # CONFIG_IWLCORE is not set # CONFIG_IWLWIFI_LEDS is not set # CONFIG_IWLAGN is not set @@ -1042,6 +1099,10 @@ CONFIG_ATH5K=y # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + # # USB Network Adapters # @@ -1050,6 +1111,7 @@ CONFIG_ATH5K=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +# CONFIG_USB_HSO is not set CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_3C589 is not set # CONFIG_PCMCIA_3C574 is not set @@ -1059,6 +1121,7 @@ CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_SMC91C92 is not set # CONFIG_PCMCIA_XIRC2PS is not set # CONFIG_PCMCIA_AXNET is not set +# CONFIG_PCMCIA_IBMTR is not set # CONFIG_WAN is not set CONFIG_FDDI=y # CONFIG_DEFXX is not set @@ -1110,6 +1173,7 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y CONFIG_MOUSE_PS2_SYNAPTICS=y CONFIG_MOUSE_PS2_LIFEBOOK=y CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_ELANTECH is not set # CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set @@ -1147,15 +1211,16 @@ CONFIG_INPUT_TOUCHSCREEN=y # CONFIG_TOUCHSCREEN_FUJITSU is not set # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set # CONFIG_TOUCHSCREEN_MTOUCH is not set # CONFIG_TOUCHSCREEN_INEXIO is not set # CONFIG_TOUCHSCREEN_MK712 is not set # CONFIG_TOUCHSCREEN_PENMOUNT is not set # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set # CONFIG_TOUCHSCREEN_TOUCHWIN is not set -# CONFIG_TOUCHSCREEN_UCB1400 is not set # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_APANEL is not set @@ -1165,6 +1230,7 @@ CONFIG_INPUT_MISC=y # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set # CONFIG_INPUT_UINPUT is not set # @@ -1231,6 +1297,7 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y @@ -1260,6 +1327,7 @@ CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y # CONFIG_I2C_CHARDEV is not set CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_ALGOBIT=y # # I2C Hardware Bus support @@ -1311,8 +1379,6 @@ CONFIG_I2C_I801=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -1331,8 +1397,78 @@ CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_BATTERY_DS2760 is not set -# CONFIG_HWMON is not set +# CONFIG_BATTERY_BQ27x00 is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_FSCHMD is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_CORETEMP is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_SENSORS_LIS3LV02D is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_HWMON_DEBUG_CHIP is not set CONFIG_THERMAL=y +# CONFIG_THERMAL_HWMON is not set CONFIG_WATCHDOG=y # CONFIG_WATCHDOG_NOWAYOUT is not set @@ -1352,15 +1488,18 @@ CONFIG_WATCHDOG=y # CONFIG_I6300ESB_WDT is not set # CONFIG_ITCO_WDT is not set # CONFIG_IT8712F_WDT is not set +# CONFIG_IT87_WDT is not set # CONFIG_HP_WATCHDOG is not set # CONFIG_SC1200_WDT is not set # CONFIG_PC87413_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_SBC8360_WDT is not set # CONFIG_CPU5_WDT is not set +# CONFIG_SMSC_SCH311X_WDT is not set # CONFIG_SMSC37B787_WDT is not set # CONFIG_W83627HF_WDT is not set # CONFIG_W83697HF_WDT is not set +# CONFIG_W83697UG_WDT is not set # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set @@ -1376,11 +1515,11 @@ CONFIG_WATCHDOG=y # USB-based Watchdog Cards # # CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set # @@ -1389,7 +1528,13 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_TWL4030_CORE is not set # CONFIG_MFD_TMIO is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_REGULATOR is not set # # Multimedia devices @@ -1423,6 +1568,7 @@ CONFIG_DRM=y # CONFIG_DRM_I810 is not set # CONFIG_DRM_I830 is not set CONFIG_DRM_I915=y +CONFIG_DRM_I915_KMS=y # CONFIG_DRM_MGA is not set # CONFIG_DRM_SIS is not set # CONFIG_DRM_VIA is not set @@ -1432,6 +1578,7 @@ CONFIG_DRM_I915=y CONFIG_FB=y # CONFIG_FIRMWARE_EDID is not set # CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y @@ -1460,7 +1607,6 @@ CONFIG_FB_TILEBLITTING=y # CONFIG_FB_UVESA is not set # CONFIG_FB_VESA is not set CONFIG_FB_EFI=y -# CONFIG_FB_IMAC is not set # CONFIG_FB_N411 is not set # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set @@ -1475,6 +1621,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_S3 is not set # CONFIG_FB_SAVAGE is not set # CONFIG_FB_SIS is not set +# CONFIG_FB_VIA is not set # CONFIG_FB_NEOMAGIC is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set @@ -1486,12 +1633,15 @@ CONFIG_FB_EFI=y # CONFIG_FB_CARMINE is not set # CONFIG_FB_GEODE is not set # CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_CORGI is not set +CONFIG_BACKLIGHT_GENERIC=y # CONFIG_BACKLIGHT_PROGEAR is not set # CONFIG_BACKLIGHT_MBP_NVIDIA is not set +# CONFIG_BACKLIGHT_SAHARA is not set # # Display device support @@ -1511,10 +1661,12 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_LOGO_LINUX_CLUT224=y CONFIG_SOUND=y +CONFIG_SOUND_OSS_CORE=y CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y CONFIG_SND_HWDEP=y +CONFIG_SND_JACK=y CONFIG_SND_SEQUENCER=y CONFIG_SND_SEQ_DUMMY=y CONFIG_SND_OSSEMUL=y @@ -1522,6 +1674,8 @@ CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_SEQ_HRTIMER_DEFAULT=y CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y @@ -1575,11 +1729,16 @@ CONFIG_SND_PCI=y # CONFIG_SND_FM801 is not set CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y +# CONFIG_SND_HDA_RECONFIG is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set CONFIG_SND_HDA_CODEC_REALTEK=y CONFIG_SND_HDA_CODEC_ANALOG=y CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_VIA=y CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_NVHDMI=y +CONFIG_SND_HDA_CODEC_INTELHDMI=y +CONFIG_SND_HDA_ELD=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_HDA_CODEC_CMEDIA=y CONFIG_SND_HDA_CODEC_SI3054=y @@ -1612,6 +1771,7 @@ CONFIG_SND_USB=y # CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_USX2Y is not set # CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_US122L is not set CONFIG_SND_PCMCIA=y # CONFIG_SND_VXPOCKET is not set # CONFIG_SND_PDAUDIOCF is not set @@ -1626,15 +1786,37 @@ CONFIG_HIDRAW=y # USB Input Devices # CONFIG_USB_HID=y -CONFIG_USB_HIDINPUT_POWERBOOK=y -CONFIG_HID_FF=y CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y + +# +# Special HID drivers +# +CONFIG_HID_COMPAT=y +CONFIG_HID_A4TECH=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GYRATION=y +CONFIG_HID_LOGITECH=y CONFIG_LOGITECH_FF=y # CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +# CONFIG_GREENASIA_FF is not set +CONFIG_HID_TOPSEED=y CONFIG_THRUSTMASTER_FF=y CONFIG_ZEROPLUS_FF=y -CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1652,6 +1834,8 @@ CONFIG_USB_DEVICEFS=y CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set CONFIG_USB_MON=y +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set # # USB Host Controller Drivers @@ -1660,6 +1844,7 @@ CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_OXU210HP_HCD is not set # CONFIG_USB_ISP116X_HCD is not set # CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y @@ -1669,6 +1854,8 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set # CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set # # USB Device Class drivers @@ -1676,20 +1863,20 @@ CONFIG_USB_UHCI_HCD=y # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y # CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; # # -# may also be needed; see USB_STORAGE Help for more information +# see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set @@ -1697,7 +1884,6 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_ALAUDA is not set # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_SIERRA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set CONFIG_USB_LIBUSUAL=y @@ -1718,6 +1904,7 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -1735,7 +1922,13 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set # CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y @@ -1744,6 +1937,7 @@ CONFIG_LEDS_CLASS=y # # LED drivers # +# CONFIG_LEDS_ALIX2 is not set # CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_CLEVO_MAIL is not set # CONFIG_LEDS_PCA955X is not set @@ -1754,6 +1948,7 @@ CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y # CONFIG_LEDS_TRIGGER_TIMER is not set # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set @@ -1793,6 +1988,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_M41T80 is not set # CONFIG_RTC_DRV_S35390A is not set # CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set # # SPI RTC drivers @@ -1802,12 +1998,15 @@ CONFIG_RTC_INTF_DEV=y # Platform RTC drivers # CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1286 is not set # CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set # CONFIG_RTC_DRV_DS1742 is not set # CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set # CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_BQ4802 is not set # CONFIG_RTC_DRV_V3020 is not set # @@ -1820,6 +2019,21 @@ CONFIG_DMADEVICES=y # # CONFIG_INTEL_IOATDMA is not set # CONFIG_UIO is not set +# CONFIG_STAGING is not set +CONFIG_X86_PLATFORM_DEVICES=y +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_MSI_LAPTOP is not set +# CONFIG_PANASONIC_LAPTOP is not set +# CONFIG_COMPAL_LAPTOP is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +# CONFIG_INTEL_MENLOW is not set +CONFIG_EEEPC_LAPTOP=y +# CONFIG_ACPI_WMI is not set +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_TOSHIBA is not set # # Firmware Drivers @@ -1841,22 +2055,25 @@ CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4DEV_FS is not set +# CONFIG_EXT4_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QUOTA_TREE=y # CONFIG_QFMT_V1 is not set CONFIG_QFMT_V2=y CONFIG_QUOTACTL=y @@ -1890,16 +2107,14 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y # CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set @@ -1909,6 +2124,7 @@ CONFIG_HUGETLB_PAGE=y # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set @@ -1930,6 +2146,7 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y +# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -2035,40 +2252,60 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_VIRTUAL is not set # CONFIG_DEBUG_WRITECOUNT is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_KPROBES_SANITY_TEST is not set # CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_HAVE_FTRACE=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y -# CONFIG_FTRACE is not set +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_HW_BRANCH_TRACER=y + +# +# Tracers +# +# CONFIG_FUNCTION_TRACER is not set # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SYSPROF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_POWER_TRACER is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_HW_BRANCH_TRACER is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set # CONFIG_STRICT_DEVMEM is not set CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y +CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_DEBUG_PER_CPU_MAPS is not set # CONFIG_X86_PTDUMP is not set CONFIG_DEBUG_RODATA=y -# CONFIG_DIRECT_GBPAGES is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m # CONFIG_IOMMU_DEBUG is not set @@ -2092,8 +2329,10 @@ CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set CONFIG_SECURITY_NETWORK=y # CONFIG_SECURITY_NETWORK_XFRM is not set +# CONFIG_SECURITY_PATH is not set CONFIG_SECURITY_FILE_CAPABILITIES=y # CONFIG_SECURITY_ROOTPLUG is not set CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 @@ -2104,7 +2343,6 @@ CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 -# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set # CONFIG_SECURITY_SMACK is not set CONFIG_CRYPTO=y @@ -2112,11 +2350,18 @@ CONFIG_CRYPTO=y # # Crypto core or helper # +# CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_CRYPTD is not set @@ -2151,6 +2396,7 @@ CONFIG_CRYPTO_HMAC=y # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CRC32C_INTEL is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set @@ -2191,6 +2437,11 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_DEFLATE is not set # CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set CONFIG_HAVE_KVM=y @@ -2205,6 +2456,7 @@ CONFIG_VIRTUALIZATION=y CONFIG_BITREVERSE=y CONFIG_GENERIC_FIND_FIRST_BIT=y CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_FIND_LAST_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC_T10DIF=y From 556831063bcf8fd818e92227c40e3eaabe759bab Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:51:29 +0100 Subject: [PATCH 559/566] x86, defconfig: turn off CONFIG_ENABLE_WARN_DEPRECATED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit deprecation warnings have become rather noisy lately: drivers/i2c/i2c-core.c: In function ‘i2c_new_device’: drivers/i2c/i2c-core.c:283: warning: ‘i2c_attach_client’ is deprecated (declared at include/linux/i2c.h:434) drivers/i2c/i2c-core.c: In function ‘i2c_del_adapter’: drivers/i2c/i2c-core.c:646: warning: ‘detach_client’ is deprecated (declared at include/linux/i2c.h:154) drivers/i2c/i2c-core.c: In function ‘i2c_register_driver’: drivers/i2c/i2c-core.c:713: warning: ‘detach_client’ is deprecated (declared at include/linux/i2c.h:154) drivers/i2c/i2c-core.c: In function ‘__detach_adapter’: drivers/i2c/i2c-core.c:780: warning: ‘detach_client’ is deprecated (declared at include/linux/i2c.h:154) drivers/i2c/i2c-core.c: At top level: drivers/i2c/i2c-core.c:876: warning: ‘i2c_attach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:827) drivers/i2c/i2c-core.c:876: warning: ‘i2c_attach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:827) drivers/i2c/i2c-core.c:904: warning: ‘i2c_detach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:879) drivers/i2c/i2c-core.c:904: warning: ‘i2c_detach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:879) So turn it off for now - these reminders can obscure critical warnings. Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 3 +-- arch/x86/configs/x86_64_defconfig | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index c3186ccc9369..0562a36f74d9 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2242,8 +2242,7 @@ CONFIG_NLS_UTF8=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y -CONFIG_ENABLE_WARN_DEPRECATED=y -CONFIG_ENABLE_MUST_CHECK=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 4c85bfa58e19..06633483b292 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2223,8 +2223,7 @@ CONFIG_NLS_UTF8=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y -CONFIG_ENABLE_WARN_DEPRECATED=y -CONFIG_ENABLE_MUST_CHECK=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set From bd282422fe9566a89bc34af325efb6d2701903be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:59:05 +0100 Subject: [PATCH 560/566] x86, defconfig: turn off CONFIG_SCSI_ISCSI_ATTRS=y It was enabled by mistake - iscsi is not included in a typical default PC, and no other architecture has it built-in (=y) either. Turn it off. Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 8 ++++---- arch/x86/configs/x86_64_defconfig | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 0562a36f74d9..8f40a80ccb55 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:42:50 2009 +# Thu Feb 12 12:57:57 2009 # # CONFIG_64BIT is not set CONFIG_X86_32=y @@ -835,7 +835,7 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set -CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set @@ -2064,8 +2064,7 @@ CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems @@ -2243,6 +2242,7 @@ CONFIG_NLS_UTF8=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 06633483b292..10728fd91c63 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:48:13 2009 +# Thu Feb 12 12:57:29 2009 # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -837,7 +837,7 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set -CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set @@ -2044,8 +2044,7 @@ CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems @@ -2224,6 +2223,7 @@ CONFIG_NLS_UTF8=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set From b1aabecd55931ee754f6a913969516b26a0e682e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2009 15:21:44 +0200 Subject: [PATCH 561/566] mm: Export symbol ksize() Commit 7b2cd92adc5430b0c1adeb120971852b4ea1ab08 ("crypto: api - Fix zeroing on free") added modular user of ksize(). Export that to fix crypto.ko compilation. Cc: Herbert Xu Signed-off-by: Kirill A. Shutemov Signed-off-by: Pekka Enberg --- mm/slab.c | 1 + mm/slob.c | 1 + mm/slub.c | 1 + 3 files changed, 3 insertions(+) diff --git a/mm/slab.c b/mm/slab.c index ddc41f337d58..4d00855629c4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4457,3 +4457,4 @@ size_t ksize(const void *objp) return obj_size(virt_to_cache(objp)); } +EXPORT_SYMBOL(ksize); diff --git a/mm/slob.c b/mm/slob.c index bf7e8fc3aed8..52bc8a2bd9ef 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -521,6 +521,7 @@ size_t ksize(const void *block) } else return sp->page.private; } +EXPORT_SYMBOL(ksize); struct kmem_cache { unsigned int size, align; diff --git a/mm/slub.c b/mm/slub.c index bdc9abb08a23..0280eee6cf37 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2736,6 +2736,7 @@ size_t ksize(const void *object) */ return s->size; } +EXPORT_SYMBOL(ksize); void kfree(const void *x) { From 3a4c6800f31ea8395628af5e7e490270ee5d0585 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 12 Feb 2009 04:34:23 +0100 Subject: [PATCH 562/566] Fix page writeback thinko, causing Berkeley DB slowdown A bug was introduced into write_cache_pages cyclic writeout by commit 31a12666d8f0c22235297e1c1575f82061480029 ("mm: write_cache_pages cyclic fix"). The intention (and comments) is that we should cycle back and look for more dirty pages at the beginning of the file if there is no more work to be done. But the !done condition was dropped from the test. This means that any time the page writeout loop breaks (eg. due to nr_to_write == 0), we will set index to 0, then goto again. This will set done_index to index, then find done is set, so will proceed to the end of the function. When updating mapping->writeback_index for cyclic writeout, we now use done_index == 0, so we're always cycling back to 0. This seemed to be causing random mmap writes (slapadd and iozone) to start writing more pages from the LRU and writeout would slowdown, and caused bugzilla entry http://bugzilla.kernel.org/show_bug.cgi?id=12604 about Berkeley DB slowing down dramatically. With this patch, iozone random write performance is increased nearly 5x on my system (iozone -B -r 4k -s 64k -s 512m -s 1200m on ext2). Signed-off-by: Nick Piggin Reported-and-tested-by: Jan Kara Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6106a5c7ed44..3c84128596ba 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1079,7 +1079,7 @@ continue_unlock: pagevec_release(&pvec); cond_resched(); } - if (!cycled) { + if (!cycled && !done) { /* * range_cyclic: * We hit the last page and there is more work to be done: wrap From d85cf93da66977dbc645352be1b2084a659d8a0b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 10:02:56 -0800 Subject: [PATCH 563/566] x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption Impact: avoid access to percpu vars in preempible context They are intended to be used whenever there's the possibility that there's some stale state which is going to be overwritten with a queued update, or to force a state change when we may be in lazy mode. Either way, we could end up calling it with preemption enabled, so wrap the functions in their own little preempt-disable section so they can be safely called in any context (though preemption should never be enabled if we're actually in a lazy state). (Move out of line to avoid #include dependencies.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 17 ++--------------- arch/x86/kernel/paravirt.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aedc..a660eceaa273 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void) PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_cpu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } -} - +void arch_flush_lazy_cpu_mode(void); #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) @@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_mmu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } -} +void arch_flush_lazy_mmu_mode(void); static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, unsigned long phys, pgprot_t flags) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb608873..dcba6c567a2a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,30 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void arch_flush_lazy_mmu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + +void arch_flush_lazy_cpu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } + + preempt_enable(); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, From 34b0900d323122113683685b200aae9f9b75e63b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Feb 2009 21:30:48 +0100 Subject: [PATCH 564/566] x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context Impact: Catch cases where lazy MMU state is active in a preemtible context arch_flush_lazy_mmu_cpu() has been changed to disable preemption so the checks in enter/leave will never trigger. Put the preemtible() check into arch_flush_lazy_mmu_cpu() to catch such cases. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dcba6c567a2a..c6520a4e85d4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -273,6 +273,7 @@ void arch_flush_lazy_mmu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_mmu_mode(); arch_enter_lazy_mmu_mode(); } @@ -285,6 +286,7 @@ void arch_flush_lazy_cpu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_cpu_mode(); arch_enter_lazy_cpu_mode(); } From 7ad9de6ac83bd825996d2de98c92e0f425c31050 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Feb 2009 21:16:09 +0100 Subject: [PATCH 565/566] x86: CPA avoid repeated lazy mmu flush Impact: Flush the lazy MMU only once Pending mmu updates only need to be flushed once to bring the in-memory pagetable state up to date. Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f664bc1c4093..8ca0d8566fc8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -575,14 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; - - /* - * If we're called with lazy mmu updates enabled, the - * in-memory pte state may be stale. Flush pending updates to - * bring them up to date. - */ - arch_flush_lazy_mmu_mode(); - repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -819,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; From b13e24644c138d0ddbc451403c30a96b09bfd556 Mon Sep 17 00:00:00 2001 From: john stultz Date: Thu, 12 Feb 2009 18:48:53 -0800 Subject: [PATCH 566/566] x86, hpet: fix for LS21 + HPET = boot hang Between 2.6.23 and 2.6.24-rc1 a change was made that broke IBM LS21 systems that had the HPET enabled in the BIOS, resulting in boot hangs for x86_64. Specifically commit b8ce33590687888ebb900d09557b8807c4539022, which merges the i386 and x86_64 HPET code. Prior to this commit, when we setup the HPET timers in x86_64, we did the following: hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); However after the i386/x86_64 HPET merge, we do the following: cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); However on LS21s with HPET enabled in the BIOS, the HPET_T0_CFG register boots with Level triggered interrupts (HPET_TN_LEVEL) enabled. This causes the periodic interrupt to be not so periodic, and that results in the boot time hang I reported earlier in the delay calibration. My fix: Always disable HPET_TN_LEVEL when setting up periodic mode. Signed-off-by: John Stultz Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8add..5c8da2c2c185 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode, now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); + /* Make sure we use edge triggered interrupts */ + cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer));