From 281ff33b7c1b1ba2a5f9b03425e5f692a94913fa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 18 Feb 2010 11:51:40 -0800 Subject: [PATCH 1/5] x86_64, cpa: Don't work hard in preserving kernel 2M mappings when using 4K already We currently enforce the !RW mapping for the kernel mapping that maps holes between different text, rodata and data sections. However, kernel identity mappings will have different RWX permissions to the pages mapping to text and to the pages padding (which are freed) the text, rodata sections. Hence kernel identity mappings will be broken to smaller pages. For 64-bit, kernel text and kernel identity mappings are different, so we can enable protection checks that come with CONFIG_DEBUG_RODATA, as well as retain 2MB large page mappings for kernel text. Konrad reported a boot failure with the Linux Xen paravirt guest because of this. In this paravirt guest case, the kernel text mapping and the kernel identity mapping share the same page-table pages. Thus forcing the !RW mapping for some of the kernel mappings also cause the kernel identity mappings to be read-only resulting in the boot failure. Linux Xen paravirt guest also uses 4k mappings and don't use 2M mapping. Fix this issue and retain large page performance advantage for native kernels by not working hard and not enforcing !RW for the kernel text mapping, if the current mapping is already using small page mapping. Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Suresh Siddha LKML-Reference: <1266522700.2909.34.camel@sbs-t61.sc.intel.com> Tested-by: Konrad Rzeszutek Wilk Cc: stable@kernel.org [2.6.32, 2.6.33] Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1d4eb93d333c..cf07c26d9a4a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -291,8 +291,29 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, */ if (kernel_set_to_readonly && within(address, (unsigned long)_text, - (unsigned long)__end_rodata_hpage_align)) - pgprot_val(forbidden) |= _PAGE_RW; + (unsigned long)__end_rodata_hpage_align)) { + unsigned int level; + + /* + * Don't enforce the !RW mapping for the kernel text mapping, + * if the current mapping is already using small page mapping. + * No need to work hard to preserve large page mappings in this + * case. + * + * This also fixes the Linux Xen paravirt guest boot failure + * (because of unexpected read-only mappings for kernel identity + * mappings). In this paravirt guest case, the kernel text + * mapping and the kernel identity mapping share the same + * page-table pages. Thus we can't really use different + * protections for the kernel text and identity mappings. Also, + * these shared mappings are made of small page mappings. + * Thus this don't enforce !RW mapping for small page kernel + * text mapping logic will help Linux Xen parvirt guest boot + * aswell. + */ + if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) + pgprot_val(forbidden) |= _PAGE_RW; + } #endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); From 10fb7f1f2d311b4d2e5d881fe2d83f1c281100f9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 5 Mar 2010 13:10:36 -0600 Subject: [PATCH 2/5] x86: Reduce per cpu MCA boot up messages Don't write per cpu MCA boot up messages. Signed-of-by: Mike Travis Cc: Hidetoshi Seto Cc: x86@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 7c785634af2b..d15df6e49bf0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -95,7 +95,7 @@ static void cmci_discover(int banks, int boot) /* Already owned by someone else? */ if (val & CMCI_EN) { - if (test_and_clear_bit(i, owned) || boot) + if (test_and_clear_bit(i, owned) && !boot) print_update("SHD", &hdr, i); __clear_bit(i, __get_cpu_var(mce_poll_banks)); continue; @@ -107,7 +107,7 @@ static void cmci_discover(int banks, int boot) /* Did the enable bit stick? -- the bank supports CMCI */ if (val & CMCI_EN) { - if (!test_and_set_bit(i, owned) || boot) + if (!test_and_set_bit(i, owned) && !boot) print_update("CMCI", &hdr, i); __clear_bit(i, __get_cpu_var(mce_poll_banks)); } else { From d6dd692168c049196f54edc2e8227c60702bb1d2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 5 Mar 2010 13:10:38 -0600 Subject: [PATCH 3/5] x86: Reduce per cpu warning boot up messages Reduce warning message output to one line only instead of per cpu. Signed-of-by: Mike Travis Cc: Rusty Russell Cc: Frederic Weisbecker Cc: Brian Gerst Cc: x86@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c9b3522b6b46..4e8cb4ee9fcb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," + printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," " performance may degrade.\n"); } #endif From 8447b360a3897bdfb0677107564d1dd9ab6e63be Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Mar 2010 12:43:29 -0600 Subject: [PATCH 4/5] x86, UV: Fix target_cpus() in x2apic_uv_x.c target_cpu() should initially target all cpus, not just cpu 0. Otherwise systems with lots of disks can exhaust the interrupt vectors on cpu 0 if a large number of disks are discovered before the irq balancer is running. Note: UV code only... Signed-off-by: Jack Steiner LKML-Reference: <20100311184328.GA21433@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 21db3cbea7dc..af0ca80e38a9 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -114,11 +114,9 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - static const struct cpumask *uv_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) From 0e152cd7c16832bd5cadee0c2e41d9959bc9b6f9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 12 Mar 2010 15:43:03 +0100 Subject: [PATCH 5/5] x86, k8 nb: Fix boot crash: enable k8_northbridges unconditionally on AMD systems de957628ce7c84764ff41331111036b3ae5bad0f changed setting of the x86_init.iommu.iommu_init function ptr only when GART IOMMU is found. One side effect of it is that num_k8_northbridges is not initialized anymore if not explicitly called. This resulted in uninitialized pointers in , for example, which uses the num_k8_northbridges thing through node_to_k8_nb_misc(). Fix that through an initcall that runs right after the PCI subsystem and does all the scanning. Then, remove initialization in gart_iommu_init() which is a rootfs_initcall and we're running before that. What is more, since num_k8_northbridges is being used in other places beside GART IOMMU, include it whenever we add AMD CPU support. The previous dependency chain in kconfig contained K8_NB depends on AGP_AMD64|GART_IOMMU which was clearly incorrect. The more natural way in terms of hardware dependency should be AGP_AMD64|GART_IOMMU depends on K8_NB depends on CPU_SUP_AMD && PCI. Make it so Number One! Signed-off-by: Borislav Petkov Cc: FUJITA Tomonori Cc: Joerg Roedel LKML-Reference: <20100312144303.GA29262@aftab> Signed-off-by: Ingo Molnar Tested-by: Joerg Roedel --- arch/x86/Kconfig | 4 ++-- arch/x86/kernel/k8.c | 14 ++++++++++++++ arch/x86/kernel/pci-gart_64.c | 2 +- drivers/char/agp/Kconfig | 2 +- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index eb4092568f9e..ddb52b8d38a7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -627,7 +627,7 @@ config GART_IOMMU bool "GART IOMMU support" if EMBEDDED default y select SWIOTLB - depends on X86_64 && PCI + depends on X86_64 && PCI && K8_NB ---help--- Support for full DMA access of devices with 32bit memory access only on systems with more than 3GB. This is usually needed for USB, @@ -2026,7 +2026,7 @@ endif # X86_32 config K8_NB def_bool y - depends on AGP_AMD64 || (X86_64 && (GART_IOMMU || (PCI && NUMA))) + depends on CPU_SUP_AMD && PCI source "drivers/pcmcia/Kconfig" diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index cbc4332a77b2..9b895464dd03 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -121,3 +121,17 @@ void k8_flush_garts(void) } EXPORT_SYMBOL_GPL(k8_flush_garts); +static __init int init_k8_nbs(void) +{ + int err = 0; + + err = cache_k8_northbridges(); + + if (err < 0) + printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); + + return err; +} + +/* This has to go after the PCI subsystem */ +fs_initcall(init_k8_nbs); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 34de53b46f87..f3af115a573a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -735,7 +735,7 @@ int __init gart_iommu_init(void) unsigned long scratch; long i; - if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) + if (num_k8_northbridges == 0) return 0; #ifndef CONFIG_AGP_AMD64 diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index 2fb3a480f6b0..4b66c69eaf57 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig @@ -57,7 +57,7 @@ config AGP_AMD config AGP_AMD64 tristate "AMD Opteron/Athlon64 on-CPU GART support" - depends on AGP && X86 + depends on AGP && X86 && K8_NB help This option gives you AGP support for the GLX component of X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.