From 885028e4ba4caf49d565c96481e1a05220ecb517 Mon Sep 17 00:00:00 2001 From: Srinidhi Kasagar Date: Thu, 17 Feb 2011 07:03:51 +0100 Subject: [PATCH 01/15] ARM: 6741/1: errata: pl310 cache sync operation may be faulty The effect of cache sync operation is to drain the store buffer and wait for all internal buffers to be empty. In normal conditions, store buffer is able to merge the normal memory writes within its 32-byte data buffers. Due to this erratum present in r3p0, the effect of cache sync operation on the store buffer still remains when the operation completes. This means that the store buffer is always asked to drain and this prevents it from merging any further writes. This can severely affect performance on the write traffic esp. on Normal memory NC one. The proposed workaround is to replace the normal offset of cache sync operation(0x730) by another offset targeting an unmapped PL310 register 0x740. Signed-off-by: srinidhi kasagar Acked-by: Linus Walleij Acked-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/Kconfig | 15 +++++++++++++++ arch/arm/include/asm/hardware/cache-l2x0.h | 1 + arch/arm/mm/cache-l2x0.c | 6 ++++++ 3 files changed, 22 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 26d45e5b636b..ba9fc213f344 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1177,6 +1177,21 @@ config ARM_ERRATA_743622 visible impact on the overall performance or power consumption of the processor. +config ARM_ERRATA_753970 + bool "ARM errata: cache sync operation may be faulty" + depends on CACHE_PL310 + help + This option enables the workaround for the 753970 PL310 (r3p0) erratum. + + Under some condition the effect of cache sync operation on + the store buffer still remains when the operation completes. + This means that the store buffer is always asked to drain and + this prevents it from merging any further writes. The workaround + is to replace the normal offset of cache sync operation (0x730) + by another offset targeting an unmapped PL310 register 0x740. + This has the same effect as the cache sync operation: store buffer + drain and waiting for all buffers empty. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 5aeec1e1735c..16bd48031583 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -36,6 +36,7 @@ #define L2X0_RAW_INTR_STAT 0x21C #define L2X0_INTR_CLEAR 0x220 #define L2X0_CACHE_SYNC 0x730 +#define L2X0_DUMMY_REG 0x740 #define L2X0_INV_LINE_PA 0x770 #define L2X0_INV_WAY 0x77C #define L2X0_CLEAN_LINE_PA 0x7B0 diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 170c9bb95866..f2ce38e085d2 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -49,7 +49,13 @@ static inline void cache_wait(void __iomem *reg, unsigned long mask) static inline void cache_sync(void) { void __iomem *base = l2x0_base; + +#ifdef CONFIG_ARM_ERRATA_753970 + /* write to an unmmapped register */ + writel_relaxed(0, base + L2X0_DUMMY_REG); +#else writel_relaxed(0, base + L2X0_CACHE_SYNC); +#endif cache_wait(base + L2X0_CACHE_SYNC, 1); } From 71efb063f4a145ae420be054f5a91dcf7c19b375 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 18 Feb 2011 16:21:06 +0100 Subject: [PATCH 02/15] ARM: 6742/1: pmu: avoid setting IRQ affinity on UP systems Now that we can execute a CONFIG_SMP kernel on a uniprocessor system, extra care has to be taken in the PMU IRQ affinity setting code to ensure that we don't always fail to initialise. This patch changes the CPU PMU initialisation code so that when we only have a single IRQ, whose affinity can not be changed at the controller, we report success (0) rather than -EINVAL. Reported-by: Avik Sil Acked-by: Jamie Iles Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/pmu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c index b8af96ea62e6..2c79eec19262 100644 --- a/arch/arm/kernel/pmu.c +++ b/arch/arm/kernel/pmu.c @@ -97,28 +97,34 @@ set_irq_affinity(int irq, irq, cpu); return err; #else - return 0; + return -EINVAL; #endif } static int init_cpu_pmu(void) { - int i, err = 0; + int i, irqs, err = 0; struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU]; - if (!pdev) { - err = -ENODEV; - goto out; - } + if (!pdev) + return -ENODEV; - for (i = 0; i < pdev->num_resources; ++i) { + irqs = pdev->num_resources; + + /* + * If we have a single PMU interrupt that we can't shift, assume that + * we're running on a uniprocessor machine and continue. + */ + if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0))) + return 0; + + for (i = 0; i < irqs; ++i) { err = set_irq_affinity(platform_get_irq(pdev, i), i); if (err) break; } -out: return err; } From 9a27c27ce49df72b1b0062e2ad192a804e1b069b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 18 Feb 2011 16:36:35 +0100 Subject: [PATCH 03/15] ARM: 6743/1: errata: interrupted ICALLUIS may prevent completion of broadcasted operation On versions of the Cortex-A9 prior to r3p0, an interrupted ICIALLUIS operation may prevent the completion of a following broadcasted operation if the second operation is received by a CPU before the ICIALLUIS has completed, potentially leading to corrupted entries in the cache or TLB. This workaround sets a bit in the diagnostic register of the Cortex-A9, causing CP15 maintenance operations to be uninterruptible. Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 10 ++++++++++ arch/arm/mm/proc-v7.S | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba9fc213f344..166efa2a19cd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1177,6 +1177,16 @@ config ARM_ERRATA_743622 visible impact on the overall performance or power consumption of the processor. +config ARM_ERRATA_751472 + bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation" + depends on CPU_V7 && SMP + help + This option enables the workaround for the 751472 Cortex-A9 (prior + to r3p0) erratum. An interrupted ICIALLUIS operation may prevent the + completion of a following broadcasted operation if the second + operation is received by a CPU before the ICIALLUIS has completed, + potentially leading to corrupted entries in the cache or TLB. + config ARM_ERRATA_753970 bool "ARM errata: cache sync operation may be faulty" depends on CACHE_PL310 diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 0c1172b56b4e..8e3356239136 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -264,6 +264,12 @@ __v7_setup: orreq r10, r10, #1 << 6 @ set bit #6 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register #endif +#ifdef CONFIG_ARM_ERRATA_751472 + cmp r6, #0x30 @ present prior to r3p0 + mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrlt r10, r10, #1 << 11 @ set bit #11 + mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif 3: mov r10, #0 #ifdef HARVARD_CACHE From 32c3fcb08178f6ca14191b22d72760984cfb75cb Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 16 Feb 2011 17:55:38 +0100 Subject: [PATCH 04/15] ARM: 6739/1: update .gitignore for boot/compressed Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/.gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index ab204db594d3..c6028967d336 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -1,3 +1,7 @@ font.c -piggy.gz +lib1funcs.S +piggy.gzip +piggy.lzo +piggy.lzma +vmlinux vmlinux.lds From 315cfe7835c9a3fe27f15519bdeee8bf0a293e33 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 15 Feb 2011 18:06:57 +0100 Subject: [PATCH 05/15] ARM: 6676/1: Correct the cpu_architecture() function for ARMv7 If ID_MMFR0[3:0] >= 3, the architecture version is ARMv7. The code was currently only testing for ID_MMFR0[3:0] == 3. Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 420b8d6485d6..5ea4fb718b97 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -226,8 +226,8 @@ int cpu_architecture(void) * Register 0 and check for VMSAv7 or PMSAv7 */ asm("mrc p15, 0, %0, c0, c1, 4" : "=r" (mmfr0)); - if ((mmfr0 & 0x0000000f) == 0x00000003 || - (mmfr0 & 0x000000f0) == 0x00000030) + if ((mmfr0 & 0x0000000f) >= 0x00000003 || + (mmfr0 & 0x000000f0) >= 0x00000030) cpu_arch = CPU_ARCH_ARMv7; else if ((mmfr0 & 0x0000000f) == 0x00000002 || (mmfr0 & 0x000000f0) == 0x00000020) From d16613586aa064d87ef05e3b929e3b4fdc714cc4 Mon Sep 17 00:00:00 2001 From: viresh kumar Date: Wed, 16 Feb 2011 07:40:27 +0100 Subject: [PATCH 06/15] ARM: 6720/1: SPEAr: Append UL to VMALLOC_END This patch fixes following warning: arch/arm/mm/init.c:606: warning: format '%08lx' expects type 'long unsigned int', but argument 12 has type 'unsigned int' by appending UL to VMALLOC_END's Number. Reviewed-by: Stanley Miao Signed-off-by: Viresh Kumar Signed-off-by: Russell King --- arch/arm/plat-spear/include/plat/vmalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-spear/include/plat/vmalloc.h b/arch/arm/plat-spear/include/plat/vmalloc.h index 09e9372aea21..8c8b24d07046 100644 --- a/arch/arm/plat-spear/include/plat/vmalloc.h +++ b/arch/arm/plat-spear/include/plat/vmalloc.h @@ -14,6 +14,6 @@ #ifndef __PLAT_VMALLOC_H #define __PLAT_VMALLOC_H -#define VMALLOC_END 0xF0000000 +#define VMALLOC_END 0xF0000000UL #endif /* __PLAT_VMALLOC_H */ From ac1556b37e9b06a41d7691dca0d50a28210488f9 Mon Sep 17 00:00:00 2001 From: viresh kumar Date: Wed, 16 Feb 2011 07:41:06 +0100 Subject: [PATCH 07/15] ARM: 6712/1: SPEAr: replace readl(), writel() with relaxed versions in uncompress.h readl() and writel() calls the outer cache maintainance operations which are not available during Linux uncompression. This patch replaces readl() and writel() with readl_relaxed() and writel_relaxed() to avoid the link time errors. Reviewed-by: Stanley Miao Signed-off-by: Viresh Kumar Signed-off-by: Russell King --- arch/arm/plat-spear/include/plat/uncompress.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-spear/include/plat/uncompress.h b/arch/arm/plat-spear/include/plat/uncompress.h index 99ba6789cc97..6dd455bafdfd 100644 --- a/arch/arm/plat-spear/include/plat/uncompress.h +++ b/arch/arm/plat-spear/include/plat/uncompress.h @@ -24,10 +24,10 @@ static inline void putc(int c) { void __iomem *base = (void __iomem *)SPEAR_DBG_UART_BASE; - while (readl(base + UART01x_FR) & UART01x_FR_TXFF) + while (readl_relaxed(base + UART01x_FR) & UART01x_FR_TXFF) barrier(); - writel(c, base + UART01x_DR); + writel_relaxed(c, base + UART01x_DR); } static inline void flush(void) From b8272a61c16decd4c8627fc1181bdd174c922c3f Mon Sep 17 00:00:00 2001 From: Shiraz Hashim Date: Wed, 16 Feb 2011 07:40:29 +0100 Subject: [PATCH 08/15] ARM: 6722/1: SPEAr: sp810: switch to slow mode before reset In sysctl_soft_reset(), switch to slow mode before resetting the system via the system controller. This is required. Reviewed-by: Stanley Miao Signed-off-by: Shiraz Hashim Signed-off-by: Russell King --- arch/arm/include/asm/hardware/sp810.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/include/asm/hardware/sp810.h b/arch/arm/include/asm/hardware/sp810.h index 721847dc68ab..e0d1c0cfa548 100644 --- a/arch/arm/include/asm/hardware/sp810.h +++ b/arch/arm/include/asm/hardware/sp810.h @@ -58,6 +58,9 @@ static inline void sysctl_soft_reset(void __iomem *base) { + /* switch to slow mode */ + writel(0x2, base + SCCTRL); + /* writing any value to SCSYSSTAT reg will reset system */ writel(0, base + SCSYSSTAT); } From 167879ae8924540660b187d759956f316dd6e8fe Mon Sep 17 00:00:00 2001 From: viresh kumar Date: Wed, 16 Feb 2011 07:40:41 +0100 Subject: [PATCH 09/15] ARM: 6700/1: SPEAr: Correct SOC config base address for spear320 SPEAR320_SOC_CONFIG_BASE was wrong, causing the wrong registers to be accessed. Reviewed-by: Stanley Miao Signed-off-by: Viresh Kumar Signed-off-by: Russell King --- arch/arm/mach-spear3xx/include/mach/spear320.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-spear3xx/include/mach/spear320.h b/arch/arm/mach-spear3xx/include/mach/spear320.h index cacf17a958cd..53677e464d4b 100644 --- a/arch/arm/mach-spear3xx/include/mach/spear320.h +++ b/arch/arm/mach-spear3xx/include/mach/spear320.h @@ -62,7 +62,7 @@ #define SPEAR320_SMII1_BASE 0xAB000000 #define SPEAR320_SMII1_SIZE 0x01000000 -#define SPEAR320_SOC_CONFIG_BASE 0xB4000000 +#define SPEAR320_SOC_CONFIG_BASE 0xB3000000 #define SPEAR320_SOC_CONFIG_SIZE 0x00000070 /* Interrupt registers offsets and masks */ #define INT_STS_MASK_REG 0x04 From dc810efb0ca5702c9d96782b99282d4b4383e877 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Wed, 16 Feb 2011 18:54:01 +0100 Subject: [PATCH 10/15] ARM: 6740/1: Place correctly notes section in the linker script Commit 18991197b4b588255ccabf472ebc84db7b66a19c added --build-id linker option when toolchain supports it. ARM one does, but for some reason places the section at 0 when linker script doesn't mention it explicitly. The 1e621a8e3752367d4aae78a8ab00a18fb2793f34 worked around the problem removing this section from binary image with explicit objcopy options, but it still exists in vmlinux, confusing tools like debuggers and perf. This problem was discussed here: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-May/015994.html http://lists.infradead.org/pipermail/linux-arm-kernel/2010-May/015994.html but the proposed changes to the linker script were substantial. This patch simply places NOTES (36 bytes long, at least when compiled with CodeSourcery toolchain) between data and bss, which seem to be the right place (and suggested by the sample linker script in include/asm-generic/vmlinux.lds.h). It is enough to place it correctly in vmlinux (so debuggers are happy): Section Headers: [11] .data PROGBITS c07ce000 7ce000 020fc0 00 WA 0 0 32 [12] .notes NOTE c07eefc0 7eefc0 000024 00 AX 0 0 4 [13] .bss NOBITS c07ef000 7eefe4 01e628 00 WA 0 0 32 Program Headers: LOAD 0x008000 0xc0008000 0xc0008000 0x7e6fe4 0x805628 RWE 0x8000 NOTE 0x7eefc0 0xc07eefc0 0xc07eefc0 0x00024 0x00024 R E 0x4 Section to Segment mapping: Segment Sections... 00 <...> .data .notes .bss 01 .notes and to get it exposed as /sys/kernel/notes used by perf tools. Signed-off-by: Pawel Moll Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index c22c1adfedd6..6f7b29294c80 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -15,7 +15,7 @@ ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 endif -OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S +OBJCOPYFLAGS :=-O binary -R .comment -S GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 86b66f3f2031..558bd81a4fce 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -247,6 +247,8 @@ SECTIONS } #endif + NOTES + BSS_SECTION(0, 0, 0) _end = .; From 53399053eb505cf541b2405bd9d9bca5ecfb96fb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Feb 2011 12:22:52 +0000 Subject: [PATCH 11/15] ARM: Ensure predictable endian state on signal handler entry Ensure a predictable endian state when entering signal handlers. This avoids programs which use SETEND to momentarily switch their endian state from having their signal handlers entered with an unpredictable endian state. Cc: Acked-by: Dave Martin Signed-off-by: Russell King --- arch/arm/kernel/signal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 907d5a620bca..abaf8445ce25 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -474,7 +474,9 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka, unsigned long handler = (unsigned long)ka->sa.sa_handler; unsigned long retcode; int thumb = 0; - unsigned long cpsr = regs->ARM_cpsr & ~PSR_f; + unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT); + + cpsr |= PSR_ENDSTATE; /* * Maybe we need to deliver a 32-bit signal to a 26-bit task. From a9ad21fed09cb95d34af9474be0831525b30c4c6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 21 Feb 2011 10:13:36 +0000 Subject: [PATCH 12/15] ARM: Keep exit text/data around for SMP_ON_UP When SMP_ON_UP is used and the spinlocks are inlined, we end up with inline spinlocks in the exit code, with references from the SMP alternatives section to the exit sections. This causes link time errors. Avoid this by placing the exit sections in the init-discarded region. Cc: Tested-by: Dave Martin Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 558bd81a4fce..61462790757f 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -21,6 +21,12 @@ #define ARM_CPU_KEEP(x) #endif +#if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK) +#define ARM_EXIT_KEEP(x) x +#else +#define ARM_EXIT_KEEP(x) +#endif + OUTPUT_ARCH(arm) ENTRY(stext) @@ -43,6 +49,7 @@ SECTIONS _sinittext = .; HEAD_TEXT INIT_TEXT + ARM_EXIT_KEEP(EXIT_TEXT) _einittext = .; ARM_CPU_DISCARD(PROC_INFO) __arch_info_begin = .; @@ -67,6 +74,7 @@ SECTIONS #ifndef CONFIG_XIP_KERNEL __init_begin = _stext; INIT_DATA + ARM_EXIT_KEEP(EXIT_DATA) #endif } @@ -162,6 +170,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_begin = .; INIT_DATA + ARM_EXIT_KEEP(EXIT_DATA) . = ALIGN(PAGE_SIZE); __init_end = .; #endif From 06824ba824b3e9f2fedb38bee79af0643198ed7f Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Feb 2011 12:16:45 +0000 Subject: [PATCH 13/15] ARM: tlb: delay page freeing for SMP and ARMv7 CPUs We need to delay freeing any mapped page on SMP and ARMv7 systems to ensure that the data is not accessed by other CPUs, or is used for speculative prefetch with ARMv7. This includes not only mapped pages but also pages used for the page tables themselves. This avoids races with the MMU/other CPUs accessing pages after they've been freed but before we've invalidated the TLB. Signed-off-by: Russell King --- arch/arm/include/asm/tlb.h | 102 ++++++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index f41a6f57cd12..e7690887b958 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -18,7 +18,6 @@ #define __ASMARM_TLB_H #include -#include #ifndef CONFIG_MMU @@ -27,7 +26,23 @@ #else /* !CONFIG_MMU */ +#include #include +#include + +/* + * We need to delay page freeing for SMP as other CPUs can access pages + * which have been removed but not yet had their TLB entries invalidated. + * Also, as ARMv7 speculative prefetch can drag new entries into the TLB, + * we need to apply this same delaying tactic to ensure correct operation. + */ +#if defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7) +#define tlb_fast_mode(tlb) 0 +#define FREE_PTE_NR 500 +#else +#define tlb_fast_mode(tlb) 1 +#define FREE_PTE_NR 0 +#endif /* * TLB handling. This allows us to remove pages from the page @@ -36,12 +51,58 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; + struct vm_area_struct *vma; unsigned long range_start; unsigned long range_end; + unsigned int nr; + struct page *pages[FREE_PTE_NR]; }; DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); +/* + * This is unnecessarily complex. There's three ways the TLB shootdown + * code is used: + * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region(). + * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called. + * tlb->vma will be non-NULL. + * 2. Unmapping all vmas. See exit_mmap(). + * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called. + * tlb->vma will be non-NULL. Additionally, page tables will be freed. + * 3. Unmapping argument pages. See shift_arg_pages(). + * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called. + * tlb->vma will be NULL. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->fullmm || !tlb->vma) + flush_tlb_mm(tlb->mm); + else if (tlb->range_end > 0) { + flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end); + tlb->range_start = TASK_SIZE; + tlb->range_end = 0; + } +} + +static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) +{ + if (!tlb->fullmm) { + if (addr < tlb->range_start) + tlb->range_start = addr; + if (addr + PAGE_SIZE > tlb->range_end) + tlb->range_end = addr + PAGE_SIZE; + } +} + +static inline void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush(tlb); + if (!tlb_fast_mode(tlb)) { + free_pages_and_swap_cache(tlb->pages, tlb->nr); + tlb->nr = 0; + } +} + static inline struct mmu_gather * tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) { @@ -49,6 +110,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) tlb->mm = mm; tlb->fullmm = full_mm_flush; + tlb->vma = NULL; + tlb->nr = 0; return tlb; } @@ -56,8 +119,7 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - if (tlb->fullmm) - flush_tlb_mm(tlb->mm); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ check_pgt_cache(); @@ -71,12 +133,7 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) static inline void tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr) { - if (!tlb->fullmm) { - if (addr < tlb->range_start) - tlb->range_start = addr; - if (addr + PAGE_SIZE > tlb->range_end) - tlb->range_end = addr + PAGE_SIZE; - } + tlb_add_flush(tlb, addr); } /* @@ -89,6 +146,7 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (!tlb->fullmm) { flush_cache_range(vma, vma->vm_start, vma->vm_end); + tlb->vma = vma; tlb->range_start = TASK_SIZE; tlb->range_end = 0; } @@ -97,12 +155,30 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { - if (!tlb->fullmm && tlb->range_end > 0) - flush_tlb_range(vma, tlb->range_start, tlb->range_end); + if (!tlb->fullmm) + tlb_flush(tlb); } -#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) -#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep) +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + if (tlb_fast_mode(tlb)) { + free_page_and_swap_cache(page); + } else { + tlb->pages[tlb->nr++] = page; + if (tlb->nr >= FREE_PTE_NR) + tlb_flush_mmu(tlb); + } +} + +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, + unsigned long addr) +{ + pgtable_page_dtor(pte); + tlb_add_flush(tlb, addr); + tlb_remove_page(tlb, pte); +} + +#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) #define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) #define tlb_migrate_finish(mm) do { } while (0) From 58e9c47fa0dd76693b2c85c010c7430a4de77c6d Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Feb 2011 12:27:49 +0000 Subject: [PATCH 14/15] ARM: tlb: move noMMU tlb_flush() to asm/tlb.h There's no need to noMMU to put tlb_flush() in asm/tlbflush.h - it's part of the tlb shootdown interface. Move it to asm/tlb.h instead, as per x86. Signed-off-by: Russell King --- arch/arm/include/asm/tlb.h | 3 +++ arch/arm/include/asm/tlbflush.h | 7 +------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index e7690887b958..82dfe5d0c41e 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -22,6 +22,9 @@ #ifndef CONFIG_MMU #include + +#define tlb_flush(tlb) ((void) tlb) + #include #else /* !CONFIG_MMU */ diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index ce7378ea15a2..d2005de383b8 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -10,12 +10,7 @@ #ifndef _ASMARM_TLBFLUSH_H #define _ASMARM_TLBFLUSH_H - -#ifndef CONFIG_MMU - -#define tlb_flush(tlb) ((void) tlb) - -#else /* CONFIG_MMU */ +#ifdef CONFIG_MMU #include From 5a5af730536fbf15fc354980cba2a0400afa6b76 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 21 Feb 2011 04:37:20 +0100 Subject: [PATCH 15/15] ARM: 6745/1: kprobes insn decoding fix Marcin Slusarz says: > In arch/arm/kernel/kprobes-decode.c there's a function > arm_kprobe_decode_insn which does: > > } else if ((insn & 0x0e000000) == 0x0c400000) { > ... > > This is always false, so code below is dead. > I found this bug by coccinelle (http://coccinelle.lip6.fr/). Reported-by: Marcin Slusarz Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/kprobes-decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c index 2c1f0050c9c4..8f6ed43861f1 100644 --- a/arch/arm/kernel/kprobes-decode.c +++ b/arch/arm/kernel/kprobes-decode.c @@ -1437,7 +1437,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) return space_cccc_1100_010x(insn, asi); - } else if ((insn & 0x0e000000) == 0x0c400000) { + } else if ((insn & 0x0e000000) == 0x0c000000) { return space_cccc_110x(insn, asi);