powerpc/mm: Switch to generic RCU get_user_pages_fast

This patch switch the ppc arch to use the generic RCU based gup implementation. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2014-11-05 21:57:41 +05:30 · 2014-11-05 21:57:41 +05:30 · b30e759072
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@ -149,6 +149,7 @@ config PPC
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
 	select NO_BOOTMEM
+	select HAVE_GENERIC_RCU_GUP

 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
 #endif /* CONFIG_PPC_BOOK3S_64 */


-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
 				    unsigned pdshift)
 {
 	/*
@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
 	 */
 	unsigned long idx = 0;

-	pte_t *dir = hugepd_page(*hpdp);
+	pte_t *dir = hugepd_page(hpd);
 #ifndef CONFIG_PPC_FSL_BOOK3E
-	idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
+	idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
 #endif

 	return dir + idx;
@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 }

 #define hugepd_shift(x) 0
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
 				    unsigned pdshift)
 {
 	return 0;
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd)
 }
 #endif

-#define is_hugepd(pdep)               (hugepd_ok(*((hugepd_t *)(pdep))))
+#define is_hugepd(hpd)               (hugepd_ok(hpd))
 #define pgd_huge pgd_huge
 int pgd_huge(pgd_t pgd);
 #else /* CONFIG_HUGETLB_PAGE */
 #define is_hugepd(pdep)			0
 #define pgd_huge(pgd)			0
 #endif /* CONFIG_HUGETLB_PAGE */
+#define __hugepd(x) ((hugepd_t) { (x) })

 struct page;
 extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
 	 */
 	return true;
 }
-
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@ -274,11 +274,9 @@ extern void paging_init(void);
 */
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);

-extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
-		      unsigned long end, int write, struct page **pages, int *nr);
-
 extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
-		       unsigned long end, int write, struct page **pages, int *nr);
+		       unsigned long end, int write,
+		       struct page **pages, int *nr);
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_large(pmd)		0
 #define has_transparent_hugepage() 0
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

 ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)

-obj-y				:= fault.o mem.o pgtable.o gup.o mmap.o \
+obj-y				:= fault.o mem.o pgtable.o mmap.o \
 				   init_$(CONFIG_WORD_SIZE).o \
 				   pgtable_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@ -1,235 +0,0 @@
-/*
- * Lockless get_user_pages_fast for powerpc
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#undef DEBUG
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-#ifdef __HAVE_ARCH_PTE_SPECIAL
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
-{
-	unsigned long mask, result;
-	pte_t *ptep;
-
-	result = _PAGE_PRESENT|_PAGE_USER;
-	if (write)
-		result |= _PAGE_RW;
-	mask = result | _PAGE_SPECIAL;
-
-	ptep = pte_offset_kernel(&pmd, addr);
-	do {
-		pte_t pte = ACCESS_ONCE(*ptep);
-		struct page *page;
-		/*
-		 * Similar to the PMD case, NUMA hinting must take slow path
-		 */
-		if (pte_numa(pte))
-			return 0;
-
-		if ((pte_val(pte) & mask) != result)
-			return 0;
-		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-		page = pte_page(pte);
-		if (!page_cache_get_speculative(page))
-			return 0;
-		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-			put_page(page);
-			return 0;
-		}
-		pages[*nr] = page;
-		(*nr)++;
-
-	} while (ptep++, addr += PAGE_SIZE, addr != end);
-
-	return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
-{
-	unsigned long next;
-	pmd_t *pmdp;
-
-	pmdp = pmd_offset(&pud, addr);
-	do {
-		pmd_t pmd = ACCESS_ONCE(*pmdp);
-
-		next = pmd_addr_end(addr, end);
-		/*
-		 * If we find a splitting transparent hugepage we
-		 * return zero. That will result in taking the slow
-		 * path which will call wait_split_huge_page()
-		 * if the pmd is still in splitting state
-		 */
-		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
-			return 0;
-		if (pmd_huge(pmd) || pmd_large(pmd)) {
-			/*
-			 * NUMA hinting faults need to be handled in the GUP
-			 * slowpath for accounting purposes and so that they
-			 * can be serialised against THP migration.
-			 */
-			if (pmd_numa(pmd))
-				return 0;
-
-			if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
-					 write, pages, nr))
-				return 0;
-		} else if (is_hugepd(pmdp)) {
-			if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
-					addr, next, write, pages, nr))
-				return 0;
-		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
-			return 0;
-	} while (pmdp++, addr = next, addr != end);
-
-	return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
-{
-	unsigned long next;
-	pud_t *pudp;
-
-	pudp = pud_offset(&pgd, addr);
-	do {
-		pud_t pud = ACCESS_ONCE(*pudp);
-
-		next = pud_addr_end(addr, end);
-		if (pud_none(pud))
-			return 0;
-		if (pud_huge(pud)) {
-			if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
-					 write, pages, nr))
-				return 0;
-		} else if (is_hugepd(pudp)) {
-			if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
-					addr, next, write, pages, nr))
-				return 0;
-		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
-			return 0;
-	} while (pudp++, addr = next, addr != end);
-
-	return 1;
-}
-
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			  struct page **pages)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr, len, end;
-	unsigned long next;
-	unsigned long flags;
-	pgd_t *pgdp;
-	int nr = 0;
-
-	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
-
-	start &= PAGE_MASK;
-	addr = start;
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-	end = start + len;
-
-	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-					start, len)))
-		return 0;
-
-	pr_devel("  aligned: %lx .. %lx\n", start, end);
-
-	/*
-	 * XXX: batch / limit 'nr', to avoid large irq off latency
-	 * needs some instrumenting to determine the common sizes used by
-	 * important workloads (eg. DB2), and whether limiting the batch size
-	 * will decrease performance.
-	 *
-	 * It seems like we're in the clear for the moment. Direct-IO is
-	 * the main guy that batches up lots of get_user_pages, and even
-	 * they are limited to 64-at-a-time which is not so many.
-	 */
-	/*
-	 * This doesn't prevent pagetable teardown, but does prevent
-	 * the pagetables from being freed on powerpc.
-	 *
-	 * So long as we atomically load page table pointers versus teardown,
-	 * we can follow the address down to the the page and take a ref on it.
-	 */
-	local_irq_save(flags);
-
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = ACCESS_ONCE(*pgdp);
-
-		pr_devel("  %016lx: normal pgd %p\n", addr,
-			 (void *)pgd_val(pgd));
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			break;
-		if (pgd_huge(pgd)) {
-			if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
-					 write, pages, &nr))
-				break;
-		} else if (is_hugepd(pgdp)) {
-			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
-					addr, next, write, pages, &nr))
-				break;
-		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
-			break;
-	} while (pgdp++, addr = next, addr != end);
-
-	local_irq_restore(flags);
-
-	return nr;
-}
-
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
-{
-	struct mm_struct *mm = current->mm;
-	int nr, ret;
-
-	start &= PAGE_MASK;
-	nr = __get_user_pages_fast(start, nr_pages, write, pages);
-	ret = nr;
-
-	if (nr < nr_pages) {
-		pr_devel("  slow path ! nr = %d\n", nr);
-
-		/* Try to get the remaining pages with get_user_pages */
-		start += nr << PAGE_SHIFT;
-		pages += nr;
-
-		down_read(&mm->mmap_sem);
-		ret = get_user_pages(current, mm, start,
-				     nr_pages - nr, write, 0, pages, NULL);
-		up_read(&mm->mmap_sem);
-
-		/* Have to be a bit careful with return values */
-		if (nr > 0) {
-			if (ret < 0)
-				ret = nr;
-			else
-				ret += nr;
-		}
-	}
-
-	return ret;
-}
-
-#endif /* __HAVE_ARCH_PTE_SPECIAL */
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
 		return NULL;

-	return hugepte_offset(hpdp, addr, pdshift);
+	return hugepte_offset(*hpdp, addr, pdshift);
 }

 #else
@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
 		return NULL;

-	return hugepte_offset(hpdp, addr, pdshift);
+	return hugepte_offset(*hpdp, addr, pdshift);
 }
 #endif

@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	do {
 		pmd = pmd_offset(pud, addr);
 		next = pmd_addr_end(addr, end);
-		if (!is_hugepd(pmd)) {
+		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
 			/*
 			 * if it is not hugepd pointer, we should already find
 			 * it cleared.
@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	do {
 		pud = pud_offset(pgd, addr);
 		next = pud_addr_end(addr, end);
-		if (!is_hugepd(pud)) {
+		if (!is_hugepd(__hugepd(pud_val(*pud)))) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
 			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	do {
 		next = pgd_addr_end(addr, end);
 		pgd = pgd_offset(tlb->mm, addr);
-		if (!is_hugepd(pgd)) {
+		if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
 			if (pgd_none_or_clear_bad(pgd))
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 	return (__boundary - 1 < end - 1) ? __boundary : end;
 }

-int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
-	       unsigned long addr, unsigned long end,
-	       int write, struct page **pages, int *nr)
+int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
+		unsigned long end, int write, struct page **pages, int *nr)
 {
 	pte_t *ptep;
-	unsigned long sz = 1UL << hugepd_shift(*hugepd);
+	unsigned long sz = 1UL << hugepd_shift(hugepd);
 	unsigned long next;

 	ptep = hugepte_offset(hugepd, addr, pdshift);
@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 	else if (pgd_huge(pgd)) {
 		ret_pte = (pte_t *) pgdp;
 		goto out;
-	} else if (is_hugepd(&pgd))
+	} else if (is_hugepd(__hugepd(pgd_val(pgd))))
 		hpdp = (hugepd_t *)&pgd;
 	else {
 		/*
@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 		else if (pud_huge(pud)) {
 			ret_pte = (pte_t *) pudp;
 			goto out;
-		} else if (is_hugepd(&pud))
+		} else if (is_hugepd(__hugepd(pud_val(pud))))
 			hpdp = (hugepd_t *)&pud;
 		else {
 			pdshift = PMD_SHIFT;
@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 			if (pmd_huge(pmd) || pmd_large(pmd)) {
 				ret_pte = (pte_t *) pmdp;
 				goto out;
-			} else if (is_hugepd(&pmd))
+			} else if (is_hugepd(__hugepd(pmd_val(pmd))))
 				hpdp = (hugepd_t *)&pmd;
 			else
 				return pte_offset_kernel(&pmd, ea);
@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 	if (!hpdp)
 		return NULL;

-	ret_pte = hugepte_offset(hpdp, ea, pdshift);
+	ret_pte = hugepte_offset(*hpdp, ea, pdshift);
 	pdshift = hugepd_shift(*hpdp);
 out:
 	if (shift)
@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 	if ((pte_val(pte) & mask) != mask)
 		return 0;

-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	/*
-	 * check for splitting here
-	 */
-	if (pmd_trans_splitting(pte_pmd(pte)))
-		return 0;
-#endif
-
 	/* hugepages are never "special" */
 	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));