diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1edf788d301c..8d078642b4be 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,6 +149,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 + select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_ASM_MODVERSIONS diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index afda66a6d325..c37e1649fb7e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -25,6 +25,7 @@ #include #include #include +#include <asm-generic/pgtable_uffd.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -313,6 +314,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_UFFD_WP); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY); @@ -392,6 +410,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) return native_make_pmd(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_UFFD_WP; +} + +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_UFFD_WP); +} + +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); @@ -1374,6 +1409,23 @@ static 
inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 #define PKRU_BITS_PER_PKEY 2 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0b6c4042942a..df1373415f11 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -189,7 +189,7 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -197,9 +197,15 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * erratum where they can be incorrectly set by hardware on * non-present PTEs. * + * Bits 1-4 are not used in non-present format and are available for + * special use as described below: + * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * + * F (2) in swp entry is used to record when a pagetable is + * write-protected by userfaultfd WP support. + * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. 
* diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 65c2ecd730c5..b6606fe6cfdf 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -32,6 +32,7 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 @@ -100,6 +101,14 @@ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) +#define _PAGE_SWP_UFFD_WP _PAGE_USER +#else +#define _PAGE_UFFD_WP (_AT(pteval_t, 0)) +#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) @@ -118,7 +127,8 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_UFFD_WP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2e2bef07dd2..329b8c8ca703 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -10,6 +10,7 @@ #include #include #include +#include <asm-generic/pgtable_uffd.h> #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h new file mode 100644 index 000000000000..643d1bf559c2 --- /dev/null +++ b/include/asm-generic/pgtable_uffd.h @@ -0,0 +1,51 @@ +#ifndef _ASM_GENERIC_PGTABLE_UFFD_H +#define _ASM_GENERIC_PGTABLE_UFFD_H + +#ifndef 
CONFIG_HAVE_ARCH_USERFAULTFD_WP +static __always_inline int pte_uffd_wp(pte_t pte) +{ + return 0; +} + +static __always_inline int pmd_uffd_wp(pmd_t pmd) +{ + return 0; +} + +static __always_inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte; +} + +static __always_inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd; +} + +static __always_inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte; +} + +static __always_inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd; +} + +static __always_inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte; +} + +static __always_inline int pte_swp_uffd_wp(pte_t pte) +{ + return 0; +} + +static __always_inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return pte; +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +#endif /* _ASM_GENERIC_PGTABLE_UFFD_H */ diff --git a/init/Kconfig b/init/Kconfig index 1c12059e0f7e..2bc0e47689d8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1556,6 +1556,11 @@ config ADVISE_SYSCALLS applications use these syscalls, you can disable this option to save space. +config HAVE_ARCH_USERFAULTFD_WP + bool + help + Arch has userfaultfd write protection support + config MEMBARRIER bool "Enable membarrier() system call" if EXPERT default y