From d6d261bded8a57aed4faa12d08a5b193418d3aa4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:44 -0700 Subject: [PATCH 01/24] x86/sgx: Add new sgx_epc_page flag bit to mark free pages SGX EPC pages go through the following life cycle: DIRTY ---> FREE ---> IN-USE --\ ^ | \-----------------/ Recovery action for poison for a DIRTY or FREE page is simple. Just make sure never to allocate the page. IN-USE pages need some extra handling. Add a new flag bit SGX_EPC_PAGE_IS_FREE that is set when a page is added to a free list and cleared when the page is allocated. Notes: 1) These transitions are made while holding the node->lock so that future code that checks the flags while holding the node->lock can be sure that if the SGX_EPC_PAGE_IS_FREE bit is set, then the page is on the free list. 2) Initially while the pages are on the dirty list the SGX_EPC_PAGE_IS_FREE bit is cleared. Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-2-tony.luck@intel.com --- arch/x86/kernel/cpu/sgx/main.c | 2 ++ arch/x86/kernel/cpu/sgx/sgx.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 63d3de02bbcc..825aa91516c8 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -472,6 +472,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); list_del_init(&page->list); sgx_nr_free_pages--; + page->flags = 0; spin_unlock(&node->lock); @@ -626,6 +627,7 @@ void sgx_free_epc_page(struct sgx_epc_page *page) list_add_tail(&page->list, &node->free_page_list); sgx_nr_free_pages++; + page->flags = SGX_EPC_PAGE_IS_FREE; spin_unlock(&node->lock); } diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 4628acec0009..5906471156c5 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -26,6 +26,9 @@ /* Pages, which are being tracked by the page reclaimer. */ #define SGX_EPC_PAGE_RECLAIMER_TRACKED BIT(0) +/* Pages on free list */ +#define SGX_EPC_PAGE_IS_FREE BIT(1) + struct sgx_epc_page { unsigned int section; unsigned int flags; From 40e0e7843e23d164625b9031514f5672f8758bf4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:45 -0700 Subject: [PATCH 02/24] x86/sgx: Add infrastructure to identify SGX EPC pages X86 machine check architecture reports a physical address when there is a memory error. Handling that error requires a method to determine whether the physical address reported is in any of the areas reserved for EPC pages by BIOS. SGX EPC pages do not have Linux "struct page" associated with them. Keep track of the mapping from ranges of EPC pages to the sections that contain them using an xarray. N.B. adds CONFIG_XARRAY_MULTI to the SGX dependecies. So "select" that in arch/x86/Kconfig for X86/SGX. Create a function arch_is_platform_page() that simply reports whether an address is an EPC page for use elsewhere in the kernel. The ACPI error injection code needs this function and is typically built as a module, so export it. Note that arch_is_platform_page() will be slower than other similar "what type is this page" functions that can simply check bits in the "struct page". If there is some future performance critical user of this function it may need to be implemented in a more efficient way. 
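For reference, the mechanism boils down to registering each EPC section's physical range in the xarray at setup time and answering queries with a single lookup. A minimal sketch of the idea (names follow the diff below):

	/* At section setup: remember which section covers this physical range. */
	xa_store_range(&sgx_epc_address_space, phys_addr, phys_addr + size - 1,
		       section, GFP_KERNEL);

	/* Query: any address covered by a registered range is an EPC page. */
	bool arch_is_platform_page(u64 paddr)
	{
		return !!xa_load(&sgx_epc_address_space, paddr);
	}
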
Note also that the current implementation of xarray allocates a few hundred kilobytes for this usage on a system with 4GB of SGX EPC memory configured. This isn't ideal, but worth it for the code simplicity. Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-3-tony.luck@intel.com --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/sgx/main.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 95dd1ee01546..b9281fab4e3e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1917,6 +1917,7 @@ config X86_SGX select SRCU select MMU_NOTIFIER select NUMA_KEEP_MEMINFO if NUMA + select XARRAY_MULTI help Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions that can be used by applications to set aside private regions of code diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 825aa91516c8..5c02cffdabc8 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -20,6 +20,7 @@ struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; static int sgx_nr_epc_sections; static struct task_struct *ksgxd_tsk; static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq); +static DEFINE_XARRAY(sgx_epc_address_space); /* * These variables are part of the state of the reclaimer, and must be accessed @@ -650,6 +651,8 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, } section->phys_addr = phys_addr; + xa_store_range(&sgx_epc_address_space, section->phys_addr, + phys_addr + size - 1, section, GFP_KERNEL); for (i = 0; i < nr_pages; i++) { section->pages[i].section = index; @@ -661,6 +664,12 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, return true; } +bool arch_is_platform_page(u64 paddr) +{ + return !!xa_load(&sgx_epc_address_space, paddr); +} +EXPORT_SYMBOL_GPL(arch_is_platform_page); + /** * A section metric is concatenated in a way that @low bits 12-31 define the * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the From 992801ae92431761b3d8ec88abd5793d154d34ac Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:46 -0700 Subject: [PATCH 03/24] x86/sgx: Initial poison handling for dirty and free pages A memory controller patrol scrubber can report poison in a page that isn't currently being used. Add "poison" field in the sgx_epc_page that can be set for an sgx_epc_page. Check for it: 1) When sanitizing dirty pages 2) When freeing epc pages Poison is a new field separated from flags to avoid having to make all updates to flags atomic, or integrate poison state changes into some other locking scheme to protect flags (Currently just sgx_reclaimer_lock which protects the SGX_EPC_PAGE_RECLAIMER_TRACKED bit in page->flags). In both cases place the poisoned page on a per-node list of poisoned epc pages to make sure it will not be reallocated. 
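The check lands in exactly the two places named above; a condensed sketch of both, mirroring the hunks in the diff below:

	/* 1) ksgxd sanitizing the dirty list: skip EREMOVE, park the page. */
	if (page->poison) {
		spin_lock(&node->lock);
		list_move(&page->list, &node->sgx_poison_page_list);
		spin_unlock(&node->lock);
		continue;
	}

	/* 2) sgx_free_epc_page(): a poisoned page never rejoins the free list. */
	if (page->poison)
		list_add(&page->list, &node->sgx_poison_page_list);
	else
		list_add_tail(&page->list, &node->free_page_list);
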
Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-4-tony.luck@intel.com --- arch/x86/kernel/cpu/sgx/main.c | 26 +++++++++++++++++++++++++- arch/x86/kernel/cpu/sgx/sgx.h | 4 +++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 5c02cffdabc8..e5fcb8354bcc 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -62,6 +62,24 @@ static void __sgx_sanitize_pages(struct list_head *dirty_page_list) page = list_first_entry(dirty_page_list, struct sgx_epc_page, list); + /* + * Checking page->poison without holding the node->lock + * is racy, but losing the race (i.e. poison is set just + * after the check) just means __eremove() will be uselessly + * called for a page that sgx_free_epc_page() will put onto + * the node->sgx_poison_page_list later. + */ + if (page->poison) { + struct sgx_epc_section *section = &sgx_epc_sections[page->section]; + struct sgx_numa_node *node = section->node; + + spin_lock(&node->lock); + list_move(&page->list, &node->sgx_poison_page_list); + spin_unlock(&node->lock); + + continue; + } + ret = __eremove(sgx_get_epc_virt_addr(page)); if (!ret) { /* @@ -626,7 +644,11 @@ void sgx_free_epc_page(struct sgx_epc_page *page) spin_lock(&node->lock); - list_add_tail(&page->list, &node->free_page_list); + page->owner = NULL; + if (page->poison) + list_add(&page->list, &node->sgx_poison_page_list); + else + list_add_tail(&page->list, &node->free_page_list); sgx_nr_free_pages++; page->flags = SGX_EPC_PAGE_IS_FREE; @@ -658,6 +680,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, section->pages[i].section = index; section->pages[i].flags = 0; section->pages[i].owner = NULL; + section->pages[i].poison = 0; list_add_tail(§ion->pages[i].list, &sgx_dirty_page_list); } @@ -724,6 +747,7 @@ static bool __init sgx_page_cache_init(void) if (!node_isset(nid, sgx_numa_mask)) { spin_lock_init(&sgx_numa_nodes[nid].lock); INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list); + INIT_LIST_HEAD(&sgx_numa_nodes[nid].sgx_poison_page_list); node_set(nid, sgx_numa_mask); } diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 5906471156c5..9ec3136c7800 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -31,7 +31,8 @@ struct sgx_epc_page { unsigned int section; - unsigned int flags; + u16 flags; + u16 poison; struct sgx_encl_page *owner; struct list_head list; }; @@ -42,6 +43,7 @@ struct sgx_epc_page { */ struct sgx_numa_node { struct list_head free_page_list; + struct list_head sgx_poison_page_list; spinlock_t lock; }; From a495cbdffa30558b34f3c95555cecc4fd9688039 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:47 -0700 Subject: [PATCH 04/24] x86/sgx: Add SGX infrastructure to recover from poison Provide a recovery function sgx_memory_failure(). If the poison was consumed synchronously then send a SIGBUS. Note that the virtual address of the access is not included with the SIGBUS as is the case for poison outside of SGX enclaves. This doesn't matter as addresses of code/data inside an enclave is of little to no use to code executing outside the (now dead) enclave. Poison found in a free page results in the page being moved from the free list to the per-node poison page list. 
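In outline, the handler added below resolves the reported physical address to an sgx_epc_page and then acts on the page's state; a simplified sketch (the complete function is in the diff):

	int arch_memory_failure(unsigned long pfn, int flags)
	{
		struct sgx_epc_page *page = sgx_paddr_to_page(pfn << PAGE_SHIFT);

		if (!page)
			return -ENXIO;		/* not EPC memory */

		if (flags & MF_ACTION_REQUIRED)
			force_sig(SIGBUS);	/* poison consumed by this task */

		/*
		 * Under node->lock: set page->poison; if the page is on a
		 * free list, move it to the per-node poison page list.
		 */
		...
	}
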
Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-5-tony.luck@intel.com --- arch/x86/kernel/cpu/sgx/main.c | 76 ++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index e5fcb8354bcc..231c494dfd40 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -693,6 +693,82 @@ bool arch_is_platform_page(u64 paddr) } EXPORT_SYMBOL_GPL(arch_is_platform_page); +static struct sgx_epc_page *sgx_paddr_to_page(u64 paddr) +{ + struct sgx_epc_section *section; + + section = xa_load(&sgx_epc_address_space, paddr); + if (!section) + return NULL; + + return §ion->pages[PFN_DOWN(paddr - section->phys_addr)]; +} + +/* + * Called in process context to handle a hardware reported + * error in an SGX EPC page. + * If the MF_ACTION_REQUIRED bit is set in flags, then the + * context is the task that consumed the poison data. Otherwise + * this is called from a kernel thread unrelated to the page. + */ +int arch_memory_failure(unsigned long pfn, int flags) +{ + struct sgx_epc_page *page = sgx_paddr_to_page(pfn << PAGE_SHIFT); + struct sgx_epc_section *section; + struct sgx_numa_node *node; + + /* + * mm/memory-failure.c calls this routine for all errors + * where there isn't a "struct page" for the address. But that + * includes other address ranges besides SGX. + */ + if (!page) + return -ENXIO; + + /* + * If poison was consumed synchronously. Send a SIGBUS to + * the task. Hardware has already exited the SGX enclave and + * will not allow re-entry to an enclave that has a memory + * error. The signal may help the task understand why the + * enclave is broken. + */ + if (flags & MF_ACTION_REQUIRED) + force_sig(SIGBUS); + + section = &sgx_epc_sections[page->section]; + node = section->node; + + spin_lock(&node->lock); + + /* Already poisoned? Nothing more to do */ + if (page->poison) + goto out; + + page->poison = 1; + + /* + * If the page is on a free list, move it to the per-node + * poison page list. + */ + if (page->flags & SGX_EPC_PAGE_IS_FREE) { + list_move(&page->list, &node->sgx_poison_page_list); + goto out; + } + + /* + * TBD: Add additional plumbing to enable pre-emptive + * action for asynchronous poison notification. Until + * then just hope that the poison: + * a) is not accessed - sgx_free_epc_page() will deal with it + * when the user gives it back + * b) results in a recoverable machine check rather than + * a fatal one + */ +out: + spin_unlock(&node->lock); + return 0; +} + /** * A section metric is concatenated in a way that @low bits 12-31 define the * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the From 03b122da74b22fbe7cd98184fa5657a9ce13970c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:48 -0700 Subject: [PATCH 05/24] x86/sgx: Hook arch_memory_failure() into mainline code Add a call inside memory_failure() to call the arch specific code to check if the address is an SGX EPC page and handle it. Note the SGX EPC pages do not have a "struct page" entry, so the hook goes in at the same point as the device mapping hook. Pull the call to acquire the mutex earlier so the SGX errors are also protected. Make set_mce_nospec() skip SGX pages when trying to adjust the 1:1 map. 
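The shape of the hook can be seen from the default stubs and the new call site; a short sketch (both appear in the diff below):

	/* linux/mm.h: no-op defaults so non-SGX architectures are unaffected. */
	#ifndef arch_memory_failure
	static inline int arch_memory_failure(unsigned long pfn, int flags)
	{
		return -ENXIO;
	}
	#endif

	/* memory_failure(), when the pfn has no struct page: try the arch
	 * hook first, then fall back to the device pagemap path. */
	res = arch_memory_failure(pfn, flags);
	if (res == 0)
		goto unlock_mutex;
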
Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Reviewed-by: Naoya Horiguchi Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-6-tony.luck@intel.com --- arch/x86/include/asm/processor.h | 8 ++++++++ arch/x86/include/asm/set_memory.h | 4 ++++ include/linux/mm.h | 13 +++++++++++++ mm/memory-failure.c | 19 +++++++++++++------ 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 355d38c0cf60..2c5f12ae7d04 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -855,4 +855,12 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; +#ifdef CONFIG_X86_SGX +int arch_memory_failure(unsigned long pfn, int flags); +#define arch_memory_failure arch_memory_failure + +bool arch_is_platform_page(u64 paddr); +#define arch_is_platform_page arch_is_platform_page +#endif + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 872617542bbc..ff0f2d90338a 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_SET_MEMORY_H #define _ASM_X86_SET_MEMORY_H +#include #include #include @@ -99,6 +100,9 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap) unsigned long decoy_addr; int rc; + /* SGX pages are not in the 1:1 map */ + if (arch_is_platform_page(pfn << PAGE_SHIFT)) + return 0; /* * We would like to just call: * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); diff --git a/include/linux/mm.h b/include/linux/mm.h index a7e4a9e7d807..57f1aa2a33b6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3231,6 +3231,19 @@ extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); +#ifndef arch_memory_failure +static inline int arch_memory_failure(unsigned long pfn, int flags) +{ + return -ENXIO; +} +#endif + +#ifndef arch_is_platform_page +static inline bool arch_is_platform_page(u64 paddr) +{ + return false; +} +#endif /* * Error handlers for various types of pages. diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 07c875fdeaf0..fddee33bc6cb 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1651,21 +1651,28 @@ int memory_failure(unsigned long pfn, int flags) if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); + mutex_lock(&mf_mutex); + p = pfn_to_online_page(pfn); if (!p) { + res = arch_memory_failure(pfn, flags); + if (res == 0) + goto unlock_mutex; + if (pfn_valid(pfn)) { pgmap = get_dev_pagemap(pfn, NULL); - if (pgmap) - return memory_failure_dev_pagemap(pfn, flags, - pgmap); + if (pgmap) { + res = memory_failure_dev_pagemap(pfn, flags, + pgmap); + goto unlock_mutex; + } } pr_err("Memory failure: %#lx: memory outside kernel control\n", pfn); - return -ENXIO; + res = -ENXIO; + goto unlock_mutex; } - mutex_lock(&mf_mutex); - try_again: if (PageHuge(p)) { res = memory_failure_hugetlb(pfn, flags); From c6acb1e7bf4656b9434335c72b8245cc84575fde Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:49 -0700 Subject: [PATCH 06/24] x86/sgx: Add hook to error injection address validation SGX reserved memory does not appear in the standard address maps. Add hook to call into the SGX code to check if an address is located in SGX memory. There are other challenges in injecting errors into SGX. 
Update the documentation with a sequence of operations to inject. Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-7-tony.luck@intel.com --- .../firmware-guide/acpi/apei/einj.rst | 19 +++++++++++++++++++ drivers/acpi/apei/einj.c | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst index c042176e1707..55e2331a6438 100644 --- a/Documentation/firmware-guide/acpi/apei/einj.rst +++ b/Documentation/firmware-guide/acpi/apei/einj.rst @@ -181,5 +181,24 @@ You should see something like this in dmesg:: [22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0 [22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0) +Special notes for injection into SGX enclaves: + +There may be a separate BIOS setup option to enable SGX injection. + +The injection process consists of setting some special memory controller +trigger that will inject the error on the next write to the target +address. But the h/w prevents any software outside of an SGX enclave +from accessing enclave pages (even BIOS SMM mode). + +The following sequence can be used: + 1) Determine physical address of enclave page + 2) Use "notrigger=1" mode to inject (this will setup + the injection address, but will not actually inject) + 3) Enter the enclave + 4) Store data to the virtual address matching physical address from step 1 + 5) Execute CLFLUSH for that virtual address + 6) Spin delay for 250ms + 7) Read from the virtual address. This will trigger the error + For more information about EINJ, please refer to ACPI specification version 4.0, section 17.5 and ACPI 5.0, section 18.6. diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index edb2622fd35f..95cc2a9f3e05 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -545,7 +545,8 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) != REGION_INTERSECTS) && (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY) - != REGION_INTERSECTS))) + != REGION_INTERSECTS) && + !arch_is_platform_page(base_addr))) return -EINVAL; inject: From 3ad6fd77a2d62e8f4465b429b65805eaf88e1b9e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:50 -0700 Subject: [PATCH 07/24] x86/sgx: Add check for SGX pages to ghes_do_memory_failure() SGX EPC pages do not have a "struct page" associated with them so the pfn_valid() sanity check fails and results in a warning message to the console. Add an additional check to skip the warning if the address of the error is in an SGX EPC page. 
Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-8-tony.luck@intel.com --- drivers/acpi/apei/ghes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0c8330ed1ffd..0c5c9acc6254 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -449,7 +449,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) return false; pfn = PHYS_PFN(physical_addr); - if (!pfn_valid(pfn)) { + if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) { pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid address in generic error data: %#llx\n", physical_addr); From 5064343fb155487362708bacc8c6ab9dc2c52bb8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 15 Nov 2021 10:35:14 -0800 Subject: [PATCH 08/24] selftests/sgx: Fix a benign linker warning The enclave binary (test_encl.elf) is built with only three sections (tcs, text, and data) as controlled by its custom linker script. If gcc is built with "--enable-linker-build-id" (this appears to be a common configuration even if it is by default off) then gcc will pass "--build-id" to the linker that will prompt it (the linker) to write unique bits identifying the linked file to a ".note.gnu.build-id" section. The section ".note.gnu.build-id" does not exist in the test enclave resulting in the following warning emitted by the linker: /usr/bin/ld: warning: .note.gnu.build-id section discarded, --build-id ignored The test enclave does not use the build id within the binary so fix the warning by passing a build id of "none" to the linker that will disable the setting from any earlier "--build-id" options and thus disable the attempt to write the build id to a ".note.gnu.build-id" section that does not exist. Link: https://lore.kernel.org/linux-sgx/20191017030340.18301-2-sean.j.christopherson@intel.com/ Suggested-by: Cedric Xing Signed-off-by: Sean Christopherson Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/ca0f8a81fc1e78af9bdbc6a88e0f9c37d82e53f2.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 7f12d55b97f8..2956584e1e37 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -45,7 +45,7 @@ $(OUTPUT)/sign_key.o: sign_key.S $(CC) $(HOST_CFLAGS) -c $< -o $@ $(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S - $(CC) $(ENCL_CFLAGS) -T $^ -o $@ + $(CC) $(ENCL_CFLAGS) -T $^ -o $@ -Wl,--build-id=none EXTRA_CLEAN := \ $(OUTPUT)/test_encl.elf \ From 39f62536be2f6160bba7294b5208e240d34703c3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:15 -0800 Subject: [PATCH 09/24] selftests/sgx: Assign source for each segment Define source per segment so that enclave pages can be added from different sources, e.g. anonymous VMA for zero pages. In other words, add 'src' field to struct encl_segment, and assign it to 'encl->src' for pages inherited from the enclave binary. 
Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/7850709c3089fe20e4bcecb8295ba87c54cc2b4a.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/load.c | 5 +++-- tools/testing/selftests/sgx/main.h | 1 + tools/testing/selftests/sgx/sigstruct.c | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 3ebe5d1fe337..5605474aab73 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -107,7 +107,7 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) memset(&secinfo, 0, sizeof(secinfo)); secinfo.flags = seg->flags; - ioc.src = (uint64_t)encl->src + seg->offset; + ioc.src = (uint64_t)seg->src; ioc.offset = seg->offset; ioc.length = seg->size; ioc.secinfo = (unsigned long)&secinfo; @@ -216,6 +216,7 @@ bool encl_load(const char *path, struct encl *encl) if (j == 0) { src_offset = phdr->p_offset & PAGE_MASK; + encl->src = encl->bin + src_offset; seg->prot = PROT_READ | PROT_WRITE; seg->flags = SGX_PAGE_TYPE_TCS << 8; @@ -228,13 +229,13 @@ bool encl_load(const char *path, struct encl *encl) seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset; seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK; + seg->src = encl->src + seg->offset; j++; } assert(j == encl->nr_segments); - encl->src = encl->bin + src_offset; encl->src_size = encl->segment_tbl[j - 1].offset + encl->segment_tbl[j - 1].size; diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 68672fd86cf9..452d11dc4889 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -7,6 +7,7 @@ #define MAIN_H struct encl_segment { + void *src; off_t offset; size_t size; unsigned int prot; diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index 92bbc5a15c39..202a96fd81bf 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -289,14 +289,14 @@ static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset, static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl, struct encl_segment *seg) { - uint64_t end = seg->offset + seg->size; + uint64_t end = seg->size; uint64_t offset; - for (offset = seg->offset; offset < end; offset += PAGE_SIZE) { - if (!mrenclave_eadd(ctx, offset, seg->flags)) + for (offset = 0; offset < end; offset += PAGE_SIZE) { + if (!mrenclave_eadd(ctx, seg->offset + offset, seg->flags)) return false; - if (!mrenclave_eextend(ctx, offset, encl->src + offset)) + if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset)) return false; } From 5f0ce664d8c6c160ce4333e809545a8a57fe2baf Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:16 -0800 Subject: [PATCH 10/24] selftests/sgx: Make data measurement for an enclave segment optional For a heap makes sense to leave its contents "unmeasured" in the SGX enclave build process, meaning that they won't contribute to the cryptographic signature (a RSA-3072 signed SHA56 hash) of the enclave. Enclaves are signed blobs where the signature is calculated both from page data and also from "structural properties" of the pages. For instance a page offset of *every* page added to the enclave is hashed. For data, this is optional, not least because hashing a page has a significant contribution to the enclave load time. 
Thus, where there is no reason to hash, do not. The SGX ioctl interface supports this with SGX_PAGE_MEASURE flag. Only when the flag is *set*, data is measured. Add seg->measure boolean flag to struct encl_segment. Only when the flag is set, include the segment data to the signature (represented by SIGSTRUCT architectural structure). Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/625b6fe28fed76275e9238ec4e15ec3c0d87de81.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/load.c | 6 +++++- tools/testing/selftests/sgx/main.h | 1 + tools/testing/selftests/sgx/sigstruct.c | 6 ++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 5605474aab73..f1be78984c50 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -111,7 +111,10 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) ioc.offset = seg->offset; ioc.length = seg->size; ioc.secinfo = (unsigned long)&secinfo; - ioc.flags = SGX_PAGE_MEASURE; + if (seg->measure) + ioc.flags = SGX_PAGE_MEASURE; + else + ioc.flags = 0; rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc); if (rc < 0) { @@ -230,6 +233,7 @@ bool encl_load(const char *path, struct encl *encl) seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset; seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK; seg->src = encl->src + seg->offset; + seg->measure = true; j++; } diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 452d11dc4889..aebc69e7cdc8 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -12,6 +12,7 @@ struct encl_segment { size_t size; unsigned int prot; unsigned int flags; + bool measure; }; struct encl { diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index 202a96fd81bf..50c5ab1aa6fa 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -296,8 +296,10 @@ static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl, if (!mrenclave_eadd(ctx, seg->offset + offset, seg->flags)) return false; - if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset)) - return false; + if (seg->measure) { + if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset)) + return false; + } } return true; From 3200505d4de6436af799d7be743d9dc87450ee5a Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:17 -0800 Subject: [PATCH 11/24] selftests/sgx: Create a heap for the test enclave Create a heap for the test enclave, which is allocated from /dev/null, and left unmeasured. This is beneficial by its own because it verifies that an enclave built from multiple choices, works properly. If LSM hooks are added for SGX some day, a multi source enclave has higher probability to trigger bugs on access control checks. The immediate need comes from the need to implement page reclaim tests. In order to trigger the page reclaimer, one can just set the size of the heap to high enough. 
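Concretely, the heap becomes one extra, unmeasured segment appended after the last ELF segment and backed by anonymous zero pages; a slightly condensed sketch of the diff below, where "prev" stands for the preceding segment table entry:

	seg->offset  = prev->offset + prev->size;
	seg->size    = heap_size;
	seg->src     = mmap(NULL, heap_size, PROT_READ | PROT_WRITE,
			    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	seg->prot    = PROT_READ | PROT_WRITE;
	seg->flags   = (SGX_PAGE_TYPE_REG << 8) | seg->prot;
	seg->measure = false;
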
Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/e070c5f23578c29608051cab879b1d276963a27a.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/load.c | 29 ++++++++++++++++++++++------- tools/testing/selftests/sgx/main.c | 2 +- tools/testing/selftests/sgx/main.h | 4 +++- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index f1be78984c50..9d4322c946e2 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -21,6 +21,8 @@ void encl_delete(struct encl *encl) { + struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + if (encl->encl_base) munmap((void *)encl->encl_base, encl->encl_size); @@ -30,6 +32,8 @@ void encl_delete(struct encl *encl) if (encl->fd) close(encl->fd); + munmap(heap_seg->src, heap_seg->size); + if (encl->segment_tbl) free(encl->segment_tbl); @@ -125,11 +129,10 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) return true; } - - -bool encl_load(const char *path, struct encl *encl) +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size) { const char device_path[] = "/dev/sgx_enclave"; + struct encl_segment *seg; Elf64_Phdr *phdr_tbl; off_t src_offset; Elf64_Ehdr *ehdr; @@ -181,6 +184,8 @@ bool encl_load(const char *path, struct encl *encl) ehdr = encl->bin; phdr_tbl = encl->bin + ehdr->e_phoff; + encl->nr_segments = 1; /* one for the heap */ + for (i = 0; i < ehdr->e_phnum; i++) { Elf64_Phdr *phdr = &phdr_tbl[i]; @@ -196,7 +201,6 @@ bool encl_load(const char *path, struct encl *encl) for (i = 0, j = 0; i < ehdr->e_phnum; i++) { Elf64_Phdr *phdr = &phdr_tbl[i]; unsigned int flags = phdr->p_flags; - struct encl_segment *seg; if (phdr->p_type != PT_LOAD) continue; @@ -238,10 +242,21 @@ bool encl_load(const char *path, struct encl *encl) j++; } - assert(j == encl->nr_segments); + assert(j == encl->nr_segments - 1); - encl->src_size = encl->segment_tbl[j - 1].offset + - encl->segment_tbl[j - 1].size; + seg = &encl->segment_tbl[j]; + seg->offset = encl->segment_tbl[j - 1].offset + encl->segment_tbl[j - 1].size; + seg->size = heap_size; + seg->src = mmap(NULL, heap_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + seg->prot = PROT_READ | PROT_WRITE; + seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot; + seg->measure = false; + + if (seg->src == MAP_FAILED) + goto err; + + encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size; for (encl->encl_size = 4096; encl->encl_size < encl->src_size; ) encl->encl_size <<= 1; diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index e252015e0c15..6858a35fed20 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -122,7 +122,7 @@ FIXTURE_SETUP(enclave) unsigned int i; void *addr; - if (!encl_load("test_encl.elf", &self->encl)) { + if (!encl_load("test_encl.elf", &self->encl, ENCL_HEAP_SIZE_DEFAULT)) { encl_delete(&self->encl); ksft_exit_skip("cannot load enclaves\n"); } diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index aebc69e7cdc8..b45c52ec7ab3 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -6,6 +6,8 @@ #ifndef MAIN_H #define MAIN_H +#define ENCL_HEAP_SIZE_DEFAULT 4096 + struct encl_segment { void *src; off_t offset; @@ -33,7 +35,7 @@ extern unsigned 
char sign_key[]; extern unsigned char sign_key_end[]; void encl_delete(struct encl *ctx); -bool encl_load(const char *path, struct encl *encl); +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size); bool encl_measure(struct encl *encl); bool encl_build(struct encl *encl); From 1471721489090515f9f0f059b25124898928e559 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:18 -0800 Subject: [PATCH 12/24] selftests/sgx: Dump segments and /proc/self/maps only on failure Logging is always a compromise between clarity and detail. The main use case for dumping VMA's is when FIXTURE_SETUP() fails, and is less important for enclaves that do initialize correctly. Therefore, print the segments and /proc/self/maps only in the error case. Finally, if a single test ever creates multiple enclaves, the amount of log lines would become enormous. Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/23cef0ae1de3a8a74cbfbbe74eca48ca3f300fde.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/main.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 6858a35fed20..deab02f2f3ce 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -127,12 +127,6 @@ FIXTURE_SETUP(enclave) ksft_exit_skip("cannot load enclaves\n"); } - for (i = 0; i < self->encl.nr_segments; i++) { - seg = &self->encl.segment_tbl[i]; - - TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot); - } - if (!encl_measure(&self->encl)) goto err; @@ -169,6 +163,17 @@ FIXTURE_SETUP(enclave) memset(&self->run, 0, sizeof(self->run)); self->run.tcs = self->encl.encl_base; + return; + +err: + encl_delete(&self->encl); + + for (i = 0; i < self->encl.nr_segments; i++) { + seg = &self->encl.segment_tbl[i]; + + TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot); + } + maps_file = fopen("/proc/self/maps", "r"); if (maps_file != NULL) { while (fgets(maps_line, sizeof(maps_line), maps_file) != NULL) { @@ -181,11 +186,7 @@ FIXTURE_SETUP(enclave) fclose(maps_file); } -err: - if (!sgx_enter_enclave_sym) - encl_delete(&self->encl); - - ASSERT_NE(sgx_enter_enclave_sym, NULL); + ASSERT_TRUE(false); } FIXTURE_TEARDOWN(enclave) From 1b35eb719549ab5143d61f9e09b0771cd3d00d94 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:19 -0800 Subject: [PATCH 13/24] selftests/sgx: Encpsulate the test enclave creation Introduce setup_test_encl() so that the enclave creation can be moved to TEST_F()'s. This is required for a reclaimer test where the heap size needs to be set large enough to triger the page reclaimer. 
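With this change a test creates its enclave explicitly, which is what later allows a per-test heap size. Intended usage, roughly (the test name is a placeholder; the actual conversions follow in the next patches):

	TEST_F(enclave, some_test)
	{
		ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl,
					    _metadata));

		memset(&self->run, 0, sizeof(self->run));
		self->run.tcs = self->encl.encl_base;

		/* ... run enclave ops via ENCL_CALL() ... */
	}
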
Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/bee0ca867a95828a569c1ba2a8e443a44047dc71.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/main.c | 44 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index deab02f2f3ce..5b3e49a36344 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -112,7 +112,8 @@ FIXTURE(enclave) { struct sgx_enclave_run run; }; -FIXTURE_SETUP(enclave) +static bool setup_test_encl(unsigned long heap_size, struct encl *encl, + struct __test_metadata *_metadata) { Elf64_Sym *sgx_enter_enclave_sym = NULL; struct vdso_symtab symtab; @@ -122,25 +123,25 @@ FIXTURE_SETUP(enclave) unsigned int i; void *addr; - if (!encl_load("test_encl.elf", &self->encl, ENCL_HEAP_SIZE_DEFAULT)) { - encl_delete(&self->encl); - ksft_exit_skip("cannot load enclaves\n"); + if (!encl_load("test_encl.elf", encl, heap_size)) { + encl_delete(encl); + TH_LOG("Failed to load the test enclave.\n"); } - if (!encl_measure(&self->encl)) + if (!encl_measure(encl)) goto err; - if (!encl_build(&self->encl)) + if (!encl_build(encl)) goto err; /* * An enclave consumer only must do this. */ - for (i = 0; i < self->encl.nr_segments; i++) { - struct encl_segment *seg = &self->encl.segment_tbl[i]; + for (i = 0; i < encl->nr_segments; i++) { + struct encl_segment *seg = &encl->segment_tbl[i]; - addr = mmap((void *)self->encl.encl_base + seg->offset, seg->size, - seg->prot, MAP_SHARED | MAP_FIXED, self->encl.fd, 0); + addr = mmap((void *)encl->encl_base + seg->offset, seg->size, + seg->prot, MAP_SHARED | MAP_FIXED, encl->fd, 0); EXPECT_NE(addr, MAP_FAILED); if (addr == MAP_FAILED) goto err; @@ -160,16 +161,13 @@ FIXTURE_SETUP(enclave) vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value; - memset(&self->run, 0, sizeof(self->run)); - self->run.tcs = self->encl.encl_base; - - return; + return true; err: - encl_delete(&self->encl); + encl_delete(encl); - for (i = 0; i < self->encl.nr_segments; i++) { - seg = &self->encl.segment_tbl[i]; + for (i = 0; i < encl->nr_segments; i++) { + seg = &encl->segment_tbl[i]; TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot); } @@ -186,7 +184,17 @@ err: fclose(maps_file); } - ASSERT_TRUE(false); + TH_LOG("Failed to initialize the test enclave.\n"); + + return false; +} + +FIXTURE_SETUP(enclave) +{ + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; } FIXTURE_TEARDOWN(enclave) From 065825db1fd60aa7695565613a69ed086a831869 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:20 -0800 Subject: [PATCH 14/24] selftests/sgx: Move setup_test_encl() to each TEST_F() Create the test enclave inside each TEST_F(), instead of FIXTURE_SETUP(), so that the heap size can be defined per test. 
Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/70ca264535d2ca0dc8dcaf2281e7d6965f8d4a24.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/main.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 5b3e49a36344..f41fba919d06 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -191,10 +191,6 @@ err: FIXTURE_SETUP(enclave) { - ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); - - memset(&self->run, 0, sizeof(self->run)); - self->run.tcs = self->encl.encl_base; } FIXTURE_TEARDOWN(enclave) @@ -226,6 +222,11 @@ TEST_F(enclave, unclobbered_vdso) { struct encl_op op; + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + op.type = ENCL_OP_PUT; op.buffer = MAGIC; @@ -248,6 +249,11 @@ TEST_F(enclave, clobbered_vdso) { struct encl_op op; + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + op.type = ENCL_OP_PUT; op.buffer = MAGIC; @@ -278,6 +284,11 @@ TEST_F(enclave, clobbered_vdso_and_user_function) { struct encl_op op; + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + self->run.user_handler = (__u64)test_handler; self->run.user_data = 0xdeadbeef; From f0ff2447b8613b883f41ae845b6cc7540d6e5f71 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:21 -0800 Subject: [PATCH 15/24] selftests/sgx: Add a new kselftest: Unclobbered_vdso_oversubscribed Add a variation of the unclobbered_vdso test. In the new test, create a heap for the test enclave, which has the same size as all available Enclave Page Cache (EPC) pages in the system. This will guarantee that all test_encl.elf pages *and* SGX Enclave Control Structure (SECS) have been swapped out by the page reclaimer during the load time. This test will trigger both the page reclaimer and the page fault handler. The page reclaimer triggered, while the heap is being created during the load time. The page fault handler is triggered for all the required pages, while the test case is executing. Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/41f7c508eea79a3198b5014d7691903be08f9ff1.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/main.c | 75 ++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index f41fba919d06..ee8139a22a3c 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -245,6 +245,81 @@ TEST_F(enclave, unclobbered_vdso) EXPECT_EQ(self->run.user_data, 0); } +/* + * A section metric is concatenated in a way that @low bits 12-31 define the + * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the + * metric. 
+ */ +static unsigned long sgx_calc_section_metric(unsigned int low, + unsigned int high) +{ + return (low & GENMASK_ULL(31, 12)) + + ((high & GENMASK_ULL(19, 0)) << 32); +} + +/* + * Sum total available physical SGX memory across all EPC sections + * + * Return: total available physical SGX memory available on system + */ +static unsigned long get_total_epc_mem(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned long total_size = 0; + unsigned int type; + int section = 0; + + while (true) { + eax = SGX_CPUID; + ecx = section + SGX_CPUID_EPC; + __cpuid(&eax, &ebx, &ecx, &edx); + + type = eax & SGX_CPUID_EPC_MASK; + if (type == SGX_CPUID_EPC_INVALID) + break; + + if (type != SGX_CPUID_EPC_SECTION) + break; + + total_size += sgx_calc_section_metric(ecx, edx); + + section++; + } + + return total_size; +} + +TEST_F(enclave, unclobbered_vdso_oversubscribed) +{ + unsigned long total_mem; + struct encl_op op; + + total_mem = get_total_epc_mem(); + ASSERT_NE(total_mem, 0); + ASSERT_TRUE(setup_test_encl(total_mem, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + op.type = ENCL_OP_PUT; + op.buffer = MAGIC; + + EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.user_data, 0); + + op.type = ENCL_OP_GET; + op.buffer = 0; + + EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + + EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.user_data, 0); + +} + TEST_F(enclave, clobbered_vdso) { struct encl_op op; From 41493a095e487c207b4b702aee2f8c59a7294e4f Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 15 Nov 2021 10:35:22 -0800 Subject: [PATCH 16/24] selftests/sgx: Provide per-op parameter structs for the test enclave To add more operations to the test enclave, the protocol needs to allow to have operations with varying parameters. Create a separate parameter struct for each existing operation, with the shared parameters in struct encl_op_header. 
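The resulting protocol is a fixed header followed by op-specific fields, with the enclave dispatching on the header; a minimal sketch, slightly simplified from the diff below:

	struct encl_op_header {
		uint64_t type;			/* selects the operation */
	};

	struct encl_op_put {
		struct encl_op_header header;	/* must come first */
		uint64_t value;
	};

	/* test_encl.c: bounds-checked dispatch on the shared header */
	static void (*const encl_op_array[ENCL_OP_MAX])(void *) = {
		do_encl_op_put,
		do_encl_op_get,
	};

	struct encl_op_header *op = (struct encl_op_header *)rdi;

	if (op->type < ENCL_OP_MAX)
		(*encl_op_array[op->type])(op);
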
[reinette: rebased to apply on top of oversubscription test series] Signed-off-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/f9a4a8c436b538003b8ebddaa66083992053cef1.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/defines.h | 14 ++++- tools/testing/selftests/sgx/main.c | 68 +++++++++++++------------ tools/testing/selftests/sgx/test_encl.c | 33 +++++++----- 3 files changed, 69 insertions(+), 46 deletions(-) diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index f88562afcaa0..6ff95a766287 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -21,11 +21,21 @@ enum encl_op_type { ENCL_OP_PUT, ENCL_OP_GET, + ENCL_OP_MAX, }; -struct encl_op { +struct encl_op_header { uint64_t type; - uint64_t buffer; +}; + +struct encl_op_put { + struct encl_op_header header; + uint64_t value; +}; + +struct encl_op_get { + struct encl_op_header header; + uint64_t value; }; #endif /* DEFINES_H */ diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index ee8139a22a3c..3996c00486b6 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -220,27 +220,28 @@ FIXTURE_TEARDOWN(enclave) TEST_F(enclave, unclobbered_vdso) { - struct encl_op op; + struct encl_op_put put_op; + struct encl_op_get get_op; ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); memset(&self->run, 0, sizeof(self->run)); self->run.tcs = self->encl.encl_base; - op.type = ENCL_OP_PUT; - op.buffer = MAGIC; + put_op.header.type = ENCL_OP_PUT; + put_op.value = MAGIC; - EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - op.type = ENCL_OP_GET; - op.buffer = 0; + get_op.header.type = ENCL_OP_GET; + get_op.value = 0; - EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0); - EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EQ(get_op.value, MAGIC); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); } @@ -292,7 +293,8 @@ static unsigned long get_total_epc_mem(void) TEST_F(enclave, unclobbered_vdso_oversubscribed) { unsigned long total_mem; - struct encl_op op; + struct encl_op_put put_op; + struct encl_op_get get_op; total_mem = get_total_epc_mem(); ASSERT_NE(total_mem, 0); @@ -301,20 +303,20 @@ TEST_F(enclave, unclobbered_vdso_oversubscribed) memset(&self->run, 0, sizeof(self->run)); self->run.tcs = self->encl.encl_base; - op.type = ENCL_OP_PUT; - op.buffer = MAGIC; + put_op.header.type = ENCL_OP_PUT; + put_op.value = MAGIC; - EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - op.type = ENCL_OP_GET; - op.buffer = 0; + get_op.header.type = ENCL_OP_GET; + get_op.value = 0; - EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0); - EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EQ(get_op.value, MAGIC); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); @@ -322,27 +324,28 @@ TEST_F(enclave, unclobbered_vdso_oversubscribed) TEST_F(enclave, clobbered_vdso) { - struct encl_op op; + struct encl_op_put put_op; + struct encl_op_get get_op; ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); memset(&self->run, 0, sizeof(self->run)); 
self->run.tcs = self->encl.encl_base; - op.type = ENCL_OP_PUT; - op.buffer = MAGIC; + put_op.header.type = ENCL_OP_PUT; + put_op.value = MAGIC; - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - op.type = ENCL_OP_GET; - op.buffer = 0; + get_op.header.type = ENCL_OP_GET; + get_op.value = 0; - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0); - EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EQ(get_op.value, MAGIC); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); } @@ -357,7 +360,8 @@ static int test_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r TEST_F(enclave, clobbered_vdso_and_user_function) { - struct encl_op op; + struct encl_op_put put_op; + struct encl_op_get get_op; ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); @@ -367,20 +371,20 @@ TEST_F(enclave, clobbered_vdso_and_user_function) self->run.user_handler = (__u64)test_handler; self->run.user_data = 0xdeadbeef; - op.type = ENCL_OP_PUT; - op.buffer = MAGIC; + put_op.header.type = ENCL_OP_PUT; + put_op.value = MAGIC; - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - op.type = ENCL_OP_GET; - op.buffer = 0; + get_op.header.type = ENCL_OP_GET; + get_op.value = 0; - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0); - EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EQ(get_op.value, MAGIC); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); } diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index 734ea52f9924..f11eb8315704 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -16,20 +16,29 @@ static void *memcpy(void *dest, const void *src, size_t n) return dest; } +static void do_encl_op_put(void *op) +{ + struct encl_op_put *op2 = op; + + memcpy(&encl_buffer[0], &op2->value, 8); +} + +static void do_encl_op_get(void *op) +{ + struct encl_op_get *op2 = op; + + memcpy(&op2->value, &encl_buffer[0], 8); +} + void encl_body(void *rdi, void *rsi) { - struct encl_op *op = (struct encl_op *)rdi; + const void (*encl_op_array[ENCL_OP_MAX])(void *) = { + do_encl_op_put, + do_encl_op_get, + }; - switch (op->type) { - case ENCL_OP_PUT: - memcpy(&encl_buffer[0], &op->buffer, 8); - break; + struct encl_op_header *op = (struct encl_op_header *)rdi; - case ENCL_OP_GET: - memcpy(&op->buffer, &encl_buffer[0], 8); - break; - - default: - break; - } + if (op->type < ENCL_OP_MAX) + (*encl_op_array[op->type])(op); } From c085dfc7685c8c36698b851b03990b75a3226e97 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 15 Nov 2021 10:35:23 -0800 Subject: [PATCH 17/24] selftests/sgx: Rename test properties in preparation for more enclave tests SGX selftests prepares a data structure outside of the enclave with the type of and data for the operation that needs to be run within the enclave. At this time only two complementary operations are supported by the enclave: copying a value from outside the enclave into a default buffer within the enclave and reading a value from the enclave's default buffer into a variable accessible outside the enclave. 
In preparation for more operations supported by the enclave the names of the current enclave operations are changed to more accurately reflect the operations and more easily distinguish it from future operations: * The enums ENCL_OP_PUT and ENCL_OP_GET are renamed to ENCL_OP_PUT_TO_BUFFER and ENCL_OP_GET_FROM_BUFFER respectively. * The structs encl_op_put and encl_op_get are renamed to encl_op_put_to_buf and encl_op_get_from_buf respectively. * The enclave functions do_encl_op_put and do_encl_op_get are renamed to do_encl_op_put_to_buf and do_encl_op_get_from_buf respectively. No functional changes. Suggested-by: Jarkko Sakkinen Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/023fda047c787cf330b88ed9337705edae6a0078.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/defines.h | 8 +++---- tools/testing/selftests/sgx/main.c | 32 ++++++++++++------------- tools/testing/selftests/sgx/test_encl.c | 12 +++++----- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index 6ff95a766287..9ea0c7882dfb 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -19,8 +19,8 @@ #include "../../../../arch/x86/include/uapi/asm/sgx.h" enum encl_op_type { - ENCL_OP_PUT, - ENCL_OP_GET, + ENCL_OP_PUT_TO_BUFFER, + ENCL_OP_GET_FROM_BUFFER, ENCL_OP_MAX, }; @@ -28,12 +28,12 @@ struct encl_op_header { uint64_t type; }; -struct encl_op_put { +struct encl_op_put_to_buf { struct encl_op_header header; uint64_t value; }; -struct encl_op_get { +struct encl_op_get_from_buf { struct encl_op_header header; uint64_t value; }; diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 3996c00486b6..61321229e485 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -220,15 +220,15 @@ FIXTURE_TEARDOWN(enclave) TEST_F(enclave, unclobbered_vdso) { - struct encl_op_put put_op; - struct encl_op_get get_op; + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); memset(&self->run, 0, sizeof(self->run)); self->run.tcs = self->encl.encl_base; - put_op.header.type = ENCL_OP_PUT; + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; put_op.value = MAGIC; EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0); @@ -236,7 +236,7 @@ TEST_F(enclave, unclobbered_vdso) EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - get_op.header.type = ENCL_OP_GET; + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; get_op.value = 0; EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0); @@ -292,9 +292,9 @@ static unsigned long get_total_epc_mem(void) TEST_F(enclave, unclobbered_vdso_oversubscribed) { + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; unsigned long total_mem; - struct encl_op_put put_op; - struct encl_op_get get_op; total_mem = get_total_epc_mem(); ASSERT_NE(total_mem, 0); @@ -303,7 +303,7 @@ TEST_F(enclave, unclobbered_vdso_oversubscribed) memset(&self->run, 0, sizeof(self->run)); self->run.tcs = self->encl.encl_base; - put_op.header.type = ENCL_OP_PUT; + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; put_op.value = MAGIC; EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0); @@ -311,7 +311,7 @@ TEST_F(enclave, unclobbered_vdso_oversubscribed) EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - get_op.header.type 
= ENCL_OP_GET; + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; get_op.value = 0; EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0); @@ -324,15 +324,15 @@ TEST_F(enclave, unclobbered_vdso_oversubscribed) TEST_F(enclave, clobbered_vdso) { - struct encl_op_put put_op; - struct encl_op_get get_op; + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); memset(&self->run, 0, sizeof(self->run)); self->run.tcs = self->encl.encl_base; - put_op.header.type = ENCL_OP_PUT; + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; put_op.value = MAGIC; EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0); @@ -340,7 +340,7 @@ TEST_F(enclave, clobbered_vdso) EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - get_op.header.type = ENCL_OP_GET; + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; get_op.value = 0; EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0); @@ -360,8 +360,8 @@ static int test_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r TEST_F(enclave, clobbered_vdso_and_user_function) { - struct encl_op_put put_op; - struct encl_op_get get_op; + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); @@ -371,7 +371,7 @@ TEST_F(enclave, clobbered_vdso_and_user_function) self->run.user_handler = (__u64)test_handler; self->run.user_data = 0xdeadbeef; - put_op.header.type = ENCL_OP_PUT; + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; put_op.value = MAGIC; EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0); @@ -379,7 +379,7 @@ TEST_F(enclave, clobbered_vdso_and_user_function) EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - get_op.header.type = ENCL_OP_GET; + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; get_op.value = 0; EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0); diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index f11eb8315704..4e8da738173f 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -16,16 +16,16 @@ static void *memcpy(void *dest, const void *src, size_t n) return dest; } -static void do_encl_op_put(void *op) +static void do_encl_op_put_to_buf(void *op) { - struct encl_op_put *op2 = op; + struct encl_op_put_to_buf *op2 = op; memcpy(&encl_buffer[0], &op2->value, 8); } -static void do_encl_op_get(void *op) +static void do_encl_op_get_from_buf(void *op) { - struct encl_op_get *op2 = op; + struct encl_op_get_from_buf *op2 = op; memcpy(&op2->value, &encl_buffer[0], 8); } @@ -33,8 +33,8 @@ static void do_encl_op_get(void *op) void encl_body(void *rdi, void *rsi) { const void (*encl_op_array[ENCL_OP_MAX])(void *) = { - do_encl_op_put, - do_encl_op_get, + do_encl_op_put_to_buf, + do_encl_op_get_from_buf, }; struct encl_op_header *op = (struct encl_op_header *)rdi; From abc5cec4735080d12d644c2d39f96cf98c0a367c Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 15 Nov 2021 10:35:24 -0800 Subject: [PATCH 18/24] selftests/sgx: Add page permission and exception test The Enclave Page Cache Map (EPCM) is a secure structure used by the processor to track the contents of the enclave page cache. The EPCM contains permissions with which enclave pages can be accessed. SGX support allows EPCM and PTE page permissions to differ - as long as the PTE permissions do not exceed the EPCM permissions. Add a test that: (1) Creates an SGX enclave page with writable EPCM permission. 
(2) Changes the PTE permission on the page to read-only. This should be permitted because the permission does not exceed the EPCM permission. (3) Attempts a write to the page. This should generate a page fault (#PF) because of the read-only PTE even though the EPCM permissions allow the page to be written to. This introduces the first test of SGX exception handling. In this test the issue that caused the exception (PTE page permissions) can be fixed from outside the enclave and after doing so it is possible to re-enter enclave at original entrypoint with ERESUME. Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/3bcc73a4b9fe8780bdb40571805e7ced59e01df7.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/defines.h | 14 +++ tools/testing/selftests/sgx/main.c | 134 ++++++++++++++++++++++++ tools/testing/selftests/sgx/test_encl.c | 21 ++++ 3 files changed, 169 insertions(+) diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index 9ea0c7882dfb..0bbda6f0c7d3 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -21,6 +21,8 @@ enum encl_op_type { ENCL_OP_PUT_TO_BUFFER, ENCL_OP_GET_FROM_BUFFER, + ENCL_OP_PUT_TO_ADDRESS, + ENCL_OP_GET_FROM_ADDRESS, ENCL_OP_MAX, }; @@ -38,4 +40,16 @@ struct encl_op_get_from_buf { uint64_t value; }; +struct encl_op_put_to_addr { + struct encl_op_header header; + uint64_t value; + uint64_t addr; +}; + +struct encl_op_get_from_addr { + struct encl_op_header header; + uint64_t value; + uint64_t addr; +}; + #endif /* DEFINES_H */ diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 61321229e485..1b4858f8ca4e 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -21,6 +21,7 @@ #include "main.h" static const uint64_t MAGIC = 0x1122334455667788ULL; +static const uint64_t MAGIC2 = 0x8877665544332211ULL; vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave; struct vdso_symtab { @@ -107,6 +108,25 @@ static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name) return NULL; } +/* + * Return the offset in the enclave where the data segment can be found. + * The first RW segment loaded is the TCS, skip that to get info on the + * data segment. + */ +static off_t encl_get_data_offset(struct encl *encl) +{ + int i; + + for (i = 1; i < encl->nr_segments; i++) { + struct encl_segment *seg = &encl->segment_tbl[i]; + + if (seg->prot == (PROT_READ | PROT_WRITE)) + return seg->offset; + } + + return -1; +} + FIXTURE(enclave) { struct encl encl; struct sgx_enclave_run run; @@ -389,4 +409,118 @@ TEST_F(enclave, clobbered_vdso_and_user_function) EXPECT_EQ(self->run.user_data, 0); } +/* + * Second page of .data segment is used to test changing PTE permissions. + * This spans the local encl_buffer within the test enclave. + * + * 1) Start with a sanity check: a value is written to the target page within + * the enclave and read back to ensure target page can be written to. + * 2) Change PTE permissions (RW -> RO) of target page within enclave. + * 3) Repeat (1) - this time expecting a regular #PF communicated via the + * vDSO. + * 4) Change PTE permissions of target page within enclave back to be RW. + * 5) Repeat (1) by resuming enclave, now expected to be possible to write to + * and read from target page within enclave. 
+ */ +TEST_F(enclave, pte_permissions) +{ + struct encl_op_get_from_addr get_addr_op; + struct encl_op_put_to_addr put_addr_op; + unsigned long data_start; + int ret; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + data_start = self->encl.encl_base + + encl_get_data_offset(&self->encl) + + PAGE_SIZE; + + /* + * Sanity check to ensure it is possible to write to page that will + * have its permissions manipulated. + */ + + /* Write MAGIC to page */ + put_addr_op.value = MAGIC; + put_addr_op.addr = data_start; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory that was just written to, confirming that it is the + * value previously written (MAGIC). + */ + get_addr_op.value = 0; + get_addr_op.addr = data_start; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Change PTE permissions of target page within the enclave */ + ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ); + if (ret) + perror("mprotect"); + + /* + * PTE permissions of target page changed to read-only, EPCM + * permissions unchanged (EPCM permissions are RW), attempt to + * write to the page, expecting a regular #PF. + */ + + put_addr_op.value = MAGIC2; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EQ(self->run.exception_vector, 14); + EXPECT_EQ(self->run.exception_error_code, 0x7); + EXPECT_EQ(self->run.exception_addr, data_start); + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + /* + * Change PTE permissions back to enable enclave to write to the + * target page and resume enclave - do not expect any exceptions this + * time. + */ + ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ | PROT_WRITE); + if (ret) + perror("mprotect"); + + EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0, + 0, ERESUME, 0, 0, &self->run), + 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + get_addr_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC2); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index 4e8da738173f..5d86e3e6456a 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -4,6 +4,11 @@ #include #include "defines.h" +/* + * Data buffer spanning two pages that will be placed first in .data + * segment. Even if not used internally the second page is needed by + * external test manipulating page permissions. 
+ */ static uint8_t encl_buffer[8192] = { 1 }; static void *memcpy(void *dest, const void *src, size_t n) @@ -30,11 +35,27 @@ static void do_encl_op_get_from_buf(void *op) memcpy(&op2->value, &encl_buffer[0], 8); } +static void do_encl_op_put_to_addr(void *_op) +{ + struct encl_op_put_to_addr *op = _op; + + memcpy((void *)op->addr, &op->value, 8); +} + +static void do_encl_op_get_from_addr(void *_op) +{ + struct encl_op_get_from_addr *op = _op; + + memcpy(&op->value, (void *)op->addr, 8); +} + void encl_body(void *rdi, void *rsi) { const void (*encl_op_array[ENCL_OP_MAX])(void *) = { do_encl_op_put_to_buf, do_encl_op_get_from_buf, + do_encl_op_put_to_addr, + do_encl_op_get_from_addr, }; struct encl_op_header *op = (struct encl_op_header *)rdi; From 26e688f1263a3c226f3bb5e3441c310ae11e8001 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 15 Nov 2021 10:35:25 -0800 Subject: [PATCH 19/24] selftests/sgx: Enable multiple thread support Each thread executing in an enclave is associated with a Thread Control Structure (TCS). The test enclave contains two hardcoded TCS. Each TCS contains meta-data used by the hardware to save and restore thread specific information when entering/exiting the enclave. The two TCS structures within the test enclave share their SSA (State Save Area) resulting in the threads clobbering each other's data. Fix this by providing each TCS their own SSA area. Additionally, there is an 8K stack space and its address is computed from the enclave entry point which is correctly done for TCS #1 that starts on the first address inside the enclave but results in out of bounds memory when entering as TCS #2. Split 8K stack space into two separate pages with offset symbol between to ensure the current enclave entry calculation can continue to be used for both threads. While using the enclave with multiple threads requires these fixes the impact is not apparent because every test up to this point enters the enclave from the first TCS. More detail about the stack fix: ------------------------------- Before this change the test enclave (test_encl) looks as follows: .tcs (2 pages): (page 1) TCS #1 (page 2) TCS #2 .text (1 page) One page of code .data (5 pages) (page 1) encl_buffer (page 2) encl_buffer (page 3) SSA (page 4 and 5) STACK encl_stack: As shown above there is a symbol, encl_stack, that points to the end of the .data segment (pointing to the end of page 5 in .data) which is also the end of the enclave. The enclave entry code computes the stack address by adding encl_stack to the pointer to the TCS that entered the enclave. When entering at TCS #1 the stack is computed correctly but when entering at TCS #2 the stack pointer would point to one page beyond the end of the enclave and a #PF would result when TCS #2 attempts to enter the enclave. The fix involves moving the encl_stack symbol between the two stack pages. Doing so enables the stack address computation in the entry code to compute the correct stack address for each TCS. 
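To make the stack arithmetic concrete, here is a small user space sketch (illustration only, not part of this patch) of how the entry code's "lea (encl_stack)(%rbx)" resolves for each TCS once the encl_stack symbol sits between the two stack pages. The enclave base address is hypothetical, and the encl_stack offset is derived from the layout described above (two TCS pages, one text page, then .data with the two-page encl_buffer, two SSA pages and the per-TCS stack pages).

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
        /* Hypothetical enclave base; the selftest gets this from encl.encl_base. */
        uint64_t encl_base = 0x7f0000000000ULL;
        /*
         * Value of the encl_stack symbol, i.e. its offset from the enclave
         * base: 2 TCS + 1 text + 2 encl_buffer + 2 SSA + 1 stack page.
         */
        uint64_t encl_stack = 8 * PAGE_SIZE;
        uint64_t tcs1 = encl_base;             /* first page of the enclave */
        uint64_t tcs2 = encl_base + PAGE_SIZE; /* one page into the enclave */

        /* lea (encl_stack)(%rbx), %rax  =>  stack top = TCS address + encl_stack */
        printf("TCS #1 stack top: 0x%llx\n", (unsigned long long)(tcs1 + encl_stack));
        printf("TCS #2 stack top: 0x%llx\n", (unsigned long long)(tcs2 + encl_stack));

        return 0;
}

Each stack top now lands at the end of that thread's own stack page. With the old layout, where encl_stack marked the end of the enclave, the TCS #2 computation would point one page past the enclave and fault exactly as described above.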
Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/a49dc0d85401db788a0a3f0d795e848abf3b1f44.1636997631.git.reinette.chatre@intel.com --- .../selftests/sgx/test_encl_bootstrap.S | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index 5d5680d4ea39..82fb0dfcbd23 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -12,7 +12,7 @@ .fill 1, 8, 0 # STATE (set by CPU) .fill 1, 8, 0 # FLAGS - .quad encl_ssa # OSSA + .quad encl_ssa_tcs1 # OSSA .fill 1, 4, 0 # CSSA (set by CPU) .fill 1, 4, 1 # NSSA .quad encl_entry # OENTRY @@ -23,10 +23,10 @@ .fill 1, 4, 0xFFFFFFFF # GSLIMIT .fill 4024, 1, 0 # Reserved - # Identical to the previous TCS. + # TCS2 .fill 1, 8, 0 # STATE (set by CPU) .fill 1, 8, 0 # FLAGS - .quad encl_ssa # OSSA + .quad encl_ssa_tcs2 # OSSA .fill 1, 4, 0 # CSSA (set by CPU) .fill 1, 4, 1 # NSSA .quad encl_entry # OENTRY @@ -40,8 +40,9 @@ .text encl_entry: - # RBX contains the base address for TCS, which is also the first address - # inside the enclave. By adding the value of le_stack_end to it, we get + # RBX contains the base address for TCS, which is the first address + # inside the enclave for TCS #1 and one page into the enclave for + # TCS #2. By adding the value of encl_stack to it, we get # the absolute address for the stack. lea (encl_stack)(%rbx), %rax xchg %rsp, %rax @@ -81,9 +82,15 @@ encl_entry: .section ".data", "aw" -encl_ssa: +encl_ssa_tcs1: + .space 4096 +encl_ssa_tcs2: .space 4096 .balign 4096 - .space 8192 + # Stack of TCS #1 + .space 4096 encl_stack: + .balign 4096 + # Stack of TCS #2 + .space 4096 From 688542e29fae655a8be25832f6a9959bdd308dd8 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 15 Nov 2021 10:35:26 -0800 Subject: [PATCH 20/24] selftests/sgx: Add test for multiple TCS entry Each thread executing in an enclave is associated with a Thread Control Structure (TCS). The SGX test enclave contains two hardcoded TCS, thus supporting two threads in the enclave. Add a test to ensure it is possible to enter enclave at both entrypoints. 
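For illustration only (not part of this patch): with each TCS now owning its own SSA and stack, nothing in the enclave layout prevents the two entry points from being driven by two host threads at the same time. Below is a rough sketch that reuses the selftest harness types and the ENCL_CALL() helper from main.c; the tcs_arg structure and enter_on_tcs() name are made up for this example.

#include <pthread.h>
#include <stdbool.h>

struct tcs_arg {
        struct encl *encl;      /* selftest enclave descriptor          */
        unsigned int tcs_index; /* 0 or 1: which hardcoded TCS to enter */
};

static void *enter_on_tcs(void *arg)
{
        struct tcs_arg *a = arg;
        struct sgx_enclave_run run = { 0 };
        struct encl_op_header op = { .type = ENCL_OP_NOP };

        /* Each TCS is one page; TCS #n sits n pages into the enclave. */
        run.tcs = a->encl->encl_base + a->tcs_index * PAGE_SIZE;

        ENCL_CALL(&op, &run, true);
        return NULL;
}

Creating one pthread per tcs_index (0 and 1) would then exercise both entry points concurrently, which is what the per-TCS SSA and stack pages make safe.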
Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/7be151a57b4c7959a2364753b995e0006efa3da1.1636997631.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/defines.h | 1 + tools/testing/selftests/sgx/main.c | 32 +++++++++++++++++++++++++ tools/testing/selftests/sgx/test_encl.c | 6 +++++ 3 files changed, 39 insertions(+) diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index 0bbda6f0c7d3..02d775789ea7 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -23,6 +23,7 @@ enum encl_op_type { ENCL_OP_GET_FROM_BUFFER, ENCL_OP_PUT_TO_ADDRESS, ENCL_OP_GET_FROM_ADDRESS, + ENCL_OP_NOP, ENCL_OP_MAX, }; diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 1b4858f8ca4e..7e912db4c6c5 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -409,6 +409,38 @@ TEST_F(enclave, clobbered_vdso_and_user_function) EXPECT_EQ(self->run.user_data, 0); } +/* + * Sanity check that it is possible to enter either of the two hardcoded TCS + */ +TEST_F(enclave, tcs_entry) +{ + struct encl_op_header op; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + op.type = ENCL_OP_NOP; + + EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Move to the next TCS. */ + self->run.tcs = self->encl.encl_base + PAGE_SIZE; + + EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); +} + /* * Second page of .data segment is used to test changing PTE permissions. * This spans the local encl_buffer within the test enclave. diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index 5d86e3e6456a..4fca01cfd898 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -49,6 +49,11 @@ static void do_encl_op_get_from_addr(void *_op) memcpy(&op->value, (void *)op->addr, 8); } +static void do_encl_op_nop(void *_op) +{ + +} + void encl_body(void *rdi, void *rsi) { const void (*encl_op_array[ENCL_OP_MAX])(void *) = { @@ -56,6 +61,7 @@ void encl_body(void *rdi, void *rsi) do_encl_op_get_from_buf, do_encl_op_put_to_addr, do_encl_op_get_from_addr, + do_encl_op_nop, }; struct encl_op_header *op = (struct encl_op_header *)rdi; From 379e4de9e140850cf699dd390f21ea4b923c955d Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Fri, 29 Oct 2021 10:49:56 -0700 Subject: [PATCH 21/24] x86/sgx: Fix minor documentation issues The SGX documentation has a few repeated or one-off issues: * Remove capitalization from regular words in the middle of a sentence. * Remove punctuation found in the middle of a sentence. * Fix name of SGX daemon to consistently be ksgxd. 
* Fix typo of SGX instruction: ENIT -> EINIT [ dhansen: tweaked subject and changelog ] Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/ab99a87368eef69e3fb96f073368becff3eff874.1635529506.git.reinette.chatre@intel.com --- Documentation/x86/sgx.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/x86/sgx.rst b/Documentation/x86/sgx.rst index a608f667fb95..265568a9292c 100644 --- a/Documentation/x86/sgx.rst +++ b/Documentation/x86/sgx.rst @@ -10,7 +10,7 @@ Overview Software Guard eXtensions (SGX) hardware enables for user space applications to set aside private memory regions of code and data: -* Privileged (ring-0) ENCLS functions orchestrate the construction of the. +* Privileged (ring-0) ENCLS functions orchestrate the construction of the regions. * Unprivileged (ring-3) ENCLU functions allow an application to enter and execute inside the regions. @@ -91,7 +91,7 @@ In addition to the traditional compiler and linker build process, SGX has a separate enclave “build” process. Enclaves must be built before they can be executed (entered). The first step in building an enclave is opening the **/dev/sgx_enclave** device. Since enclave memory is protected from direct -access, special privileged instructions are Then used to copy data into enclave +access, special privileged instructions are then used to copy data into enclave pages and establish enclave page permissions. .. kernel-doc:: arch/x86/kernel/cpu/sgx/ioctl.c @@ -126,13 +126,13 @@ the need to juggle signal handlers. ksgxd ===== -SGX support includes a kernel thread called *ksgxwapd*. +SGX support includes a kernel thread called *ksgxd*. EPC sanitization ---------------- ksgxd is started when SGX initializes. Enclave memory is typically ready -For use when the processor powers on or resets. However, if SGX has been in +for use when the processor powers on or resets. However, if SGX has been in use since the reset, enclave pages may be in an inconsistent state. This might occur after a crash and kexec() cycle, for instance. At boot, ksgxd reinitializes all enclave pages so that they can be allocated and re-used. @@ -147,7 +147,7 @@ Page reclaimer Similar to the core kswapd, ksgxd, is responsible for managing the overcommitment of enclave memory. If the system runs out of enclave memory, -*ksgxwapd* “swaps” enclave memory to normal memory. +*ksgxd* “swaps” enclave memory to normal memory. Launch Control ============== @@ -156,7 +156,7 @@ SGX provides a launch control mechanism. After all enclave pages have been copied, kernel executes EINIT function, which initializes the enclave. Only after this the CPU can execute inside the enclave. -ENIT function takes an RSA-3072 signature of the enclave measurement. The function +EINIT function takes an RSA-3072 signature of the enclave measurement. The function checks that the measurement is correct and signature is signed with the key hashed to the four **IA32_SGXLEPUBKEYHASH{0, 1, 2, 3}** MSRs representing the SHA256 of a public key. @@ -184,7 +184,7 @@ CPUs starting from Icelake use Total Memory Encryption (TME) in the place of MEE. TME-based SGX implementations do not have an integrity Merkle tree, which means integrity and replay-attacks are not mitigated. B, it includes additional changes to prevent cipher text from being returned and SW memory -aliases from being Created. +aliases from being created. 
DMA to enclave memory is blocked by range registers on both MEE and TME systems (SDM section 41.10). From 50468e4313355b161cac8a5155a45832995b7f25 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 16 Nov 2021 18:21:16 +0200 Subject: [PATCH 22/24] x86/sgx: Add an attribute for the amount of SGX memory in a NUMA node == Problem == The amount of SGX memory on a system is determined by the BIOS and it varies wildly between systems. It can be as small as dozens of MB's and as large as many GB's on servers. Just like how applications need to know how much regular RAM is available, enclave builders need to know how much SGX memory an enclave can consume. == Solution == Introduce a new sysfs file: /sys/devices/system/node/nodeX/x86/sgx_total_bytes to enumerate the amount of SGX memory available in each NUMA node. This serves the same function for SGX as /proc/meminfo or /sys/devices/system/node/nodeX/meminfo does for normal RAM. 'sgx_total_bytes' is needed today to help drive the SGX selftests. SGX-specific swap code is exercised by creating overcommitted enclaves which are larger than the physical SGX memory on the system. They currently use a CPUID-based approach which can diverge from the actual amount of SGX memory available. 'sgx_total_bytes' ensures that the selftests can work efficiently and do not attempt stupid things like creating a 100,000 MB enclave on a system with 128 MB of SGX memory. == Implementation Details == Introduce CONFIG_HAVE_ARCH_NODE_DEV_GROUP opt-in flag to expose an arch specific attribute group, and add an attribute for the amount of SGX memory in bytes to each NUMA node: == ABI Design Discussion == As opposed to the per-node ABI, a single, global ABI was considered. However, this would prevent enclaves from being able to size themselves so that they fit on a single NUMA node. Essentially, a single value would rule out NUMA optimizations for enclaves. Create a new "x86/" directory inside each "nodeX/" sysfs directory. 'sgx_total_bytes' is expected to be the first of at least a few sgx-specific files to be placed in the new directory. Just scanning /proc/meminfo, these are the no-brainers that we have for RAM, but we need for SGX: MemTotal: xxxx kB // sgx_total_bytes (implemented here) MemFree: yyyy kB // sgx_free_bytes SwapTotal: zzzz kB // sgx_swapped_bytes So, at *least* three. I think we will eventually end up needing something more along the lines of a dozen. A new directory (as opposed to being in the nodeX/ "root") directory avoids cluttering the root with several "sgx_*" files. Place the new file in a new "nodeX/x86/" directory because SGX is highly x86-specific. It is very unlikely that any other architecture (or even non-Intel x86 vendor) will ever implement SGX. Using "sgx/" as opposed to "x86/" was also considered. But, there is a real chance this can get used for other arch-specific purposes. 
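As a usage illustration (not part of this patch), a selftest or enclave loader can size itself by summing the new per-node files. A minimal sketch, assuming only the sysfs path added here and that nodes without SGX attributes simply do not have the file:

#include <stdio.h>

/* Sum /sys/devices/system/node/nodeX/x86/sgx_total_bytes over all nodes. */
static unsigned long long sgx_total_bytes(void)
{
        unsigned long long total = 0, bytes;
        char path[128];
        int nid;

        /* 1024 is an arbitrary upper bound for this sketch. */
        for (nid = 0; nid < 1024; nid++) {
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/devices/system/node/node%d/x86/sgx_total_bytes",
                         nid);
                f = fopen(path, "r");
                if (!f)
                        continue; /* node absent, or no SGX attributes */
                if (fscanf(f, "%llu", &bytes) == 1)
                        total += bytes;
                fclose(f);
        }

        return total;
}

int main(void)
{
        printf("SGX EPC total: %llu bytes\n", sgx_total_bytes());
        return 0;
}

Unlike the CPUID-based sizing in the selftest's get_total_epc_mem(), this reflects the EPC the kernel actually manages, and it can also be read per node to keep an enclave within a single NUMA node.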
[ dhansen: rewrite changelog ] Signed-off-by: Jarkko Sakkinen Signed-off-by: Dave Hansen Acked-by: Greg Kroah-Hartman Acked-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211116162116.93081-2-jarkko@kernel.org --- Documentation/ABI/stable/sysfs-devices-node | 6 ++++++ arch/Kconfig | 4 ++++ arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/sgx/main.c | 20 ++++++++++++++++++++ arch/x86/kernel/cpu/sgx/sgx.h | 1 + drivers/base/node.c | 3 +++ include/linux/numa.h | 4 ++++ 7 files changed, 39 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node index 484fc04bcc25..8db67aa472f1 100644 --- a/Documentation/ABI/stable/sysfs-devices-node +++ b/Documentation/ABI/stable/sysfs-devices-node @@ -176,3 +176,9 @@ Contact: Keith Busch Description: The cache write policy: 0 for write-back, 1 for write-through, other or unknown. + +What: /sys/devices/system/node/nodeX/x86/sgx_total_bytes +Date: November 2021 +Contact: Jarkko Sakkinen +Description: + The total amount of SGX physical memory in bytes. diff --git a/arch/Kconfig b/arch/Kconfig index 26b8ed11639d..0a9dadb00b61 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1302,6 +1302,10 @@ config ARCH_HAS_PARANOID_L1D_FLUSH config DYNAMIC_SIGFRAME bool +# Select, if arch has a named attribute group bound to NUMA device nodes. +config HAVE_ARCH_NODE_DEV_GROUP + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b9281fab4e3e..f2b699d12eb8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -269,6 +269,7 @@ config X86 select HAVE_ARCH_KCSAN if X86_64 select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS + select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI config INSTRUCTION_DECODER diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 6036328de255..2857a49f2335 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -825,9 +825,11 @@ static bool __init sgx_page_cache_init(void) INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list); INIT_LIST_HEAD(&sgx_numa_nodes[nid].sgx_poison_page_list); node_set(nid, sgx_numa_mask); + sgx_numa_nodes[nid].size = 0; } sgx_epc_sections[i].node = &sgx_numa_nodes[nid]; + sgx_numa_nodes[nid].size += size; sgx_nr_epc_sections++; } @@ -901,6 +903,24 @@ int sgx_set_attribute(unsigned long *allowed_attributes, } EXPORT_SYMBOL_GPL(sgx_set_attribute); +#ifdef CONFIG_NUMA +static ssize_t sgx_total_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lu\n", sgx_numa_nodes[dev->id].size); +} +static DEVICE_ATTR_RO(sgx_total_bytes); + +static struct attribute *arch_node_dev_attrs[] = { + &dev_attr_sgx_total_bytes.attr, + NULL, +}; + +const struct attribute_group arch_node_dev_group = { + .name = "x86", + .attrs = arch_node_dev_attrs, +}; +#endif /* CONFIG_NUMA */ + static int __init sgx_init(void) { int ret; diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 9ec3136c7800..0f17def9fe6f 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -44,6 +44,7 @@ struct sgx_epc_page { struct sgx_numa_node { struct list_head free_page_list; struct list_head sgx_poison_page_list; + unsigned long size; spinlock_t lock; }; diff --git a/drivers/base/node.c b/drivers/base/node.c index b5a4ba18f9f9..87acc47e8951 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -581,6 +581,9 @@ static const 
struct attribute_group node_dev_group = { static const struct attribute_group *node_dev_groups[] = { &node_dev_group, +#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP + &arch_node_dev_group, +#endif NULL }; diff --git a/include/linux/numa.h b/include/linux/numa.h index cb44cfe2b725..59df211d051f 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -58,4 +58,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP +extern const struct attribute_group arch_node_dev_group; +#endif + #endif /* _LINUX_NUMA_H */ From 572a0a647b9b491729d24c083c8410c55bf16326 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 4 Dec 2021 22:23:55 +0200 Subject: [PATCH 23/24] selftests/sgx: Fix corrupted cpuid macro invocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SGX selftest fails to build on tip/x86/sgx: main.c: In function ‘get_total_epc_mem’: main.c:296:17: error: implicit declaration of function ‘__cpuid’ [-Werror=implicit-function-declaration] 296 | __cpuid(&eax, &ebx, &ecx, &edx); | ^~~~~~~ Include cpuid.h and use __cpuid_count() macro in order to fix the compilation issue. [ dhansen: tweak commit message ] Fixes: f0ff2447b861 ("selftests/sgx: Add a new kselftest: Unclobbered_vdso_oversubscribed") Signed-off-by: Jarkko Sakkinen Signed-off-by: Dave Hansen Acked-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211204202355.23005-1-jarkko@kernel.org Cc: Shuah Khan --- tools/testing/selftests/sgx/main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 7e912db4c6c5..370c4995f7c4 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2016-20 Intel Corporation. */ +#include #include #include #include @@ -291,9 +292,7 @@ static unsigned long get_total_epc_mem(void) int section = 0; while (true) { - eax = SGX_CPUID; - ecx = section + SGX_CPUID_EPC; - __cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(SGX_CPUID, section + SGX_CPUID_EPC, eax, ebx, ecx, edx); type = eax & SGX_CPUID_EPC_MASK; if (type == SGX_CPUID_EPC_INVALID) From 2056e2989bf47ad7274ecc5e9dda2add53c112f9 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 4 Jan 2022 09:15:27 -0800 Subject: [PATCH 24/24] x86/sgx: Fix NULL pointer dereference on non-SGX systems == Problem == Nathan Chancellor reported an oops when aceessing the 'sgx_total_bytes' sysfs file: https://lore.kernel.org/all/YbzhBrimHGGpddDM@archlinux-ax161/ The sysfs output code accesses the sgx_numa_nodes[] array unconditionally. However, this array is allocated during SGX initialization, which only occurs on systems where SGX is supported. If the sysfs file is accessed on systems without SGX support, sgx_numa_nodes[] is NULL and an oops occurs. == Solution == To fix this, hide the entire nodeX/x86/ attribute group on systems without SGX support using the ->is_visible attribute group callback. Unfortunately, SGX is initialized via a device_initcall() which occurs _after_ the ->is_visible() callback. Instead of moving SGX initialization earlier, call sysfs_update_group() during SGX initialization to update the group visiblility. This update requires moving the SGX sysfs code earlier in sgx/main.c. There are no code changes other than the addition of arch_update_sysfs_visibility() and a minor whitespace fixup to arch_node_attr_is_visible() which checkpatch caught. 
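The general shape of the mechanism described above, as a generic sketch with hypothetical demo_* names rather than the SGX code itself: an attribute group whose ->is_visible() hides everything until late initialization has produced the data behind it, plus a sysfs_update_group() call that makes sysfs re-evaluate visibility afterwards.

#include <linux/device.h>
#include <linux/sysfs.h>

static bool demo_ready;

static ssize_t demo_value_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(demo_value);

static struct attribute *demo_attrs[] = {
        &dev_attr_demo_value.attr,
        NULL,
};

static umode_t demo_attr_is_visible(struct kobject *kobj,
                                    struct attribute *attr, int idx)
{
        /* Hide every attribute until the backing data exists. */
        return demo_ready ? attr->mode : 0;
}

static const struct attribute_group demo_group = {
        .name           = "demo",
        .attrs          = demo_attrs,
        .is_visible     = demo_attr_is_visible,
};

static int demo_late_init(struct device *dev)
{
        demo_ready = true;
        /* Ask sysfs to run ->is_visible() again now that the data is valid. */
        return sysfs_update_group(&dev->kobj, &demo_group);
}

The SGX version keys visibility off nodes_empty(sgx_numa_mask) instead of a boolean and updates each node's kobject from sgx_page_cache_init(), as the diff shows.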
CC: Greg Kroah-Hartman Cc: linux-sgx@vger.kernel.org Cc: x86@kernel.org Fixes: 50468e431335 ("x86/sgx: Add an attribute for the amount of SGX memory in a NUMA node") Reported-by: Nathan Chancellor Signed-off-by: Dave Hansen Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jarkko Sakkinen Tested-by: Nathan Chancellor Tested-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/20220104171527.5E8416A8@davehans-spike.ostc.intel.com --- arch/x86/kernel/cpu/sgx/main.c | 65 ++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 2857a49f2335..4b41efc9e367 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -6,11 +6,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include "driver.h" #include "encl.h" @@ -780,6 +782,48 @@ static inline u64 __init sgx_calc_section_metric(u64 low, u64 high) ((high & GENMASK_ULL(19, 0)) << 32); } +#ifdef CONFIG_NUMA +static ssize_t sgx_total_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lu\n", sgx_numa_nodes[dev->id].size); +} +static DEVICE_ATTR_RO(sgx_total_bytes); + +static umode_t arch_node_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int idx) +{ + /* Make all x86/ attributes invisible when SGX is not initialized: */ + if (nodes_empty(sgx_numa_mask)) + return 0; + + return attr->mode; +} + +static struct attribute *arch_node_dev_attrs[] = { + &dev_attr_sgx_total_bytes.attr, + NULL, +}; + +const struct attribute_group arch_node_dev_group = { + .name = "x86", + .attrs = arch_node_dev_attrs, + .is_visible = arch_node_attr_is_visible, +}; + +static void __init arch_update_sysfs_visibility(int nid) +{ + struct node *node = node_devices[nid]; + int ret; + + ret = sysfs_update_group(&node->dev.kobj, &arch_node_dev_group); + + if (ret) + pr_err("sysfs update failed (%d), files may be invisible", ret); +} +#else /* !CONFIG_NUMA */ +static void __init arch_update_sysfs_visibility(int nid) {} +#endif + static bool __init sgx_page_cache_init(void) { u32 eax, ebx, ecx, edx, type; @@ -826,6 +870,9 @@ static bool __init sgx_page_cache_init(void) INIT_LIST_HEAD(&sgx_numa_nodes[nid].sgx_poison_page_list); node_set(nid, sgx_numa_mask); sgx_numa_nodes[nid].size = 0; + + /* Make SGX-specific node sysfs files visible: */ + arch_update_sysfs_visibility(nid); } sgx_epc_sections[i].node = &sgx_numa_nodes[nid]; @@ -903,24 +950,6 @@ int sgx_set_attribute(unsigned long *allowed_attributes, } EXPORT_SYMBOL_GPL(sgx_set_attribute); -#ifdef CONFIG_NUMA -static ssize_t sgx_total_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - return sysfs_emit(buf, "%lu\n", sgx_numa_nodes[dev->id].size); -} -static DEVICE_ATTR_RO(sgx_total_bytes); - -static struct attribute *arch_node_dev_attrs[] = { - &dev_attr_sgx_total_bytes.attr, - NULL, -}; - -const struct attribute_group arch_node_dev_group = { - .name = "x86", - .attrs = arch_node_dev_attrs, -}; -#endif /* CONFIG_NUMA */ - static int __init sgx_init(void) { int ret;