From 63db506e07622c344a3c748a1c06293d48780f83 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 26 Jul 2021 16:35:47 +0100
Subject: [PATCH 1/6] KVM: arm64: Introduce helper to retrieve a PTE and its level

It is becoming a common need to fetch the PTE for a given address
together with its level. Add such a helper.

Signed-off-by: Marc Zyngier
Reviewed-by: Quentin Perret
Reviewed-by: Alexandru Elisei
Link: https://lore.kernel.org/r/20210726153552.1535838-2-maz@kernel.org
---
 arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++
 arch/arm64/kvm/hyp/pgtable.c         | 39 ++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index f004c0115d89..e42b55bd50a2 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -432,6 +432,26 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker);
 
+/**
+ * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
+ *			    with its level.
+ * @pgt:	Page-table structure initialised by kvm_pgtable_*_init()
+ *		or a similar initialiser.
+ * @addr:	Input address for the start of the walk.
+ * @ptep:	Pointer to storage for the retrieved PTE.
+ * @level:	Pointer to storage for the level of the retrieved PTE.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address specified, retrieving the leaf corresponding to this address.
+ * Invalid entries are treated as leaf entries.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+			 kvm_pte_t *ptep, u32 *level);
+
 /**
  * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
  *				     Addresses with compatible permission
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 05321f4165e3..78f36bd5df6c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
	return _kvm_pgtable_walk(&walk_data);
 }
 
+struct leaf_walk_data {
+	kvm_pte_t	pte;
+	u32		level;
+};
+
+static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+		       enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+	struct leaf_walk_data *data = arg;
+
+	data->pte   = *ptep;
+	data->level = level;
+
+	return 0;
+}
+
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+			 kvm_pte_t *ptep, u32 *level)
+{
+	struct leaf_walk_data data;
+	struct kvm_pgtable_walker walker = {
+		.cb	= leaf_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+		.arg	= &data,
+	};
+	int ret;
+
+	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
+			       PAGE_SIZE, &walker);
+	if (!ret) {
+		if (ptep)
+			*ptep  = data.pte;
+		if (level)
+			*level = data.level;
+	}
+
+	return ret;
+}
+
 struct hyp_map_data {
	u64				phys;
	kvm_pte_t			attr;
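
[Illustration only, not part of the patch: a minimal sketch of how a caller
might use the new helper. The page-table handle `pgt`, the input address
`ipa` and the error handling are assumptions made for this example; only
kvm_pgtable_get_leaf() itself comes from the patch above.]

	kvm_pte_t pte;
	u32 level;
	int ret;

	/* Fetch the leaf entry covering ipa, together with its level */
	ret = kvm_pgtable_get_leaf(pgt, ipa, &pte, &level);
	if (ret)
		return ret;

	/* Invalid entries are returned as leaves too, so check validity */
	if (!(pte & PTE_VALID))
		return -ENOENT;

	/* level now tells us how large the mapping covering ipa is */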

From 6011cf68c88545e16cb32039c2cecfdae6a32315 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 26 Jul 2021 16:35:48 +0100
Subject: [PATCH 2/6] KVM: arm64: Walk userspace page tables to compute the THP mapping size

We currently rely on the kvm_is_transparent_hugepage() helper to
discover whether a given page has the potential to be mapped as
a block mapping.

However, this API doesn't really give us everything we want:

- we don't get the size: this is not crucial today as we only support
  PMD-sized THPs, but we'd like to have larger sizes in the future

- we're the only user left of the API, and there is a will to remove it
  altogether

To address the above, implement a simple walker using the existing
page table infrastructure, and plumb it into
transparent_hugepage_adjust(). No new page sizes are supported in
the process.

Signed-off-by: Marc Zyngier
Reviewed-by: Alexandru Elisei
Link: https://lore.kernel.org/r/20210726153552.1535838-3-maz@kernel.org
---
 arch/arm64/kvm/mmu.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 0625bf2353c2..183c107c06b2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -433,6 +433,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
	return 0;
 }
 
+static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
+	/* We shouldn't need any other callback to walk the PT */
+	.phys_to_virt		= kvm_host_va,
+};
+
+static int get_user_mapping_size(struct kvm *kvm, u64 addr)
+{
+	struct kvm_pgtable pgt = {
+		.pgd		= (kvm_pte_t *)kvm->mm->pgd,
+		.ia_bits	= VA_BITS,
+		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
+				   CONFIG_PGTABLE_LEVELS),
+		.mm_ops		= &kvm_user_mm_ops,
+	};
+	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
+	u32 level = ~0;
+	int ret;
+
+	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
+	VM_BUG_ON(ret);
+	VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
+	VM_BUG_ON(!(pte & PTE_VALID));
+
+	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
+}
+
 static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.zalloc_page		= stage2_memcache_zalloc_page,
	.zalloc_pages_exact	= kvm_host_zalloc_pages_exact,
@@ -780,7 +806,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
  * Returns the size of the mapping.
  */
 static unsigned long
-transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
+transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
			    unsigned long hva, kvm_pfn_t *pfnp,
			    phys_addr_t *ipap)
 {
@@ -791,8 +817,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
	 * sure that the HVA and IPA are sufficiently aligned and that the
	 * block map is contained within the memslot.
	 */
-	if (kvm_is_transparent_hugepage(pfn) &&
-	    fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+	    get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
@@ -1051,7 +1077,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * backed by a THP and thus use block mapping if possible.
	 */
	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
-		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
+		vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva,
							   &pfn, &fault_ipa);
 
	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
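
[Illustration only, not part of the patch: a worked example of the
level-to-size arithmetic used by get_user_mapping_size() above, assuming
4KiB pages (PAGE_SHIFT == 12) and the usual arm64 definition
ARM64_HW_PGTABLE_LEVEL_SHIFT(n) == (PAGE_SHIFT - 3) * (4 - (n)) + 3.]

	/*
	 * level 3: BIT((12 - 3) * 1 + 3) = BIT(12) = 4KiB  (PAGE_SIZE)
	 * level 2: BIT((12 - 3) * 2 + 3) = BIT(21) = 2MiB  (PMD_SIZE)
	 * level 1: BIT((12 - 3) * 3 + 3) = BIT(30) = 1GiB  (PUD_SIZE)
	 *
	 * Only the 2MiB case is exploited by this series: the caller
	 * checks get_user_mapping_size(kvm, hva) >= PMD_SIZE and still
	 * builds a PMD-sized stage-2 block mapping.
	 */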

From f2cc327303b13a70311e823bd52aa0bca8c7ddbc Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 26 Jul 2021 16:35:49 +0100
Subject: [PATCH 3/6] KVM: arm64: Avoid mapping size adjustment on permission fault

Since we only support PMD-sized mappings for THP, getting a permission
fault on a level that results in a mapping being larger than PAGE_SIZE
is a sure indication that we have already upgraded our mapping to a
PMD.

In this case, there is no need to try and parse userspace page tables,
as the fault information already tells us everything.

Signed-off-by: Marc Zyngier
Reviewed-by: Alexandru Elisei
Link: https://lore.kernel.org/r/20210726153552.1535838-4-maz@kernel.org
---
 arch/arm64/kvm/mmu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 183c107c06b2..116a2910a4a4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1076,9 +1076,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * If we are not forced to use page mapping, check if we are
	 * backed by a THP and thus use block mapping if possible.
	 */
-	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
-		vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva,
-							   &pfn, &fault_ipa);
+	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+			vma_pagesize = fault_granule;
+		else
+			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
+								   hva, &pfn,
+								   &fault_ipa);
+	}
 
	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
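
[Illustration only, not part of the patch: a sketch of why a permission
fault with a large fault_granule is sufficient. The two lines deriving
fault_granule are assumed to match the existing user_mem_abort() code;
vcpu, fault_level and fault_granule are the names used there.]

	fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);

	/*
	 * A permission fault (FSC_PERM) can only be taken through an
	 * already valid stage-2 mapping, so fault_granule > PAGE_SIZE
	 * means a block mapping (e.g. a 2MiB PMD) is already installed
	 * and its size can be reused directly, without walking the
	 * userspace page tables again.
	 */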

From 205d76ff0684a0b4fe3ff3a283d143a47439d191 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 26 Jul 2021 16:35:50 +0100
Subject: [PATCH 4/6] KVM: Remove kvm_is_transparent_hugepage() and PageTransCompoundMap()

Now that arm64 has stopped using kvm_is_transparent_hugepage(), we can
remove it, as well as PageTransCompoundMap() which was only used by the
former.

Acked-by: Paolo Bonzini
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210726153552.1535838-5-maz@kernel.org
---
 include/linux/page-flags.h | 37 -------------------------------------
 virt/kvm/kvm_main.c        | 10 ----------
 2 files changed, 47 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5922031ffab6..1ace27c4a8e0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -632,43 +632,6 @@ static inline int PageTransCompound(struct page *page)
	return PageCompound(page);
 }
 
-/*
- * PageTransCompoundMap is the same as PageTransCompound, but it also
- * guarantees the primary MMU has the entire compound page mapped
- * through pmd_trans_huge, which in turn guarantees the secondary MMUs
- * can also map the entire compound page. This allows the secondary
- * MMUs to call get_user_pages() only once for each compound page and
- * to immediately map the entire compound page with a single secondary
- * MMU fault. If there will be a pmd split later, the secondary MMUs
- * will get an update through the MMU notifier invalidation through
- * split_huge_pmd().
- *
- * Unlike PageTransCompound, this is safe to be called only while
- * split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount check false
- * positives.
- *
- * We have to treat page cache THP differently since every subpage of it
- * would get _mapcount inc'ed once it is PMD mapped.  But, it may be PTE
- * mapped in the current process so comparing subpage's _mapcount to
- * compound_mapcount to filter out PTE mapped case.
- */
-static inline int PageTransCompoundMap(struct page *page)
-{
-	struct page *head;
-
-	if (!PageTransCompound(page))
-		return 0;
-
-	if (PageAnon(page))
-		return atomic_read(&page->_mapcount) < 0;
-
-	head = compound_head(page);
-	/* File THP is PMD mapped and not PTE mapped */
-	return atomic_read(&page->_mapcount) ==
-	       atomic_read(compound_mapcount_ptr(head));
-}
-
 /*
  * PageTransTail returns true for both transparent huge pages
  * and hugetlbfs pages, so it should only be called when it's known
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d20fba0fc290..7b72a2b35a7e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -189,16 +189,6 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
	return true;
 }
 
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	if (!PageTransCompoundMap(page))
-		return false;
-
-	return is_transparent_hugepage(compound_head(page));
-}
-
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */

From 0fe49630101b3ce23bd21a2788440ac719ec868a Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 26 Jul 2021 16:35:51 +0100
Subject: [PATCH 5/6] KVM: arm64: Use get_page() instead of kvm_get_pfn()

When mapping a THP, we are guaranteed that the page isn't reserved,
and we can safely avoid the kvm_is_reserved_pfn() call.

Replace kvm_get_pfn() with get_page(pfn_to_page()).

Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210726153552.1535838-6-maz@kernel.org
---
 arch/arm64/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 116a2910a4a4..6e002d30a478 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -840,7 +840,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
		*ipap &= PMD_MASK;
		kvm_release_pfn_clean(pfn);
		pfn &= ~(PTRS_PER_PMD - 1);
-		kvm_get_pfn(pfn);
+		get_page(pfn_to_page(pfn));
		*pfnp = pfn;
 
		return PMD_SIZE;
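
[Illustration only, not part of the patch: a worked example of the
refcount/alignment sequence in the hunk above, assuming 4KiB pages
(PTRS_PER_PMD == 512, PMD_SIZE == 2MiB); the example pfn is made up.]

	/*
	 * Say the fault hit pfn 0x12345, somewhere inside a 2MiB THP:
	 *
	 *   kvm_release_pfn_clean(pfn);     // drop the ref on 0x12345
	 *   pfn &= ~(PTRS_PER_PMD - 1);     // 0x12345 -> 0x12200, the
	 *                                   // head pfn of the THP
	 *   get_page(pfn_to_page(pfn));     // take a ref on the head page
	 *
	 * Because the pfn is known to map a THP, it cannot be a reserved
	 * pfn, so the kvm_is_reserved_pfn() check done by kvm_get_pfn()
	 * (removed in the next patch) was never needed here.
	 */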

From 36c3ce6c0d03a6c9992c3359f879cdc70fde836a Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 26 Jul 2021 16:35:52 +0100
Subject: [PATCH 6/6] KVM: Get rid of kvm_get_pfn()

Nobody is using kvm_get_pfn() anymore. Get rid of it.

Acked-by: Paolo Bonzini
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210726153552.1535838-7-maz@kernel.org
---
 include/linux/kvm_host.h | 1 -
 virt/kvm/kvm_main.c      | 9 +--------
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae7735b490b4..9818d271c2a1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -824,7 +824,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
 void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_get_pfn(kvm_pfn_t pfn);
 void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
 
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7b72a2b35a7e..1d3a03c0fed3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2215,7 +2215,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
	 * Get a reference here because callers of *hva_to_pfn* and
	 * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
	 * returned pfn.  This is only needed if the VMA has VM_MIXEDMAP
-	 * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+	 * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
	 * simply do nothing for reserved pfns.
	 *
	 * Whoever called remap_pfn_range is also going to call e.g.
@@ -2612,13 +2612,6 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
-void kvm_get_pfn(kvm_pfn_t pfn)
-{
-	if (!kvm_is_reserved_pfn(pfn))
-		get_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_get_pfn);
-
 static int next_segment(unsigned long len, int offset)
 {
	if (len > PAGE_SIZE - offset)