Merge branch kvm-arm64/mmu/mapping-levels into kvmarm-master/next
Revamp the KVM/arm64 THP code by parsing the userspace page tables
instead of relying on an infrastructure that is about to disappear
(we are the last user).

* kvm-arm64/mmu/mapping-levels:
  KVM: Get rid of kvm_get_pfn()
  KVM: arm64: Use get_page() instead of kvm_get_pfn()
  KVM: Remove kvm_is_transparent_hugepage() and PageTransCompoundMap()
  KVM: arm64: Avoid mapping size adjustment on permission fault
  KVM: arm64: Walk userspace page tables to compute the THP mapping size
  KVM: arm64: Introduce helper to retrieve a PTE and its level

Signed-off-by: Marc Zyngier <maz@kernel.org>
Commit: 2d84f3ce5e
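In outline, the series moves THP detection in the stage-2 fault path from
struct page state to a walk of the userspace (host) page tables. Roughly,
using the function names that appear in the diffs below (call-graph sketch
only, not code from the patch):

    /*
     * user_mem_abort()
     *   -> transparent_hugepage_adjust(kvm, memslot, hva, &pfn, &fault_ipa)
     *        -> fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)
     *        -> get_user_mapping_size(kvm, hva)  -- walks the kvm->mm page tables
     *             -> kvm_pgtable_get_leaf(&pgt, hva, &pte, &level)
     *
     * A PMD_SIZE block mapping is installed when the userspace leaf backing
     * the faulting HVA is itself at least PMD_SIZE.
     */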
arch/arm64/include/asm/kvm_pgtable.h:

@@ -432,6 +432,26 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
                      struct kvm_pgtable_walker *walker);
 
+/**
+ * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
+ *                          with its level.
+ * @pgt:        Page-table structure initialised by kvm_pgtable_*_init()
+ *              or a similar initialiser.
+ * @addr:       Input address for the start of the walk.
+ * @ptep:       Pointer to storage for the retrieved PTE.
+ * @level:      Pointer to storage for the level of the retrieved PTE.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address specified, retrieving the leaf corresponding to this address.
+ * Invalid entries are treated as leaf entries.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+                         kvm_pte_t *ptep, u32 *level);
+
 /**
  * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
  *                                   Addresses with compatible permission
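For orientation, a caller might use the new helper along these lines. This is
a hypothetical sketch, not code from the series (leaf_mapping_size() is an
invented name):

    /*
     * Hypothetical example: report the size of the block/page mapping
     * that covers @addr, or 0 if the walk fails.
     */
    static u64 leaf_mapping_size(struct kvm_pgtable *pgt, u64 addr)
    {
            kvm_pte_t pte;
            u32 level;

            if (kvm_pgtable_get_leaf(pgt, addr, &pte, &level))
                    return 0;       /* walk failed */

            /*
             * Invalid entries are also reported as leaves, so a real caller
             * must check the valid bit of @pte before relying on the result
             * (as get_user_mapping_size() below does).
             */
            return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
    }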
arch/arm64/kvm/hyp/pgtable.c:

@@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	return _kvm_pgtable_walk(&walk_data);
 }
 
+struct leaf_walk_data {
+	kvm_pte_t	pte;
+	u32		level;
+};
+
+static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+		       enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+	struct leaf_walk_data *data = arg;
+
+	data->pte   = *ptep;
+	data->level = level;
+
+	return 0;
+}
+
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+			 kvm_pte_t *ptep, u32 *level)
+{
+	struct leaf_walk_data data;
+	struct kvm_pgtable_walker walker = {
+		.cb	= leaf_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+		.arg	= &data,
+	};
+	int ret;
+
+	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
+			       PAGE_SIZE, &walker);
+	if (!ret) {
+		if (ptep)
+			*ptep  = data.pte;
+		if (level)
+			*level = data.level;
+	}
+
+	return ret;
+}
+
 struct hyp_map_data {
 	u64				phys;
 	kvm_pte_t			attr;
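kvm_pgtable_get_leaf() reuses the existing kvm_pgtable_walk() callback
machinery rather than open-coding a walk. As an illustration of the same
pattern, a hypothetical walker that counts the leaf entries over a range
could be written as follows (sketch only, names invented):

    /* Count the leaf entries visited over [addr, addr + size). */
    struct count_walk_data {
            u64     leaves;
    };

    static int count_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                            enum kvm_pgtable_walk_flags flag, void * const arg)
    {
            struct count_walk_data *data = arg;

            data->leaves++;         /* called once per leaf entry visited */
            return 0;
    }

    static u64 count_leaves(struct kvm_pgtable *pgt, u64 addr, u64 size)
    {
            struct count_walk_data data = { };
            struct kvm_pgtable_walker walker = {
                    .cb     = count_walker,
                    .flags  = KVM_PGTABLE_WALK_LEAF,
                    .arg    = &data,
            };

            return kvm_pgtable_walk(pgt, addr, size, &walker) ? 0 : data.leaves;
    }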
arch/arm64/kvm/mmu.c:

@@ -433,6 +433,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 	return 0;
 }
 
+static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
+	/* We shouldn't need any other callback to walk the PT */
+	.phys_to_virt		= kvm_host_va,
+};
+
+static int get_user_mapping_size(struct kvm *kvm, u64 addr)
+{
+	struct kvm_pgtable pgt = {
+		.pgd		= (kvm_pte_t *)kvm->mm->pgd,
+		.ia_bits	= VA_BITS,
+		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
+				   CONFIG_PGTABLE_LEVELS),
+		.mm_ops		= &kvm_user_mm_ops,
+	};
+	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
+	u32 level = ~0;
+	int ret;
+
+	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
+	VM_BUG_ON(ret);
+	VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
+	VM_BUG_ON(!(pte & PTE_VALID));
+
+	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
+}
+
 static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
 	.zalloc_page		= stage2_memcache_zalloc_page,
 	.zalloc_pages_exact	= kvm_host_zalloc_pages_exact,
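The value returned by get_user_mapping_size() is simply the size implied by
the level of the leaf entry. Assuming 4KiB pages (PAGE_SHIFT == 12) and the
usual definition ARM64_HW_PGTABLE_LEVEL_SHIFT(n) == (PAGE_SHIFT - 3) * (4 - n) + 3,
the mapping works out as:

    /*
     * level 3 -> shift 12 -> BIT(12) = 4KiB  (PAGE_SIZE)
     * level 2 -> shift 21 -> BIT(21) = 2MiB  (PMD_SIZE)
     * level 1 -> shift 30 -> BIT(30) = 1GiB  (PUD_SIZE)
     *
     * so a level-2 leaf in the userspace tables means the HVA is backed by
     * a PMD-sized mapping.
     */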
@@ -780,7 +806,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
  * Returns the size of the mapping.
  */
 static unsigned long
-transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
+transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long hva, kvm_pfn_t *pfnp,
 			    phys_addr_t *ipap)
 {
@@ -791,8 +817,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 	 * sure that the HVA and IPA are sufficiently aligned and that the
 	 * block map is contained within the memslot.
 	 */
-	if (kvm_is_transparent_hugepage(pfn) &&
-	    fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+	    get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
 		/*
 		 * The address we faulted on is backed by a transparent huge
 		 * page. However, because we map the compound huge page and
@@ -814,7 +840,7 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 		*ipap &= PMD_MASK;
 		kvm_release_pfn_clean(pfn);
 		pfn &= ~(PTRS_PER_PMD - 1);
-		kvm_get_pfn(pfn);
+		get_page(pfn_to_page(pfn));
 		*pfnp = pfn;
 
 		return PMD_SIZE;
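As a worked example of the adjustment above, with 4KiB pages
(PTRS_PER_PMD == 512, PMD_SIZE == 2MiB) and made-up values:

    /*
     *   *ipap = 0x40123000  ->  *ipap &= PMD_MASK            -> 0x40000000
     *   pfn   = 0x89b23     ->  pfn &= ~(PTRS_PER_PMD - 1)   -> 0x89a00
     *
     * Both the IPA and the pfn are rounded down to the start of the 2MiB
     * block, the reference on the original subpage is dropped, and a new
     * reference is taken on the block-aligned page with get_page().
     */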
@@ -1050,9 +1076,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * If we are not forced to use page mapping, check if we are
 	 * backed by a THP and thus use block mapping if possible.
 	 */
-	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
-		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
-							   &pfn, &fault_ipa);
+	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+			vma_pagesize = fault_granule;
+		else
+			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
+								   hva, &pfn,
+								   &fault_ipa);
+	}
 
 	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
 		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
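fault_granule is computed earlier in user_mem_abort() from the fault level
reported by the hardware (introduced by the "Avoid mapping size adjustment on
permission fault" patch in this series); roughly, as a sketch:

    /*
     * A permission fault reported at a given level can only occur on an
     * existing mapping of that size, so that size can be reused directly
     * instead of re-walking the userspace tables.
     */
    fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
    fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);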
include/linux/kvm_host.h:

@@ -824,7 +824,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
 void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_get_pfn(kvm_pfn_t pfn);
 
 void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
include/linux/page-flags.h:

@@ -632,43 +632,6 @@ static inline int PageTransCompound(struct page *page)
 	return PageCompound(page);
 }
 
-/*
- * PageTransCompoundMap is the same as PageTransCompound, but it also
- * guarantees the primary MMU has the entire compound page mapped
- * through pmd_trans_huge, which in turn guarantees the secondary MMUs
- * can also map the entire compound page. This allows the secondary
- * MMUs to call get_user_pages() only once for each compound page and
- * to immediately map the entire compound page with a single secondary
- * MMU fault. If there will be a pmd split later, the secondary MMUs
- * will get an update through the MMU notifier invalidation through
- * split_huge_pmd().
- *
- * Unlike PageTransCompound, this is safe to be called only while
- * split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount check false
- * positives.
- *
- * We have to treat page cache THP differently since every subpage of it
- * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE
- * mapped in the current process so comparing subpage's _mapcount to
- * compound_mapcount to filter out PTE mapped case.
- */
-static inline int PageTransCompoundMap(struct page *page)
-{
-	struct page *head;
-
-	if (!PageTransCompound(page))
-		return 0;
-
-	if (PageAnon(page))
-		return atomic_read(&page->_mapcount) < 0;
-
-	head = compound_head(page);
-	/* File THP is PMD mapped and not PTE mapped */
-	return atomic_read(&page->_mapcount) ==
-	       atomic_read(compound_mapcount_ptr(head));
-}
-
 /*
  * PageTransTail returns true for both transparent huge pages
  * and hugetlbfs pages, so it should only be called when it's known
virt/kvm/kvm_main.c:

@@ -189,16 +189,6 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 	return true;
 }
 
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	if (!PageTransCompoundMap(page))
-		return false;
-
-	return is_transparent_hugepage(compound_head(page));
-}
-
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */
@@ -2225,7 +2215,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 * Get a reference here because callers of *hva_to_pfn* and
 	 * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
 	 * returned pfn. This is only needed if the VMA has VM_MIXEDMAP
-	 * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+	 * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
 	 * simply do nothing for reserved pfns.
 	 *
 	 * Whoever called remap_pfn_range is also going to call e.g.
@@ -2622,13 +2612,6 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
-void kvm_get_pfn(kvm_pfn_t pfn)
-{
-	if (!kvm_is_reserved_pfn(pfn))
-		get_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_get_pfn);
-
 static int next_segment(unsigned long len, int offset)
 {
 	if (len > PAGE_SIZE - offset)