KVM: arm64: Protect stage-2 traversal with RCU

Use RCU to safely walk the stage-2 page tables in parallel. Acquire and
release the RCU read lock when traversing the page tables. Defer the
freeing of table memory to an RCU callback. Indirect the calls into RCU
and provide stubs for hypervisor code, as RCU is not available in such a
context.

The RCU protection doesn't amount to much at the moment, as readers are
already protected by the read-write lock (all walkers that free table
memory take the write lock). Nonetheless, a subsequent change will
futher relax the locking requirements around the stage-2 MMU, thereby
depending on RCU.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-9-oliver.upton@linux.dev
This commit is contained in:
Oliver Upton 2022-11-07 21:56:38 +00:00 коммит произвёл Marc Zyngier
Родитель 5c359cca1f
Коммит c3119ae45d
3 изменённых файлов: 71 добавлений и 2 удалений

Просмотреть файл

@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0)
typedef u64 kvm_pte_t;
/*
* RCU cannot be used in a non-kernel context such as the hyp. As such, page
* table walkers used in hyp do not call into RCU and instead use other
* synchronization mechanisms (such as a spinlock).
*/
#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
typedef kvm_pte_t *kvm_pteref_t;
static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
return pteref;
}
static inline void kvm_pgtable_walk_begin(void) {}
static inline void kvm_pgtable_walk_end(void) {}
static inline bool kvm_pgtable_walk_lock_held(void)
{
return true;
}
#else
typedef kvm_pte_t __rcu *kvm_pteref_t;
static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
{
return rcu_dereference_check(pteref, !shared);
}
static inline void kvm_pgtable_walk_begin(void)
{
rcu_read_lock();
}
static inline void kvm_pgtable_walk_end(void)
{
rcu_read_unlock();
}
static inline bool kvm_pgtable_walk_lock_held(void)
{
return rcu_read_lock_held();
}
#endif
#define KVM_PTE_VALID BIT(0)
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
@ -202,11 +243,14 @@ struct kvm_pgtable {
* children.
* @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
* children.
* @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
* with other software walkers.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
KVM_PGTABLE_WALK_SHARED = BIT(3),
};
struct kvm_pgtable_visit_ctx {
@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx {
typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit);
static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
{
return ctx->flags & KVM_PGTABLE_WALK_SHARED;
}
/**
* struct kvm_pgtable_walker - Hook into a page-table walk.
* @cb: Callback function to invoke during the walk.

Просмотреть файл

@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
enum kvm_pgtable_walk_flags visit)
{
struct kvm_pgtable_walker *walker = data->walker;
/* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */
WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held());
return walker->cb(ctx, visit);
}
@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
.end = PAGE_ALIGN(walk_data.addr + size),
.walker = walker,
};
int r;
return _kvm_pgtable_walk(pgt, &walk_data);
kvm_pgtable_walk_begin();
r = _kvm_pgtable_walk(pgt, &walk_data);
kvm_pgtable_walk_end();
return r;
}
struct leaf_walk_data {

Просмотреть файл

@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
{
struct page *page = container_of(head, struct page, rcu_head);
void *pgtable = page_to_virt(page);
u32 level = page_private(page);
kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
}
static void stage2_free_removed_table(void *addr, u32 level)
{
kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
struct page *page = virt_to_page(addr);
set_page_private(page, (unsigned long)level);
call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
}
static void kvm_host_get_page(void *addr)