KVM: Use interval tree to do fast hva lookup in memslots

The current memslots implementation only allows a quick binary search by gfn;
a quick lookup by hva is not possible - the implementation has to do a linear
scan of the whole memslots array, even though the operation being performed
might apply to just a single memslot.

This significantly hurts performance of per-hva operations with higher
memslot counts.

Since hva ranges can overlap between memslots, an interval tree is needed to
track them.

[sean: handle interval tree updates in kvm_replace_memslot()]
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <d66b9974becaa9839be9c4e1a5de97b177b4ac20.1638817640.git.maciej.szmigiero@oracle.com>
Authored by Maciej S. Szmigiero, 2021-12-06 20:54:28 +01:00; committed by Paolo Bonzini
Parent: 26b8345abc
Commit: ed922739c9
7 changed files, 47 additions and 14 deletions
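
For illustration only (not part of the patch): a minimal sketch of the lookup pattern the new hva interval tree enables, using the generic helpers from <linux/interval_tree.h>. The hva_tree and hva_node fields are the ones added by the diff below; the helper name and its body are hypothetical.

#include <linux/interval_tree.h>
#include <linux/kvm_host.h>

/*
 * Hypothetical helper: visit every memslot whose hva range intersects
 * [start, last] (both inclusive) without scanning the whole memslots array.
 */
static void visit_slots_in_hva_range(struct kvm_memslots *slots,
                                     unsigned long start, unsigned long last)
{
        struct interval_tree_node *node;
        struct kvm_memory_slot *slot;

        for (node = interval_tree_iter_first(&slots->hva_tree, start, last);
             node;
             node = interval_tree_iter_next(node, start, last)) {
                /* hva_node is embedded in the memslot, so recover the slot. */
                slot = container_of(node, struct kvm_memory_slot, hva_node);

                /* ... apply the per-hva operation to just this memslot ... */
        }
}

A plain rb-tree keyed only by the start address could not answer this query, since overlapping ranges cannot be totally ordered by one key; the interval tree's augmented per-node maximum end lets the walk skip subtrees that cannot contain an overlap.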

@@ -39,6 +39,7 @@ menuconfig KVM
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_VCPU_RUN_PID_CHANGE
 	select SCHED_INFO
+	select INTERVAL_TREE
 	help
 	  Support hosting virtualized guest machines.

@@ -27,6 +27,7 @@ config KVM
 	select KVM_MMIO
 	select MMU_NOTIFIER
 	select SRCU
+	select INTERVAL_TREE
 	help
 	  Support for hosting Guest kernels.

@@ -26,6 +26,7 @@ config KVM
 	select KVM_VFIO
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
+	select INTERVAL_TREE
 
 config KVM_BOOK3S_HANDLER
 	bool

@@ -33,6 +33,7 @@ config KVM
 	select HAVE_KVM_NO_POLL
 	select SRCU
 	select KVM_VFIO
+	select INTERVAL_TREE
 	help
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work

@@ -43,6 +43,7 @@ config KVM
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select KVM_VFIO
 	select SRCU
+	select INTERVAL_TREE
 	select HAVE_KVM_PM_NOTIFIER if PM
 	help
 	  Support hosting fully virtualized guest machines using hardware

@@ -30,6 +30,7 @@
 #include <linux/nospec.h>
 #include <linux/notifier.h>
 #include <linux/hashtable.h>
+#include <linux/interval_tree.h>
 #include <linux/xarray.h>
 
 #include <asm/signal.h>
@@ -428,6 +429,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 
 struct kvm_memory_slot {
 	struct hlist_node id_node;
+	struct interval_tree_node hva_node;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long *dirty_bitmap;
@@ -529,6 +531,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
  */
 struct kvm_memslots {
 	u64 generation;
+	struct rb_root_cached hva_tree;
 	/*
 	 * The mapping table from slot id to the index in memslots[].
 	 *

@@ -512,6 +512,12 @@ static void kvm_null_fn(void)
 }
 #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
 
+/* Iterate over each memslot intersecting [start, last] (inclusive) range */
+#define kvm_for_each_memslot_in_hva_range(node, slots, start, last)	      \
+	for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
+	     node;							      \
+	     node = interval_tree_iter_next(node, start, last))	      \
+
 static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 						  const struct kvm_hva_range *range)
 {
@@ -521,6 +527,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 	struct kvm_memslots *slots;
 	int i, idx;
 
+	if (WARN_ON_ONCE(range->end <= range->start))
+		return 0;
+
 	/* A null handler is allowed if and only if on_lock() is provided. */
 	if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
 			 IS_KVM_NULL_FN(range->handler)))
@@ -529,15 +538,17 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 	idx = srcu_read_lock(&kvm->srcu);
 
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		struct interval_tree_node *node;
+
 		slots = __kvm_memslots(kvm, i);
-		kvm_for_each_memslot(slot, slots) {
+		kvm_for_each_memslot_in_hva_range(node, slots,
+						  range->start, range->end - 1) {
 			unsigned long hva_start, hva_end;
 
+			slot = container_of(node, struct kvm_memory_slot, hva_node);
 			hva_start = max(range->start, slot->userspace_addr);
 			hva_end = min(range->end, slot->userspace_addr +
 				      (slot->npages << PAGE_SHIFT));
-			if (hva_start >= hva_end)
-				continue;
 
 			/*
 			 * To optimize for the likely case where the address
@@ -873,6 +884,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
 	if (!slots)
 		return NULL;
 
+	slots->hva_tree = RB_ROOT_CACHED;
 	hash_init(slots->id_hash);
 
 	return slots;
@@ -1277,21 +1289,28 @@ static void kvm_replace_memslot(struct kvm_memslots *slots,
 				struct kvm_memory_slot *new)
 {
 	/*
-	 * Remove the old memslot from the hash list, copying the node data
-	 * would corrupt the list.
+	 * Remove the old memslot from the hash list and interval tree, copying
+	 * the node data would corrupt the structures.
 	 */
 	if (old) {
 		hash_del(&old->id_node);
+		interval_tree_remove(&old->hva_node, &slots->hva_tree);
 
 		if (!new)
 			return;
 
 		/* Copy the source *data*, not the pointer, to the destination. */
 		*new = *old;
+	} else {
+		/* If @old is NULL, initialize @new's hva range. */
+		new->hva_node.start = new->userspace_addr;
+		new->hva_node.last = new->userspace_addr +
+			(new->npages << PAGE_SHIFT) - 1;
 	}
 
 	/* (Re)Add the new memslot. */
 	hash_add(slots->id_hash, &new->id_node, new->id);
+	interval_tree_insert(&new->hva_node, &slots->hva_tree);
 }
 
 static void kvm_shift_memslot(struct kvm_memslots *slots, int dst, int src)
@@ -1322,7 +1341,7 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 	atomic_set(&slots->last_used_slot, 0);
 
 	/*
-	 * Remove the to-be-deleted memslot from the list _before_ shifting
+	 * Remove the to-be-deleted memslot from the list/tree _before_ shifting
 	 * the trailing memslots forward, its data will be overwritten.
 	 * Defer the (somewhat pointless) copying of the memslot until after
 	 * the last slot has been shifted to avoid overwriting said last slot.
@@ -1349,7 +1368,8 @@ static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
  * itself is not preserved in the array, i.e. not swapped at this time, only
  * its new index into the array is tracked. Returns the changed memslot's
  * current index into the memslots array.
- * The memslot at the returned index will not be in @slots->id_hash by then.
+ * The memslot at the returned index will not be in @slots->hva_tree or
+ * @slots->id_hash by then.
  * @memslot is a detached struct with desired final data of the changed slot.
  */
 static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
@@ -1363,10 +1383,10 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
 		return -1;
 
 	/*
-	 * Delete the slot from the hash table before sorting the remaining
-	 * slots, the slot's data may be overwritten when copying slots as part
-	 * of the sorting proccess. update_memslots() will unconditionally
-	 * rewrite the entire slot and re-add it to the hash table.
+	 * Delete the slot from the hash table and interval tree before sorting
+	 * the remaining slots, the slot's data may be overwritten when copying
+	 * slots as part of the sorting proccess. update_memslots() will
+	 * unconditionally rewrite and re-add the entire slot.
 	 */
 	kvm_replace_memslot(slots, oldslot, NULL);
@@ -1392,10 +1412,12 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
  * is not preserved in the array, i.e. not swapped at this time, only its new
  * index into the array is tracked. Returns the changed memslot's final index
  * into the memslots array.
- * The memslot at the returned index will not be in @slots->id_hash by then.
+ * The memslot at the returned index will not be in @slots->hva_tree or
+ * @slots->id_hash by then.
  * @memslot is a detached struct with desired final data of the new or
  * changed slot.
- * Assumes that the memslot at @start index is not in @slots->id_hash.
+ * Assumes that the memslot at @start index is not in @slots->hva_tree or
+ * @slots->id_hash.
  */
 static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
 					   struct kvm_memory_slot *memslot,
@@ -1588,9 +1610,12 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 
 	memcpy(slots, old, kvm_memslots_size(old->used_slots));
 
+	slots->hva_tree = RB_ROOT_CACHED;
 	hash_init(slots->id_hash);
-	kvm_for_each_memslot(memslot, slots)
+	kvm_for_each_memslot(memslot, slots) {
+		interval_tree_insert(&memslot->hva_node, &slots->hva_tree);
 		hash_add(slots->id_hash, &memslot->id_node, memslot->id);
+	}
 
 	return slots;
 }