KVM: Intelligent device lookup on I/O bus
Currently the method of dealing with an IO operation on a bus (PIO/MMIO) is to call the read or write callback for each device registered on the bus until we find a device which handles it. Since the number of devices on a bus can be significant due to ioeventfds and coalesced MMIO zones, this leads to a lot of overhead on each IO operation. Instead of registering devices, we now register ranges, each of which points to a device. Lookup is done using an efficient bsearch instead of a linear search. A performance test was conducted by comparing the exit count per second with 200 ioeventfds created on one byte while the guest continuously accesses a different byte (triggering usermode exits). Before the patch the guest achieved 259k exits per second; after the patch it achieves 274k exits per second. Cc: Avi Kivity <avi@redhat.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Sasha Levin <levinsasha928@gmail.com> Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
Родитель
0d460ffc09
Коммит
743eeb0b01
|
@ -713,14 +713,16 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
|
|||
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
|
||||
|
||||
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
|
||||
KVM_PIT_MEM_LENGTH, &pit->dev);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
|
||||
if (flags & KVM_PIT_SPEAKER_DUMMY) {
|
||||
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
|
||||
&pit->speaker_dev);
|
||||
KVM_SPEAKER_BASE_ADDRESS, 4,
|
||||
&pit->speaker_dev);
|
||||
if (ret < 0)
|
||||
goto fail_unregister;
|
||||
}
|
||||
|
|
|
@ -459,15 +459,9 @@ static int picdev_in_range(gpa_t addr)
|
|||
}
|
||||
}
|
||||
|
||||
static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
|
||||
{
|
||||
return container_of(dev, struct kvm_pic, dev);
|
||||
}
|
||||
|
||||
static int picdev_write(struct kvm_io_device *this,
|
||||
static int picdev_write(struct kvm_pic *s,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
struct kvm_pic *s = to_pic(this);
|
||||
unsigned char data = *(unsigned char *)val;
|
||||
if (!picdev_in_range(addr))
|
||||
return -EOPNOTSUPP;
|
||||
|
@ -494,10 +488,9 @@ static int picdev_write(struct kvm_io_device *this,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int picdev_read(struct kvm_io_device *this,
|
||||
static int picdev_read(struct kvm_pic *s,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
struct kvm_pic *s = to_pic(this);
|
||||
unsigned char data = 0;
|
||||
if (!picdev_in_range(addr))
|
||||
return -EOPNOTSUPP;
|
||||
|
@ -525,6 +518,48 @@ static int picdev_read(struct kvm_io_device *this,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Write callback for the dev_master I/O device: recover the enclosing
 * struct kvm_pic from its dev_master member and forward the access
 * unchanged to picdev_write().
 */
static int picdev_master_write(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
return picdev_write(container_of(dev, struct kvm_pic, dev_master),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
/*
 * Read callback for the dev_master I/O device: recover the enclosing
 * struct kvm_pic from its dev_master member and forward the access
 * unchanged to picdev_read().
 */
static int picdev_master_read(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
return picdev_read(container_of(dev, struct kvm_pic, dev_master),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
/*
 * Write callback for the dev_slave I/O device: recover the enclosing
 * struct kvm_pic from its dev_slave member and forward the access
 * unchanged to picdev_write().
 */
static int picdev_slave_write(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
/*
 * Read callback for the dev_slave I/O device: recover the enclosing
 * struct kvm_pic from its dev_slave member and forward the access
 * unchanged to picdev_read().
 */
static int picdev_slave_read(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
/*
 * Write callback for the dev_eclr I/O device: recover the enclosing
 * struct kvm_pic from its dev_eclr member and forward the access
 * unchanged to picdev_write().
 */
static int picdev_eclr_write(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
/*
 * Read callback for the dev_eclr I/O device: recover the enclosing
 * struct kvm_pic from its dev_eclr member and forward the access
 * unchanged to picdev_read().
 */
static int picdev_eclr_read(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
/*
|
||||
* callback when PIC0 irq status changed
|
||||
*/
|
||||
|
@ -537,9 +572,19 @@ static void pic_irq_request(struct kvm *kvm, int level)
|
|||
s->output = level;
|
||||
}
|
||||
|
||||
static const struct kvm_io_device_ops picdev_ops = {
|
||||
.read = picdev_read,
|
||||
.write = picdev_write,
|
||||
/* I/O callbacks for the dev_master device of struct kvm_pic. */
static const struct kvm_io_device_ops picdev_master_ops = {
|
||||
.read = picdev_master_read,
|
||||
.write = picdev_master_write,
|
||||
};
|
||||
|
||||
/* I/O callbacks for the dev_slave device of struct kvm_pic. */
static const struct kvm_io_device_ops picdev_slave_ops = {
|
||||
.read = picdev_slave_read,
|
||||
.write = picdev_slave_write,
|
||||
};
|
||||
|
||||
/* I/O callbacks for the dev_eclr device of struct kvm_pic. */
static const struct kvm_io_device_ops picdev_eclr_ops = {
|
||||
.read = picdev_eclr_read,
|
||||
.write = picdev_eclr_write,
|
||||
};
|
||||
|
||||
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
|
||||
|
@ -560,16 +605,39 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
|
|||
/*
|
||||
* Initialize PIO device
|
||||
*/
|
||||
kvm_iodevice_init(&s->dev, &picdev_ops);
|
||||
kvm_iodevice_init(&s->dev_master, &picdev_master_ops);
|
||||
kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops);
|
||||
kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops);
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,
|
||||
&s->dev_master);
|
||||
if (ret < 0)
|
||||
goto fail_unlock;
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
|
||||
if (ret < 0)
|
||||
goto fail_unreg_2;
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
|
||||
if (ret < 0)
|
||||
goto fail_unreg_1;
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
if (ret < 0) {
|
||||
kfree(s);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return s;
|
||||
|
||||
fail_unreg_1:
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
|
||||
|
||||
fail_unreg_2:
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
|
||||
|
||||
fail_unlock:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
kfree(s);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void kvm_destroy_pic(struct kvm *kvm)
|
||||
|
@ -577,7 +645,9 @@ void kvm_destroy_pic(struct kvm *kvm)
|
|||
struct kvm_pic *vpic = kvm->arch.vpic;
|
||||
|
||||
if (vpic) {
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
|
||||
kvm->arch.vpic = NULL;
|
||||
kfree(vpic);
|
||||
}
|
||||
|
|
|
@ -66,7 +66,9 @@ struct kvm_pic {
|
|||
struct kvm *kvm;
|
||||
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
|
||||
int output; /* intr from master PIC */
|
||||
struct kvm_io_device dev;
|
||||
struct kvm_io_device dev_master;
|
||||
struct kvm_io_device dev_slave;
|
||||
struct kvm_io_device dev_eclr;
|
||||
void (*ack_notifier)(void *opaque, int irq);
|
||||
unsigned long irq_states[16];
|
||||
};
|
||||
|
|
|
@ -3562,7 +3562,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
if (r) {
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
|
||||
&vpic->dev);
|
||||
&vpic->dev_master);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
|
||||
&vpic->dev_slave);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
|
||||
&vpic->dev_eclr);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
kfree(vpic);
|
||||
goto create_irqchip_unlock;
|
||||
|
|
|
@ -55,16 +55,16 @@ struct kvm;
|
|||
struct kvm_vcpu;
|
||||
extern struct kmem_cache *kvm_vcpu_cache;
|
||||
|
||||
/*
|
||||
* It would be nice to use something smarter than a linear search, TBD...
|
||||
* Thankfully we dont expect many devices to register (famous last words :),
|
||||
* so until then it will suffice. At least its abstracted so we can change
|
||||
* in one place.
|
||||
*/
|
||||
/*
 * One entry of an I/O bus: an address range together with the device
 * registered for it.
 */
struct kvm_io_range {
|
||||
gpa_t addr; /* start address of the range */
|
||||
int len; /* length of the range; addr + len is its end */
|
||||
struct kvm_io_device *dev; /* device registered for this range */
|
||||
};
|
||||
|
||||
struct kvm_io_bus {
|
||||
int dev_count;
|
||||
#define NR_IOBUS_DEVS 300
|
||||
struct kvm_io_device *devs[NR_IOBUS_DEVS];
|
||||
struct kvm_io_range range[NR_IOBUS_DEVS];
|
||||
};
|
||||
|
||||
enum kvm_bus {
|
||||
|
@ -77,8 +77,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
|||
int len, const void *val);
|
||||
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
|
||||
void *val);
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
struct kvm_io_device *dev);
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, struct kvm_io_device *dev);
|
||||
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
struct kvm_io_device *dev);
|
||||
|
||||
|
|
|
@ -141,7 +141,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
|
|||
dev->zone = *zone;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
|
||||
zone->size, &dev->dev);
|
||||
if (ret < 0)
|
||||
goto out_free_dev;
|
||||
list_add_tail(&dev->list, &kvm->coalesced_zones);
|
||||
|
|
|
@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
|||
|
||||
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
|
||||
&p->dev);
|
||||
if (ret < 0)
|
||||
goto unlock_fail;
|
||||
|
||||
|
|
|
@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm)
|
|||
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
|
||||
ioapic->kvm = kvm;
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
|
||||
IOAPIC_MEM_LENGTH, &ioapic->dev);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
if (ret < 0) {
|
||||
kvm->arch.vioapic = NULL;
|
||||
|
|
|
@ -47,6 +47,8 @@
|
|||
#include <linux/srcu.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/bsearch.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/io.h>
|
||||
|
@ -2391,24 +2393,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < bus->dev_count; i++) {
|
||||
struct kvm_io_device *pos = bus->devs[i];
|
||||
struct kvm_io_device *pos = bus->range[i].dev;
|
||||
|
||||
kvm_iodevice_destructor(pos);
|
||||
}
|
||||
kfree(bus);
|
||||
}
|
||||
|
||||
/*
 * Comparator over struct kvm_io_range, used for both sort() and bsearch()
 * on a bus's range array.
 *
 * Returns -1 when p1 starts below p2, 1 when p1 ends (addr + len) beyond
 * the end of p2, and 0 otherwise - i.e. 0 means p1 starts at or after the
 * start of p2 and ends at or before the end of p2 (p1 lies within p2).
 *
 * NOTE(review): the result is not symmetric - cmp(a, b) == 0 does not
 * imply cmp(b, a) == 0 (a range strictly inside another compares equal in
 * one direction and as -1 in the other).
 */
int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
|
||||
{
|
||||
const struct kvm_io_range *r1 = p1;
|
||||
const struct kvm_io_range *r2 = p2;
|
||||
|
||||
/* p1 begins before p2: order p1 first. */
if (r1->addr < r2->addr)
|
||||
return -1;
|
||||
/* p1 begins inside or after p2 but extends past p2's end. */
if (r1->addr + r1->len > r2->addr + r2->len)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Add the range [addr, addr + len) with its device to the bus.
 *
 * The new entry is appended at index dev_count (which is then incremented)
 * and the whole range array is re-sorted with kvm_io_bus_sort_cmp.
 *
 * Returns 0 on success, or -ENOSPC when the bus already holds
 * NR_IOBUS_DEVS entries.
 */
int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
|
||||
gpa_t addr, int len)
|
||||
{
|
||||
if (bus->dev_count == NR_IOBUS_DEVS)
|
||||
return -ENOSPC;
|
||||
|
||||
bus->range[bus->dev_count++] = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
.dev = dev,
|
||||
};
|
||||
|
||||
/* Keep the array ordered so that lookups can use bsearch(). */
sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
|
||||
kvm_io_bus_sort_cmp, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Find the lowest index in bus->range whose entry matches the access
 * [addr, addr + len) according to kvm_io_bus_sort_cmp.
 *
 * Returns that index, or -ENOENT when bsearch() finds no matching entry.
 */
int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
|
||||
gpa_t addr, int len)
|
||||
{
|
||||
struct kvm_io_range *range, key;
|
||||
int off;
|
||||
|
||||
/* Search key: only addr and len are set; dev is left zero-initialized. */
key = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
range = bsearch(&key, bus->range, bus->dev_count,
|
||||
sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
|
||||
if (range == NULL)
|
||||
return -ENOENT;
|
||||
|
||||
/* Convert the returned element pointer into an array index. */
off = range - bus->range;
|
||||
|
||||
/*
 * Rewind over preceding entries that also compare equal to the key, so
 * the returned index is the first of a run of matching entries.
 */
while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
|
||||
off--;
|
||||
|
||||
return off;
|
||||
}
|
||||
|
||||
/* kvm_io_bus_write - called under kvm->slots_lock */
|
||||
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, const void *val)
|
||||
{
|
||||
int i;
|
||||
int idx;
|
||||
struct kvm_io_bus *bus;
|
||||
struct kvm_io_range range;
|
||||
|
||||
range = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
|
||||
for (i = 0; i < bus->dev_count; i++)
|
||||
if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
|
||||
idx = kvm_io_bus_get_first_dev(bus, addr, len);
|
||||
if (idx < 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (idx < bus->dev_count &&
|
||||
kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
|
||||
if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
|
||||
return 0;
|
||||
idx++;
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
|
@ -2416,19 +2486,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
|||
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, void *val)
|
||||
{
|
||||
int i;
|
||||
int idx;
|
||||
struct kvm_io_bus *bus;
|
||||
struct kvm_io_range range;
|
||||
|
||||
range = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
|
||||
for (i = 0; i < bus->dev_count; i++)
|
||||
if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
|
||||
idx = kvm_io_bus_get_first_dev(bus, addr, len);
|
||||
if (idx < 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (idx < bus->dev_count &&
|
||||
kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
|
||||
if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
|
||||
return 0;
|
||||
idx++;
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Caller must hold slots_lock. */
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
struct kvm_io_device *dev)
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, struct kvm_io_device *dev)
|
||||
{
|
||||
struct kvm_io_bus *new_bus, *bus;
|
||||
|
||||
|
@ -2440,7 +2524,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
|||
if (!new_bus)
|
||||
return -ENOMEM;
|
||||
memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
|
||||
new_bus->devs[new_bus->dev_count++] = dev;
|
||||
kvm_io_bus_insert_dev(new_bus, dev, addr, len);
|
||||
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
|
||||
synchronize_srcu_expedited(&kvm->srcu);
|
||||
kfree(bus);
|
||||
|
@ -2464,9 +2548,13 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
|||
|
||||
r = -ENOENT;
|
||||
for (i = 0; i < new_bus->dev_count; i++)
|
||||
if (new_bus->devs[i] == dev) {
|
||||
if (new_bus->range[i].dev == dev) {
|
||||
r = 0;
|
||||
new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
|
||||
new_bus->dev_count--;
|
||||
new_bus->range[i] = new_bus->range[new_bus->dev_count];
|
||||
sort(new_bus->range, new_bus->dev_count,
|
||||
sizeof(struct kvm_io_range),
|
||||
kvm_io_bus_sort_cmp, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче