KVM: switch irq injection/acking data structures to irq_lock

Protect irq injection/acking data structures with a separate irq_lock
mutex. This fixes the following deadlock:

CPU A                               CPU B
kvm_vm_ioctl_deassign_dev_irq()
  mutex_lock(&kvm->lock);            worker_thread()
  -> kvm_deassign_irq()                -> kvm_assigned_dev_interrupt_work_handler()
    -> deassign_host_irq()               mutex_lock(&kvm->lock);
      -> cancel_work_sync() [blocked]

[gleb: fix ia64 path]

Reported-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
Marcelo Tosatti 2009-06-04 15:08:24 -03:00 committed by Avi Kivity
Parent 60eead79ad
Commit fa40a8214b
8 changed files: 58 additions and 30 deletions

View file

@ -1000,10 +1000,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out; goto out;
if (irqchip_in_kernel(kvm)) { if (irqchip_in_kernel(kvm)) {
__s32 status; __s32 status;
mutex_lock(&kvm->lock); mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level); irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) { if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status; irq_event.status = status;
if (copy_to_user(argp, &irq_event, if (copy_to_user(argp, &irq_event,

View file

@ -654,10 +654,10 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
int i; int i;
mutex_lock(&kvm->lock); mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->irq_lock);
/* /*
* Provides NMI watchdog support via Virtual Wire mode. * Provides NMI watchdog support via Virtual Wire mode.

View file

@ -424,7 +424,9 @@ static void apic_set_eoi(struct kvm_lapic *apic)
trigger_mode = IOAPIC_LEVEL_TRIG; trigger_mode = IOAPIC_LEVEL_TRIG;
else else
trigger_mode = IOAPIC_EDGE_TRIG; trigger_mode = IOAPIC_EDGE_TRIG;
mutex_lock(&apic->vcpu->kvm->irq_lock);
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
mutex_unlock(&apic->vcpu->kvm->irq_lock);
} }
static void apic_send_ipi(struct kvm_lapic *apic) static void apic_send_ipi(struct kvm_lapic *apic)
@ -448,7 +450,9 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector); irq.vector);
mutex_lock(&apic->vcpu->kvm->irq_lock);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
mutex_unlock(&apic->vcpu->kvm->irq_lock);
} }
static u32 apic_get_tmcct(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic)

View file

@ -2136,10 +2136,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out; goto out;
if (irqchip_in_kernel(kvm)) { if (irqchip_in_kernel(kvm)) {
__s32 status; __s32 status;
mutex_lock(&kvm->lock); mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level); irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) { if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status; irq_event.status = status;
if (copy_to_user(argp, &irq_event, if (copy_to_user(argp, &irq_event,
@ -2385,12 +2385,11 @@ mmio:
*/ */
mutex_lock(&vcpu->kvm->lock); mutex_lock(&vcpu->kvm->lock);
mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
mutex_unlock(&vcpu->kvm->lock);
if (mmio_dev) { if (mmio_dev) {
kvm_iodevice_read(mmio_dev, gpa, bytes, val); kvm_iodevice_read(mmio_dev, gpa, bytes, val);
mutex_unlock(&vcpu->kvm->lock);
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
} }
mutex_unlock(&vcpu->kvm->lock);
vcpu->mmio_needed = 1; vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa; vcpu->mmio_phys_addr = gpa;
@ -2440,12 +2439,11 @@ mmio:
*/ */
mutex_lock(&vcpu->kvm->lock); mutex_lock(&vcpu->kvm->lock);
mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
mutex_unlock(&vcpu->kvm->lock);
if (mmio_dev) { if (mmio_dev) {
kvm_iodevice_write(mmio_dev, gpa, bytes, val); kvm_iodevice_write(mmio_dev, gpa, bytes, val);
mutex_unlock(&vcpu->kvm->lock);
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
} }
mutex_unlock(&vcpu->kvm->lock);
vcpu->mmio_needed = 1; vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa; vcpu->mmio_phys_addr = gpa;
@ -2768,7 +2766,6 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
{ {
/* TODO: String I/O for in kernel device */ /* TODO: String I/O for in kernel device */
mutex_lock(&vcpu->kvm->lock);
if (vcpu->arch.pio.in) if (vcpu->arch.pio.in)
kvm_iodevice_read(pio_dev, vcpu->arch.pio.port, kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size, vcpu->arch.pio.size,
@ -2777,7 +2774,6 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
kvm_iodevice_write(pio_dev, vcpu->arch.pio.port, kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size, vcpu->arch.pio.size,
pd); pd);
mutex_unlock(&vcpu->kvm->lock);
} }
static void pio_string_write(struct kvm_io_device *pio_dev, static void pio_string_write(struct kvm_io_device *pio_dev,
@ -2787,14 +2783,12 @@ static void pio_string_write(struct kvm_io_device *pio_dev,
void *pd = vcpu->arch.pio_data; void *pd = vcpu->arch.pio_data;
int i; int i;
mutex_lock(&vcpu->kvm->lock);
for (i = 0; i < io->cur_count; i++) { for (i = 0; i < io->cur_count; i++) {
kvm_iodevice_write(pio_dev, io->port, kvm_iodevice_write(pio_dev, io->port,
io->size, io->size,
pd); pd);
pd += io->size; pd += io->size;
} }
mutex_unlock(&vcpu->kvm->lock);
} }
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@ -2831,7 +2825,9 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
val = kvm_register_read(vcpu, VCPU_REGS_RAX); val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4); memcpy(vcpu->arch.pio_data, &val, 4);
mutex_lock(&vcpu->kvm->lock);
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
mutex_unlock(&vcpu->kvm->lock);
if (pio_dev) { if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
complete_pio(vcpu); complete_pio(vcpu);
@ -2895,9 +2891,12 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.guest_gva = address; vcpu->arch.pio.guest_gva = address;
mutex_lock(&vcpu->kvm->lock);
pio_dev = vcpu_find_pio_dev(vcpu, port, pio_dev = vcpu_find_pio_dev(vcpu, port,
vcpu->arch.pio.cur_count, vcpu->arch.pio.cur_count,
!vcpu->arch.pio.in); !vcpu->arch.pio.in);
mutex_unlock(&vcpu->kvm->lock);
if (!vcpu->arch.pio.in) { if (!vcpu->arch.pio.in) {
/* string PIO write */ /* string PIO write */
ret = pio_copy_data(vcpu); ret = pio_copy_data(vcpu);

View file

@ -371,7 +371,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm, void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian); struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm); int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);

View file

@ -57,10 +57,10 @@ irqfd_inject(struct work_struct *work)
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd->kvm; struct kvm *kvm = irqfd->kvm;
mutex_lock(&kvm->lock); mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->irq_lock);
} }
/* /*

View file

@ -62,6 +62,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
int i, r = -1; int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL; struct kvm_vcpu *vcpu, *lowest = NULL;
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
if (irq->dest_mode == 0 && irq->dest_id == 0xff && if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_is_dm_lowest_prio(irq)) kvm_is_dm_lowest_prio(irq))
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@ -113,7 +115,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return kvm_irq_delivery_to_apic(kvm, NULL, &irq); return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
} }
/* This should be called with the kvm->lock mutex held /* This should be called with the kvm->irq_lock mutex held
* Return value: * Return value:
* < 0 Interrupt was ignored (masked or not delivered for other reasons) * < 0 Interrupt was ignored (masked or not delivered for other reasons)
* = 0 Interrupt was coalesced (previous irq is still pending) * = 0 Interrupt was coalesced (previous irq is still pending)
@ -125,6 +127,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
unsigned long *irq_state, sig_level; unsigned long *irq_state, sig_level;
int ret = -1; int ret = -1;
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
if (irq < KVM_IOAPIC_NUM_PINS) { if (irq < KVM_IOAPIC_NUM_PINS) {
irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
@ -175,19 +179,26 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
void kvm_register_irq_ack_notifier(struct kvm *kvm, void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian) struct kvm_irq_ack_notifier *kian)
{ {
mutex_lock(&kvm->irq_lock);
hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
} }
void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian) void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{ {
mutex_lock(&kvm->irq_lock);
hlist_del_init(&kian->link); hlist_del_init(&kian->link);
mutex_unlock(&kvm->irq_lock);
} }
/* The caller must hold kvm->lock mutex */
int kvm_request_irq_source_id(struct kvm *kvm) int kvm_request_irq_source_id(struct kvm *kvm)
{ {
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
int irq_source_id = find_first_zero_bit(bitmap, int irq_source_id;
mutex_lock(&kvm->irq_lock);
irq_source_id = find_first_zero_bit(bitmap,
sizeof(kvm->arch.irq_sources_bitmap)); sizeof(kvm->arch.irq_sources_bitmap));
if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@ -197,6 +208,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
set_bit(irq_source_id, bitmap); set_bit(irq_source_id, bitmap);
mutex_unlock(&kvm->irq_lock);
return irq_source_id; return irq_source_id;
} }
@ -207,6 +219,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
mutex_lock(&kvm->irq_lock);
if (irq_source_id < 0 || if (irq_source_id < 0 ||
irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@ -215,19 +228,24 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
clear_bit(irq_source_id, &kvm->arch.irq_states[i]); clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
mutex_unlock(&kvm->irq_lock);
} }
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn) struct kvm_irq_mask_notifier *kimn)
{ {
mutex_lock(&kvm->irq_lock);
kimn->irq = irq; kimn->irq = irq;
hlist_add_head(&kimn->link, &kvm->mask_notifier_list); hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
mutex_unlock(&kvm->irq_lock);
} }
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn) struct kvm_irq_mask_notifier *kimn)
{ {
mutex_lock(&kvm->irq_lock);
hlist_del(&kimn->link); hlist_del(&kimn->link);
mutex_unlock(&kvm->irq_lock);
} }
void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@ -235,6 +253,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
struct kvm_irq_mask_notifier *kimn; struct kvm_irq_mask_notifier *kimn;
struct hlist_node *n; struct hlist_node *n;
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
if (kimn->irq == irq) if (kimn->irq == irq)
kimn->func(kimn, mask); kimn->func(kimn, mask);
@ -250,7 +270,9 @@ static void __kvm_free_irq_routing(struct list_head *irq_routing)
void kvm_free_irq_routing(struct kvm *kvm) void kvm_free_irq_routing(struct kvm *kvm)
{ {
mutex_lock(&kvm->irq_lock);
__kvm_free_irq_routing(&kvm->irq_routing); __kvm_free_irq_routing(&kvm->irq_routing);
mutex_unlock(&kvm->irq_lock);
} }
static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@ -325,13 +347,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
e = NULL; e = NULL;
} }
mutex_lock(&kvm->lock); mutex_lock(&kvm->irq_lock);
list_splice(&kvm->irq_routing, &tmp); list_splice(&kvm->irq_routing, &tmp);
INIT_LIST_HEAD(&kvm->irq_routing); INIT_LIST_HEAD(&kvm->irq_routing);
list_splice(&irq_list, &kvm->irq_routing); list_splice(&irq_list, &kvm->irq_routing);
INIT_LIST_HEAD(&irq_list); INIT_LIST_HEAD(&irq_list);
list_splice(&tmp, &irq_list); list_splice(&tmp, &irq_list);
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->irq_lock);
r = 0; r = 0;

View file

@ -62,6 +62,12 @@
MODULE_AUTHOR("Qumranet"); MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
/*
* Ordering of locks:
*
* kvm->lock --> kvm->irq_lock
*/
DEFINE_SPINLOCK(kvm_lock); DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list); LIST_HEAD(vm_list);
@ -126,11 +132,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
interrupt_work); interrupt_work);
kvm = assigned_dev->kvm; kvm = assigned_dev->kvm;
/* This is taken to safely inject irq inside the guest. When mutex_lock(&kvm->irq_lock);
* the interrupt injection (or the ioapic code) uses a
* finer-grained lock, update this
*/
mutex_lock(&kvm->lock);
spin_lock_irq(&assigned_dev->assigned_dev_lock); spin_lock_irq(&assigned_dev->assigned_dev_lock);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
struct kvm_guest_msix_entry *guest_entries = struct kvm_guest_msix_entry *guest_entries =
@ -149,7 +151,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
assigned_dev->guest_irq, 1); assigned_dev->guest_irq, 1);
spin_unlock_irq(&assigned_dev->assigned_dev_lock); spin_unlock_irq(&assigned_dev->assigned_dev_lock);
mutex_unlock(&assigned_dev->kvm->lock); mutex_unlock(&assigned_dev->kvm->irq_lock);
} }
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@ -207,7 +209,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
static void deassign_guest_irq(struct kvm *kvm, static void deassign_guest_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev) struct kvm_assigned_dev_kernel *assigned_dev)
{ {
kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
assigned_dev->ack_notifier.gsi = -1; assigned_dev->ack_notifier.gsi = -1;
if (assigned_dev->irq_source_id != -1) if (assigned_dev->irq_source_id != -1)