KVM: PPC: Book 3S: XICS: Implement ICS P/Q states

This patch implements P(Presented)/Q(Queued) states for ICS irqs.

When the interrupt is raised, set P, and present it only if P was not
already set. If P is already set, don't present again; set Q instead.
When the interrupt is EOI'ed, move Q into P (and clear Q). If P is then
set, re-present the interrupt.

The asserted flag used by LSI is also incorporated into the P bit.

When the irq state is saved, the P/Q bits are saved as well; QEMU needs
some modifications to recognize them and pass them around so that they
can be restored.
A saved KVM_XICS_PENDING bit should also imply a saved
KVM_XICS_PRESENTED bit. However, some old code may not have or
recognize the P bit, so on restore we also set P when the PENDING bit
is set.

The idea and much of the code come from Ben.

Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
Li Zhong 2016-11-11 12:57:35 +08:00 коммит произвёл Paul Mackerras
Родитель bf5a71d538
Коммит 17d48610ae
4 изменённых файлов: 161 добавлений и 71 удалений

Просмотреть файл

@ -613,5 +613,7 @@ struct kvm_get_htab_header {
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
#define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_MASKED (1ULL << 41)
#define KVM_XICS_PENDING (1ULL << 42) #define KVM_XICS_PENDING (1ULL << 42)
#define KVM_XICS_PRESENTED (1ULL << 43)
#define KVM_XICS_QUEUED (1ULL << 44)
#endif /* __LINUX_KVM_POWERPC_H */ #endif /* __LINUX_KVM_POWERPC_H */

Просмотреть файл

@ -672,51 +672,39 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
return check_too_hard(xics, icp); return check_too_hard(xics, icp);
} }
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
{ {
struct kvmppc_xics *xics = vcpu->kvm->arch.xics; struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp; struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics; struct kvmppc_ics *ics;
struct ics_irq_state *state; struct ics_irq_state *state;
u32 irq = xirr & 0x00ffffff;
u16 src; u16 src;
u32 pq_old, pq_new;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/* /*
* ICP State: EOI * ICS EOI handling: For LSI, if P bit is still set, we need to
* resend it.
* *
* Note: If EOI is incorrectly used by SW to lower the CPPR * For MSI, we move Q bit into P (and clear Q). If it is set,
* value (ie more favored), we do not check for rejection of * resend it.
* a pending interrupt, this is a SW error and PAPR sepcifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
* CPPR update
*
* ICP State: Down_CPPR which we handle
* in a separate function as it's shared with H_CPPR.
*/ */
icp_rm_down_cppr(xics, icp, xirr >> 24);
/* IPIs have no EOI */
if (irq == XICS_IPI)
goto bail;
/*
* EOI handling: If the interrupt is still asserted, we need to
* resend it. We can take a lockless "peek" at the ICS state here.
*
* "Message" interrupts will never have "asserted" set
*/
ics = kvmppc_xics_find_ics(xics, irq, &src); ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics) if (!ics)
goto bail; goto bail;
state = &ics->irq_state[src]; state = &ics->irq_state[src];
/* Still asserted, resend it */ if (state->lsi)
if (state->asserted) pq_new = state->pq_state;
icp_rm_deliver_irq(xics, icp, irq); else
do {
pq_old = state->pq_state;
pq_new = pq_old >> 1;
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
if (pq_new & PQ_PRESENTED)
icp_rm_deliver_irq(xics, NULL, irq);
if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) { if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
icp->rm_action |= XICS_RM_NOTIFY_EOI; icp->rm_action |= XICS_RM_NOTIFY_EOI;
@ -737,10 +725,43 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
state->intr_cpu = -1; state->intr_cpu = -1;
} }
} }
bail: bail:
return check_too_hard(xics, icp); return check_too_hard(xics, icp);
} }
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
u32 irq = xirr & 0x00ffffff;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/*
* ICP State: EOI
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
* a pending interrupt, this is a SW error and PAPR specifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
* CPPR update
*
* ICP State: Down_CPPR which we handle
* in a separate function as it's shared with H_CPPR.
*/
icp_rm_down_cppr(xics, icp, xirr >> 24);
/* IPIs have no EOI */
if (irq == XICS_IPI)
return check_too_hard(xics, icp);
return ics_rm_eoi(vcpu, irq);
}
unsigned long eoi_rc; unsigned long eoi_rc;
static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
@ -827,14 +848,33 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
{ {
struct kvmppc_xics *xics; struct kvmppc_xics *xics;
struct kvmppc_icp *icp; struct kvmppc_icp *icp;
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u32 irq; u32 irq;
u16 src;
u32 pq_old, pq_new;
irq = irq_map->v_hwirq; irq = irq_map->v_hwirq;
xics = vcpu->kvm->arch.xics; xics = vcpu->kvm->arch.xics;
icp = vcpu->arch.icp; icp = vcpu->arch.icp;
kvmppc_rm_handle_irq_desc(irq_map->desc); kvmppc_rm_handle_irq_desc(irq_map->desc);
icp_rm_deliver_irq(xics, icp, irq);
ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics)
return 2;
state = &ics->irq_state[src];
/* only MSIs register bypass producers, so it must be MSI here */
do {
pq_old = state->pq_state;
pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
/* Test P=1, Q=0, this is the only case where we present */
if (pq_new == PQ_PRESENTED)
icp_rm_deliver_irq(xics, icp, irq);
/* EOI the interrupt */ /* EOI the interrupt */
icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,

Просмотреть файл

@ -75,6 +75,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
struct ics_irq_state *state; struct ics_irq_state *state;
struct kvmppc_ics *ics; struct kvmppc_ics *ics;
u16 src; u16 src;
u32 pq_old, pq_new;
XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
@ -87,25 +88,41 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
if (!state->exists) if (!state->exists)
return -EINVAL; return -EINVAL;
if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET)
level = 1;
else if (level == KVM_INTERRUPT_UNSET)
level = 0;
/* /*
* We set state->asserted locklessly. This should be fine as * Take other values the same as 1, consistent with original code.
* we are the only setter, thus concurrent access is undefined * maybe WARN here?
* to begin with.
*/ */
if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
state->asserted = 1; if (!state->lsi && level == 0) /* noop for MSI */
else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
state->asserted = 0;
return 0; return 0;
}
do {
pq_old = state->pq_state;
if (state->lsi) {
if (level) {
if (pq_old & PQ_PRESENTED)
/* Setting already set LSI ... */
return 0;
pq_new = PQ_PRESENTED;
} else
pq_new = 0;
} else
pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
/* Test P=1, Q=0, this is the only case where we present */
if (pq_new == PQ_PRESENTED)
icp_deliver_irq(xics, NULL, irq);
/* Record which CPU this arrived on for passed-through interrupts */ /* Record which CPU this arrived on for passed-through interrupts */
if (state->host_irq) if (state->host_irq)
state->intr_cpu = raw_smp_processor_id(); state->intr_cpu = raw_smp_processor_id();
/* Attempt delivery */
icp_deliver_irq(xics, NULL, irq);
return 0; return 0;
} }
@ -768,14 +785,51 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
icp_deliver_irq(xics, icp, reject); icp_deliver_irq(xics, icp, reject);
} }
static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq)
{ {
struct kvmppc_xics *xics = vcpu->kvm->arch.xics; struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp; struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics; struct kvmppc_ics *ics;
struct ics_irq_state *state; struct ics_irq_state *state;
u32 irq = xirr & 0x00ffffff;
u16 src; u16 src;
u32 pq_old, pq_new;
/*
* ICS EOI handling: For LSI, if P bit is still set, we need to
* resend it.
*
* For MSI, we move Q bit into P (and clear Q). If it is set,
* resend it.
*/
ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics) {
XICS_DBG("ios_eoi: IRQ 0x%06x not found !\n", irq);
return H_PARAMETER;
}
state = &ics->irq_state[src];
if (state->lsi)
pq_new = state->pq_state;
else
do {
pq_old = state->pq_state;
pq_new = pq_old >> 1;
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
if (pq_new & PQ_PRESENTED)
icp_deliver_irq(xics, icp, irq);
kvm_notify_acked_irq(vcpu->kvm, 0, irq);
return H_SUCCESS;
}
static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
u32 irq = xirr & 0x00ffffff;
XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
@ -798,26 +852,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
/* IPIs have no EOI */ /* IPIs have no EOI */
if (irq == XICS_IPI) if (irq == XICS_IPI)
return H_SUCCESS; return H_SUCCESS;
/*
* EOI handling: If the interrupt is still asserted, we need to
* resend it. We can take a lockless "peek" at the ICS state here.
*
* "Message" interrupts will never have "asserted" set
*/
ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics) {
XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
return H_PARAMETER;
}
state = &ics->irq_state[src];
/* Still asserted, resend it */ return ics_eoi(vcpu, irq);
if (state->asserted)
icp_deliver_irq(xics, icp, irq);
kvm_notify_acked_irq(vcpu->kvm, 0, irq);
return H_SUCCESS;
} }
int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
@ -975,9 +1011,9 @@ static int xics_debug_show(struct seq_file *m, void *private)
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *irq = &ics->irq_state[i]; struct ics_irq_state *irq = &ics->irq_state[i];
seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
irq->number, irq->server, irq->priority, irq->number, irq->server, irq->priority,
irq->saved_priority, irq->asserted, irq->saved_priority, irq->pq_state,
irq->resend, irq->masked_pending); irq->resend, irq->masked_pending);
} }
@ -1196,10 +1232,17 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
val |= prio << KVM_XICS_PRIORITY_SHIFT; val |= prio << KVM_XICS_PRIORITY_SHIFT;
if (irqp->lsi) { if (irqp->lsi) {
val |= KVM_XICS_LEVEL_SENSITIVE; val |= KVM_XICS_LEVEL_SENSITIVE;
if (irqp->asserted) if (irqp->pq_state & PQ_PRESENTED)
val |= KVM_XICS_PENDING; val |= KVM_XICS_PENDING;
} else if (irqp->masked_pending || irqp->resend) } else if (irqp->masked_pending || irqp->resend)
val |= KVM_XICS_PENDING; val |= KVM_XICS_PENDING;
if (irqp->pq_state & PQ_PRESENTED)
val |= KVM_XICS_PRESENTED;
if (irqp->pq_state & PQ_QUEUED)
val |= KVM_XICS_QUEUED;
ret = 0; ret = 0;
} }
arch_spin_unlock(&ics->lock); arch_spin_unlock(&ics->lock);
@ -1251,12 +1294,14 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
irqp->resend = 0; irqp->resend = 0;
irqp->masked_pending = 0; irqp->masked_pending = 0;
irqp->lsi = 0; irqp->lsi = 0;
irqp->asserted = 0; irqp->pq_state = 0;
if (val & KVM_XICS_LEVEL_SENSITIVE) { if (val & KVM_XICS_LEVEL_SENSITIVE)
irqp->lsi = 1; irqp->lsi = 1;
if (val & KVM_XICS_PENDING) /* If PENDING, set P in case P is not saved because of old code */
irqp->asserted = 1; if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
} irqp->pq_state |= PQ_PRESENTED;
if (val & KVM_XICS_QUEUED)
irqp->pq_state |= PQ_QUEUED;
irqp->exists = 1; irqp->exists = 1;
arch_spin_unlock(&ics->lock); arch_spin_unlock(&ics->lock);
local_irq_restore(flags); local_irq_restore(flags);

Просмотреть файл

@ -31,16 +31,19 @@
/* Priority value to use for disabling an interrupt */ /* Priority value to use for disabling an interrupt */
#define MASKED 0xff #define MASKED 0xff
#define PQ_PRESENTED 1
#define PQ_QUEUED 2
/* State for one irq source */ /* State for one irq source */
struct ics_irq_state { struct ics_irq_state {
u32 number; u32 number;
u32 server; u32 server;
u32 pq_state;
u8 priority; u8 priority;
u8 saved_priority; u8 saved_priority;
u8 resend; u8 resend;
u8 masked_pending; u8 masked_pending;
u8 lsi; /* level-sensitive interrupt */ u8 lsi; /* level-sensitive interrupt */
u8 asserted; /* Only for LSI */
u8 exists; u8 exists;
int intr_cpu; int intr_cpu;
u32 host_irq; u32 host_irq;