summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm/book3s_xive_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_xive_template.c')
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c159
1 files changed, 116 insertions, 43 deletions
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index c7a5deadd1cc..033363d6e764 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -11,6 +11,9 @@
#define XGLUE(a,b) a##b
#define GLUE(a,b) XGLUE(a,b)
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY 1
+
static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
{
u8 cppr;
@@ -22,18 +25,6 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
*/
eieio();
- /*
- * DD1 bug workaround: If PIPR is less favored than CPPR
- * ignore the interrupt or we might incorrectly lose an IPB
- * bit.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
- u8 pipr = be64_to_cpu(qw1) & 0xff;
- if (pipr >= xc->hw_cppr)
- return;
- }
-
/* Perform the acknowledge OS to register cycle. */
ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
@@ -86,8 +77,15 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
- else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
+ else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW)
opal_int_eoi(hw_irq);
+ else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+ /*
+ * For LSIs the HW EOI cycle is used rather than PQ bits,
+ * as they are automatically re-triggred in HW when still
+ * pending.
+ */
+ __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
} else {
uint64_t eoi_val;
@@ -99,20 +97,12 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
*
* This allows us to then do a re-trigger if Q was set
* rather than synthetizing an interrupt in software
- *
- * For LSIs, using the HW EOI cycle works around a problem
- * on P9 DD1 PHBs where the other ESB accesses don't work
- * properly.
*/
- if (xd->flags & XIVE_IRQ_FLAG_LSI)
- __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
- else {
- eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
-
- /* Re-trigger if needed */
- if ((eoi_val & 1) && __x_trig_page(xd))
- __x_writeq(0, __x_trig_page(xd));
- }
+ eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
+
+ /* Re-trigger if needed */
+ if ((eoi_val & 1) && __x_trig_page(xd))
+ __x_writeq(0, __x_trig_page(xd));
}
}
@@ -205,6 +195,10 @@ skip_ipi:
goto skip_ipi;
}
+ /* If it's the dummy interrupt, continue searching */
+ if (hirq == XICS_DUMMY)
+ goto skip_ipi;
+
/* If fetching, update queue pointers */
if (scan_type == scan_fetch) {
q->idx = idx;
@@ -286,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
/* First collect pending bits from HW */
GLUE(X_PFX,ack_pending)(xc);
- /*
- * Cleanup the old-style bits if needed (they may have been
- * set by pull or an escalation interrupts).
- */
- if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
- clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
- &vcpu->arch.pending_exceptions);
-
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
xc->pending, xc->hw_cppr, xc->cppr);
@@ -327,7 +313,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
*/
/* Return interrupt and old CPPR in GPR4 */
- vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
+ vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
return H_SUCCESS;
}
@@ -362,7 +348,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long
hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
/* Return interrupt and old CPPR in GPR4 */
- vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
+ vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
return H_SUCCESS;
}
@@ -385,9 +371,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
}
+static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
+ struct kvmppc_xive_vcpu *xc)
+{
+ unsigned int prio;
+
+ /* For each priority that is now masked */
+ for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+ struct xive_q *q = &xc->queues[prio];
+ struct kvmppc_xive_irq_state *state;
+ struct kvmppc_xive_src_block *sb;
+ u32 idx, toggle, entry, irq, hw_num;
+ struct xive_irq_data *xd;
+ __be32 *qpage;
+ u16 src;
+
+ idx = q->idx;
+ toggle = q->toggle;
+ qpage = READ_ONCE(q->qpage);
+ if (!qpage)
+ continue;
+
+ /* For each interrupt in the queue */
+ for (;;) {
+ entry = be32_to_cpup(qpage + idx);
+
+ /* No more ? */
+ if ((entry >> 31) == toggle)
+ break;
+ irq = entry & 0x7fffffff;
+
+ /* Skip dummies and IPIs */
+ if (irq == XICS_DUMMY || irq == XICS_IPI)
+ goto next;
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ goto next;
+ state = &sb->irq_state[src];
+
+ /* Has it been rerouted ? */
+ if (xc->server_num == state->act_server)
+ goto next;
+
+ /*
+ * Allright, it *has* been re-routed, kill it from
+ * the queue.
+ */
+ qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+ /* Find the HW interrupt */
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
+ if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+ GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
+
+ /* EOI the source */
+ GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+ next:
+ idx = (idx + 1) & q->msk;
+ if (idx == 0)
+ toggle ^= 1;
+ }
+ }
+}
+
X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
u8 old_cppr;
pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +460,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
*/
smp_mb();
- /*
- * We are masking less, we need to look for pending things
- * to deliver and set VP pending bits accordingly to trigger
- * a new interrupt otherwise we might miss MFRR changes for
- * which we have optimized out sending an IPI signal.
- */
- if (cppr > old_cppr)
+ if (cppr > old_cppr) {
+ /*
+ * We are masking less, we need to look for pending things
+ * to deliver and set VP pending bits accordingly to trigger
+ * a new interrupt otherwise we might miss MFRR changes for
+ * which we have optimized out sending an IPI signal.
+ */
GLUE(X_PFX,push_pending_to_hw)(xc);
+ } else {
+ /*
+ * We are masking more, we need to check the queue for any
+ * interrupt that has been routed to another CPU, take
+ * it out (replace it with the dummy) and retrigger it.
+ *
+ * This is necessary since those interrupts may otherwise
+ * never be processed, at least not until this CPU restores
+ * its CPPR.
+ *
+ * This is in theory racy vs. HW adding new interrupts to
+ * the queue. In practice this works because the interesting
+ * cases are when the guest has done a set_xive() to move the
+ * interrupt away, which flushes the xive, followed by the
+ * target CPU doing a H_CPPR. So any new interrupt coming into
+ * the queue must still be routed to us and isn't a source
+ * of concern.
+ */
+ GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
+ }
/* Apply new CPPR */
xc->hw_cppr = cppr;
OpenPOWER on IntegriCloud