5 files changed, 78 insertions, 67 deletions
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3bb6acb76339..e1c90725522a 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -43,17 +43,11 @@
 static bool pnv_eeh_nb_init = false;
 static int eeh_event_irq = -EINVAL;
 
-/**
- * pnv_eeh_init - EEH platform dependent initialization
- *
- * EEH platform dependent initialization on powernv
- */
 static int pnv_eeh_init(void)
 {
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
 
-	/* We require OPALv3 */
 	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
 		pr_warn("%s: OPALv3 is required !\n",
 			__func__);
@@ -77,9 +71,9 @@ static int pnv_eeh_init(void)
 		/*
 		 * PE#0 should be regarded as valid by EEH core
 		 * if it's not the reserved one. Currently, we
-		 * have the reserved PE#0 and PE#127 for PHB3
+		 * have the reserved PE#255 and PE#127 for PHB3
 		 * and P7IOC separately. So we should regard
-		 * PE#0 as valid for P7IOC.
+		 * PE#0 as valid for PHB3 and P7IOC.
 		 */
 		if (phb->ioda.reserved_pe != 0)
 			eeh_add_flag(EEH_VALID_PE_ZERO);
@@ -284,33 +278,23 @@ static int pnv_eeh_post_init(void)
 #endif /* CONFIG_DEBUG_FS */
 	}
 
-
 	return ret;
 }
 
-static int pnv_eeh_cap_start(struct pci_dn *pdn)
+static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
 {
-	u32 status;
+	int pos = PCI_CAPABILITY_LIST;
+	int cnt = 48;   /* Maximal number of capabilities */
+	u32 status, id;
 
 	if (!pdn)
 		return 0;
 
+	/* Check if the device supports capabilities */
 	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
 	if (!(status & PCI_STATUS_CAP_LIST))
 		return 0;
 
-	return PCI_CAPABILITY_LIST;
-}
-
-static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
-{
-	int pos = pnv_eeh_cap_start(pdn);
-	int cnt = 48;   /* Maximal number of capabilities */
-	u32 id;
-
-	if (!pos)
-		return 0;
-
 	while (cnt--) {
 		pnv_pci_cfg_read(pdn, pos, 1, &pos);
 		if (pos < 0x40)
@@ -443,11 +427,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	 * that PE to block its config space.
 	 *
 	 * Broadcom Austin 4-ports NICs (14e4:1657)
+	 * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
 	 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
 	 */
 	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
 	     pdn->device_id == 0x1657) ||
 	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+	     pdn->device_id == 0x168a) ||
+	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
 	     pdn->device_id == 0x168e))
 		edev->pe->state |= EEH_PE_CFG_RESTRICTED;
 
@@ -487,10 +474,9 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
 	bool freeze_pe = false;
-	int opt, ret = 0;
+	int opt;
 	s64 rc;
 
-	/* Sanity check on option */
 	switch (option) {
 	case EEH_OPT_DISABLE:
 		return -EPERM;
@@ -511,38 +497,37 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
 		return -EINVAL;
 	}
 
-	/* If PHB supports compound PE, to handle it */
+	/* Freeze master and slave PEs if PHB supports compound PEs */
 	if (freeze_pe) {
 		if (phb->freeze_pe) {
 			phb->freeze_pe(phb, pe->addr);
-		} else {
-			rc = opal_pci_eeh_freeze_set(phb->opal_id,
-						     pe->addr, opt);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld freezing "
-					"PHB#%x-PE#%x\n",
-					__func__, rc,
-					phb->hose->global_number, pe->addr);
-				ret = -EIO;
-			}
+			return 0;
 		}
-	} else {
-		if (phb->unfreeze_pe) {
-			ret = phb->unfreeze_pe(phb, pe->addr, opt);
-		} else {
-			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
-						       pe->addr, opt);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld enable %d "
-					"for PHB#%x-PE#%x\n",
-					__func__, rc, option,
-					phb->hose->global_number, pe->addr);
-				ret = -EIO;
-			}
+
+		rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				pe->addr);
+			return -EIO;
 		}
+
+		return 0;
 	}
 
-	return ret;
+	/* Unfreeze master and slave PEs if PHB supports */
+	if (phb->unfreeze_pe)
+		return phb->unfreeze_pe(phb, pe->addr, opt);
+
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
+			__func__, rc, option, phb->hose->global_number,
+			pe->addr);
+		return -EIO;
+	}
+
+	return 0;
 }
 
 /**
@@ -1065,7 +1050,6 @@ static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
 	struct pnv_phb *phb = hose->private_data;
 	s64 rc;
 
-	/* Sanity check on error type */
 	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
 	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
 		pr_warn("%s: Invalid error type %d\n",
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 2c91ee7800b9..6ccfb6c1c707 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -137,7 +137,7 @@ static void opal_handle_irq_work(struct irq_work *work)
 static int opal_event_match(struct irq_domain *h, struct device_node *node,
 			    enum irq_domain_bus_token bus_token)
 {
-	return h->of_node == node;
+	return irq_domain_get_of_node(h) == node;
 }
 
 static int opal_event_xlate(struct irq_domain *h, struct device_node *np,
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 230f3a7cdea4..4296d55e88f3 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -487,9 +487,12 @@ int opal_machine_check(struct pt_regs *regs)
 	 *    PRD component would have already got notified about this
 	 *    error through other channels.
 	 *
-	 * In any case, let us just fall through. We anyway heading
-	 * down to panic path.
+	 * If hardware marked this as an unrecoverable MCE, we are
+	 * going to panic anyway. Even if it didn't, it's not safe to
+	 * continue at this point, so we should explicitly panic.
 	 */
+
+	panic("PowerNV Unrecovered Machine Check");
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 685b3cbe1362..a9a8fa37a555 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -187,7 +187,7 @@ static void pnv_kexec_wait_secondaries_down(void)
 
 	for_each_online_cpu(i) {
 		uint8_t status;
-		int64_t rc;
+		int64_t rc, timeout = 1000;
 
 		if (i == my_cpu)
 			continue;
@@ -204,6 +204,18 @@ static void pnv_kexec_wait_secondaries_down(void)
 				       i, paca[i].hw_cpu_id);
 				notified = i;
 			}
+
+			/*
+			 * On crash secondaries might be unreachable or hung,
+			 * so timeout if we've waited too long
+			 * */
+			mdelay(1);
+			if (timeout-- == 0) {
+				printk(KERN_ERR "kexec: timed out waiting for "
+				       "cpu %d (physical %d) to enter OPAL\n",
+				       i, paca[i].hw_cpu_id);
+				break;
+			}
 		}
 	}
 }
@@ -225,13 +237,6 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 
 		/* Return the CPU to OPAL */
 		opal_return_cpu();
-	} else if (crash_shutdown) {
-		/*
-		 * On crash, we don't wait for secondaries to go
-		 * down as they might be unreachable or hung, so
-		 * instead we just wait a bit and move on.
-		 */
-		mdelay(1);
 	} else {
 		/* Primary waits for the secondaries to have reached OPAL */
 		pnv_kexec_wait_secondaries_down();
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 8f70ba681a78..ca264833ee64 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -171,7 +171,26 @@ static void pnv_smp_cpu_kill_self(void)
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
+
+	/*
+	 * Hard-disable interrupts, and then clear irq_happened flags
+	 * that we can safely ignore while off-line, since they
+	 * are for things for which we do no processing when off-line
+	 * (or in the case of HMI, all the processing we need to do
+	 * is done in lower-level real-mode code).
+	 */
+	hard_irq_disable();
+	local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI);
+
 	while (!generic_check_cpu_restart(cpu)) {
+		/*
+		 * Clear IPI flag, since we don't handle IPIs while
+		 * offline, except for those when changing micro-threading
+		 * mode, which are handled explicitly below, and those
+		 * for coming online, which are handled via
+		 * generic_check_cpu_restart() calls.
+		 */
+		kvmppc_set_host_ipi(cpu, 0);
 
 		ppc64_runlatch_off();
 
@@ -196,20 +215,20 @@ static void pnv_smp_cpu_kill_self(void)
 		 * having finished executing in a KVM guest, then srr1
 		 * contains 0.
 		 */
-		if ((srr1 & wmask) == SRR1_WAKEEE) {
+		if (((srr1 & wmask) == SRR1_WAKEEE) ||
+		    (local_paca->irq_happened & PACA_IRQ_EE)) {
 			icp_native_flush_interrupt();
-			local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
-			smp_mb();
 		} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
 			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 			asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
-			kvmppc_set_host_ipi(cpu, 0);
 		}
+		local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL);
+		smp_mb();
 
 		if (cpu_core_split_required())
 			continue;
 
-		if (!generic_check_cpu_restart(cpu))
+		if (srr1 && !generic_check_cpu_restart(cpu))
 			DBG("CPU%d Unexpected exit while offline !\n", cpu);
 	}
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);