author		Nicholas Piggin <npiggin@gmail.com>	2018-04-08 16:49:37 +1000
committer	Stewart Smith <stewart@linux.ibm.com>	2018-04-18 20:23:07 -0500
commit		8514e4dc9a82f3ff85d40138f2c8e8a1dc64efa4 (patch)
tree		bfa96163d153d55f4d546256096c74a46a84760d
parent		ad0941960bd045644f6834d6e711bedbde3c29c8 (diff)
download	talos-skiboot-8514e4dc9a82f3ff85d40138f2c8e8a1dc64efa4.tar.gz
		talos-skiboot-8514e4dc9a82f3ff85d40138f2c8e8a1dc64efa4.zip
asm/head: implement quiescing without stack or clobbering regs
Quiescing is currently implemented in C in opal_entry before the opal call handler is called. This works well enough for simple cases like fast reset, when one CPU wants all others out of the way.

Linux would like to use it to prevent an sreset IPI from interrupting firmware, which could lead to deadlocks when crash dumping or entering the debugger. Linux interrupts do not recover well when returning to general OPAL code, due to r13 not being restored. OPAL also can't be re-entered, which may happen e.g., from the debugger.

So move the quiesce hold/reject to entry code, before the stack or r1 or r13 registers are switched. OPAL can be interrupted and returned to, or re-entered, during this period.

This does not completely solve all such problems. OPAL will be interrupted with sreset if the quiesce times out, and it can be interrupted by MCEs as well. These still have the issues above.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
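For reference, the entry-side protocol that the new assembly implements can be sketched in C. This is an illustrative sketch only, not code from the patch (the real sequence must live in asm, because it runs before r1/r13 are switched); it reuses the field and constant names this patch introduces, with sync(), barrier(), smt_lowest() and smt_medium() standing in for the usual skiboot primitives:

static int64_t quiesce_entry(struct cpu_thread *cpu)
{
	uint32_t quiesce;
again:
	cpu->in_opal_call++;
	/* order the in_opal_call store vs the quiesce_opal_call load */
	sync();
	quiesce = cpu->quiesce_opal_call;
	if (quiesce) {
		cpu->in_opal_call--;
		if (quiesce == QUIESCE_REJECT)
			return OPAL_BUSY;	/* caller should retry later */
		smt_lowest();			/* QUIESCE_HOLD: spin at low priority */
		while (cpu->quiesce_opal_call == QUIESCE_HOLD)
			barrier();
		smt_medium();
		goto again;			/* hold lifted, retry from the top */
	}
	return OPAL_SUCCESS;			/* safe to switch stacks and dispatch */
}

Note that only one load of quiesce_opal_call is compared twice, mirroring the single lwz into r11 in the assembly below.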
-rw-r--r--	asm/asm-offsets.c	2
-rw-r--r--	asm/head.S		72
-rw-r--r--	core/opal.c		41
-rw-r--r--	include/cpu.h		2
4 files changed, 83 insertions, 34 deletions
diff --git a/asm/asm-offsets.c b/asm/asm-offsets.c
index 71199503..3eac592d 100644
--- a/asm/asm-offsets.c
+++ b/asm/asm-offsets.c
@@ -37,6 +37,8 @@ int main(void)
OFFSET(CPUTHREAD_PIR, cpu_thread, pir);
OFFSET(CPUTHREAD_SAVE_R1, cpu_thread, save_r1);
OFFSET(CPUTHREAD_STATE, cpu_thread, state);
+ OFFSET(CPUTHREAD_IN_OPAL_CALL, cpu_thread, in_opal_call);
+ OFFSET(CPUTHREAD_QUIESCE_OPAL_CALL, cpu_thread, quiesce_opal_call);
OFFSET(CPUTHREAD_CUR_TOKEN, cpu_thread, current_token);
DEFINE(CPUTHREAD_GAP, sizeof(struct cpu_thread) + STACK_SAFETY_GAP);
#ifdef STACK_CHECK_ENABLED
diff --git a/asm/head.S b/asm/head.S
index ad306252..eeefcaa4 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -955,14 +955,64 @@ opal_boot_trampoline:
* r0: Token
* r2: OPAL Base
* r3..r10: Args
- * r12: Scratch
+ * r11..r12: Scratch
* r13..r31: Preserved
- *
*/
.balign 0x10
.global opal_entry
opal_entry:
- /* Get our per CPU stack */
+ /* Get our per CPU pointer in r12 to check for quiesce */
+ mfspr %r12,SPR_PIR
+ GET_STACK(%r12,%r12)
+
+ /* Get CPU thread */
+ clrrdi %r12,%r12,STACK_SHIFT
+
+ /*
+ * OPAL entry must first increment in_opal_call, then check
+ * for quiesce, without touching the stack or clobbering
+ * registers other than r11 and r12 and cr0. In this way, OPAL
+ * is tolerant of re-entry on this same CPU while it is spinning
+ * for quiesce.
+ *
+ * Sequence goes:
+ * in_opal_call++;
+ * sync;
+ * if (quiesce_opal_call) {
+ * in_opal_call--;
+ * reject-or-spin-then-retry;
+ * }
+ */
+1: lwz %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ addi %r11,%r11,1
+ stw %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ /*
+ * Order the store in_opal_call vs load quiesce_opal_call.
+ * This also provides an acquire barrier for opal entry vs
+ * another thread quiescing opal. In this way, quiescing
+ * can behave as mutual exclusion.
+ */
+ sync
+ lwz %r11,CPUTHREAD_QUIESCE_OPAL_CALL(%r12)
+ cmpwi %cr0,%r11,0
+ beq+ 4f
+ /* We are quiescing, hold or reject */
+ cmpwi %cr0,%r11,QUIESCE_REJECT
+ bne 2f
+ li %r3,OPAL_BUSY
+ b .Lreturn /* reject */
+2: /* hold */
+ lwz %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ subi %r11,%r11,1
+ stw %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ smt_lowest
+3: lwz %r11,CPUTHREAD_QUIESCE_OPAL_CALL(%r12)
+ cmpwi %cr0,%r11,QUIESCE_HOLD
+ beq 3b
+ /* spin finished, try again */
+ smt_medium
+ b 1b
+
+4: /* Quiesce protocol done, get our per CPU stack */
mfspr %r12,SPR_PIR
GET_STACK(%r12,%r12)
stdu %r12,-STACK_FRAMESIZE(%r12)
@@ -1006,7 +1056,7 @@ opal_entry:
mr %r3,%r1
bl opal_entry_check
cmpdi %r3,0
- bne 1f
+ bne .Lreturn
ld %r0,STACK_GPR0(%r1)
ld %r3,STACK_GPR3(%r1)
@@ -1031,12 +1081,22 @@ opal_entry:
bctrl
mr %r4,%r1
- bl opal_exit_check
+ bl opal_exit_check /* r3 is preserved */
-1: ld %r12,STACK_LR(%r1)
+ /*
+ * Restore r1 and r13 before decrementing in_opal_call.
+ * Move per-cpu pointer to volatile r12, restore lr, r1, r13.
+ */
+.Lreturn:
+ ld %r12,STACK_LR(%r1)
mtlr %r12
+ mr %r12,%r13
ld %r13,STACK_GPR13(%r1)
ld %r1,STACK_GPR1(%r1)
+ sync /* release barrier vs quiescing */
+ lwz %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ subi %r11,%r11,1
+ stw %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
blr
.global start_kernel
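The exit path mirrors the entry protocol: LR, r13 and r1 are restored from the stack frame first, and only then is in_opal_call decremented, so an interrupt or re-entry arriving after the decrement finds fully restored register state. A C-flavoured sketch of the .Lreturn sequence above (illustrative only; in the real asm the per-cpu pointer is carried in volatile r12 because r13 has already been given back to the caller):

static void quiesce_exit(struct cpu_thread *cpu)
{
	/* LR, r13 and r1 have already been restored from the frame */
	sync();			/* release: our OPAL stores happen before the decrement */
	cpu->in_opal_call--;	/* now visible to a quiescing CPU polling this field */
}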
diff --git a/core/opal.c b/core/opal.c
index e57f0a18..f6922b26 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -142,7 +142,7 @@ int64_t opal_entry_check(struct stack_frame *eframe)
if (!opal_check_token(token))
return opal_bad_token(token);
- if (!opal_quiesce_state && cpu->in_opal_call) {
+ if (!opal_quiesce_state && cpu->in_opal_call > 1) {
disable_fast_reboot("Kernel re-entered OPAL");
switch (token) {
case OPAL_CONSOLE_READ:
@@ -158,30 +158,14 @@ int64_t opal_entry_check(struct stack_frame *eframe)
default:
printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n",
mfspr(SPR_PIR), cpu, cpu->pir, token);
+ if (cpu->in_opal_call > 2) {
+ printf("Emergency stack is destroyed, can't continue.\n");
+ abort();
+ }
return OPAL_INTERNAL_ERROR;
}
}
-again:
- cpu->in_opal_call++;
- /*
- * Order the store in_opal_call vs load quiesce_opal_call.
- * This also provides an acquire barrier for opal entry vs
- * another thread quiescing opal. In this way, quiescing
- * can behave as mutual exclusion.
- */
- sync();
- if (cpu->quiesce_opal_call) {
- cpu->in_opal_call--;
- if (opal_quiesce_state == QUIESCE_REJECT)
- return OPAL_BUSY;
- smt_lowest();
- while (cpu->quiesce_opal_call)
- barrier();
- smt_medium();
- goto again;
- }
-
return OPAL_SUCCESS;
}
@@ -196,14 +180,17 @@ int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
disable_fast_reboot("Un-accounted firmware entry");
printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
+ cpu->in_opal_call++; /* avoid exit path underflowing */
} else {
+ if (cpu->in_opal_call > 2) {
+ printf("Emergency stack is destroyed, can't continue.\n");
+ abort();
+ }
if (!list_empty(&cpu->locks_held)) {
prlog(PR_ERR, "OPAL exiting with locks held, token=%llu retval=%lld\n",
token, retval);
drop_my_locks(true);
}
- sync(); /* release barrier vs quiescing */
- cpu->in_opal_call--;
}
return retval;
}
@@ -253,7 +240,7 @@ int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
bust_locks = false;
sync(); /* release barrier vs opal entry */
if (target) {
- target->quiesce_opal_call = false;
+ target->quiesce_opal_call = 0;
} else {
for_each_cpu(c) {
if (quiesce_type == QUIESCE_RESUME_FAST_REBOOT)
@@ -263,7 +250,7 @@ int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
assert(!c->quiesce_opal_call);
continue;
}
- c->quiesce_opal_call = false;
+ c->quiesce_opal_call = 0;
}
}
sync();
@@ -281,12 +268,12 @@ int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
}
if (target) {
- target->quiesce_opal_call = true;
+ target->quiesce_opal_call = quiesce_type;
} else {
for_each_cpu(c) {
if (c == cpu)
continue;
- c->quiesce_opal_call = true;
+ c->quiesce_opal_call = quiesce_type;
}
}
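On the other side of the handshake, the quiescing CPU publishes the quiesce type to every other CPU's quiesce_opal_call and then waits for their in_opal_call counts to drain. A simplified sketch, using a hypothetical helper name quiesce_all_others (the real opal_quiesce also handles single-CPU targets, timeouts and the resume paths shown above):

static void quiesce_all_others(uint32_t quiesce_type)
{
	struct cpu_thread *c, *self = this_cpu();

	for_each_cpu(c) {
		if (c == self)
			continue;
		c->quiesce_opal_call = quiesce_type;	/* QUIESCE_HOLD or QUIESCE_REJECT */
	}
	sync();		/* publish quiesce_opal_call before polling in_opal_call */

	for_each_cpu(c) {
		if (c == self)
			continue;
		while (c->in_opal_call)			/* wait for OPAL exit or reject */
			barrier();
	}
	sync();		/* acquire: pairs with the release sync on the OPAL exit path */
}

Because entry increments in_opal_call before loading quiesce_opal_call (with a full barrier between), any CPU this loop observes with in_opal_call == 0 will see the quiesce flag on its next entry attempt, which is what lets quiescing behave as mutual exclusion.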
diff --git a/include/cpu.h b/include/cpu.h
index 68f24639..4a6bc4a5 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -61,10 +61,10 @@ struct cpu_thread {
uint64_t save_r1;
void *icp_regs;
uint32_t in_opal_call;
+ uint32_t quiesce_opal_call;
uint32_t con_suspend;
struct list_head locks_held;
bool con_need_flush;
- bool quiesce_opal_call;
bool in_mcount;
bool in_poller;
bool in_reinit;