Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp  |  183
1 file changed, 151 insertions, 32 deletions
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 923d82f051f..e9b4032a199 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -1763,13 +1763,6 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
// pass specifically so that we have the complete set of instructions for
// which we will do post-load hardening and can defer it in certain
// circumstances.
- //
- // FIXME: This could probably be made even more effective by doing it
- // across the entire function. Rather than just walking the flat list
- // backwards here, we could walk the function in PO and each block bottom
- // up, allowing us to in some cases sink hardening across blocks. As
- // long as the in-block predicate state is used at the eventual hardening
- // site, this remains safe.
for (MachineInstr &MI : MBB) {
if (HardenLoads) {
// We cannot both require hardening the def of a load and its address.
@@ -1851,8 +1844,8 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
}
// Otherwise we have a call. We need to handle transferring the predicate
- // state into a call and recovering it after the call returns unless this
- // is a tail call.
+ // state into a call and recovering it after the call returns (unless this
+ // is a tail call).
assert(MI.isCall() && "Should only reach here for calls!");
tracePredStateThroughCall(MI);
}
@@ -2374,21 +2367,10 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
DebugLoc Loc = MI.getDebugLoc();
auto InsertPt = MI.getIterator();
- if (FenceCallAndRet) {
- // Simply forcibly block speculation of loads out of the function by using
- // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
- // should be secure, is simple from an ABI perspective, and the cost can be
- // minimized through inlining.
- //
- // FIXME: We should investigate ways to establish a strong data-dependency
- // on the return. However, poisoning the stack pointer is unlikely to work
- // because the return is *predicted* rather than relying on the load of the
- // return address to actually resolve.
- BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
- ++NumInstsInserted;
- ++NumLFENCEsInserted;
+ if (FenceCallAndRet)
+ // No need to fence here: we will fence at the site this return goes back to
+ // (immediately after the call in our caller), which handles more cases than
+ // we can handle here.
return;
- }
// Take our predicate state, shift it to the high 17 bits (so that we keep
// pointers canonical) and merge it into RSP. This will allow the caller to
@@ -2406,31 +2388,168 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
///
/// For tail calls, this is all we need to do.
///
-/// For calls where we might return to control flow, we further need to extract
-/// the predicate state built up within that function from the high bits of the
-/// stack pointer, and make that the newly available predicate state.
+/// For calls where we might return and resume the control flow, we need to
+/// extract the predicate state from the high bits of the stack pointer after
+/// control returns from the called function.
+///
+/// We also need to verify that we intended to return to this location in the
+/// code. An attacker might arrange for the processor to mispredict the return
+/// to this valid but incorrect return address in the program rather than the
+/// correct one. See the paper on this attack, called "ret2spec" by the
+/// researchers, here:
+/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
+///
+/// The way we verify that we returned to the correct location is by preserving
+/// the expected return address across the call. One technique involves taking
+/// advantage of the red zone to load the return address from `-8(%rsp)`, where
+/// it was left when the RET instruction popped it off the stack. Alternatively,
+/// we can directly save the address into a register that will be preserved
+/// across the call. We compare this intended return address against the address
+/// immediately following the call (the observed return address). If these
+/// mismatch, we have detected misspeculation and can poison our predicate
+/// state.
void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
auto InsertPt = MI.getIterator();
DebugLoc Loc = MI.getDebugLoc();
+ if (FenceCallAndRet) {
+ if (MI.isReturn())
+ // This is a tail call; we don't return to this function.
+ // FIXME: We should also handle noreturn calls.
+ return;
+
+ // We don't need to fence before the call because the function should fence
+ // in its entry. However, we do need to fence after the call returns.
+ // Fencing before the return doesn't correctly handle cases where the return
+ // itself is mispredicted.
+ BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
+ ++NumInstsInserted;
+ ++NumLFENCEsInserted;
+ return;
+ }
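// For intuition, with FenceCallAndRet a hardened (non-tail) call site ends up
// looking roughly like this (AT&T syntax; purely an illustrative sketch):
//
//   callq callee
//   lfence        # block speculation past a possibly mispredicted return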
+
// First, we transfer the predicate state into the called function by merging
// it into the stack pointer. This will kill the current def of the state.
unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
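// For intuition, the merge amounts to roughly the following, where the
// predicate state is all-zeros normally and all-ones once misspeculation has
// been detected (illustrative sketch; the register name is a placeholder):
//
//   shlq $47, %state      # move the state into the high bits of a pointer
//   orq  %state, %rsp     # hand it to the callee via the stack pointer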
// If this call is also a return, it is a tail call and we don't need anything
- // else to handle it so just continue.
- // FIXME: We should also handle noreturn calls.
- if (MI.isReturn())
+ // else to handle it so just return. Also, if there are no further
+ // instructions and no successors, this call does not return so we can also
+ // bail.
+ if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
return;
- // We need to step past the call and recover the predicate state from SP after
- // the return, and make this new state available.
+ // Create a symbol to track the return address and attach it to the call
+ // machine instruction. We will lower extra symbols attached to call
+ // instructions as labels immediately following the call.
+ MCSymbol *RetSymbol =
+ MF.getContext().createTempSymbol("slh_ret_addr",
+ /*AlwaysAddSuffix*/ true);
+ MI.setPostInstrSymbol(MF, RetSymbol);
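// In the final assembly the extra symbol is just a label pinned to the address
// immediately after the call, e.g. (label name illustrative):
//
//   callq callee
// .Lslh_ret_addr0: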
+
+ const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
+ unsigned ExpectedRetAddrReg = 0;
+
+ // If we have no red zones or if the function returns twice (possibly without
+ // using the `ret` instruction) like setjmp, we need to save the expected
+ // return address prior to the call.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) ||
+ MF.exposesReturnsTwice()) {
+ // If we don't have red zones, we need to compute the expected return
+ // address prior to the call and store it in a register that lives across
+ // the call.
+ //
+ // In some ways, this is doubly satisfying as a mitigation because it will
+ // also successfully detect stack smashing bugs in some cases (typically,
+ // when a callee-saved register is used and the callee doesn't push it onto
+ // the stack). But that isn't our primary goal, so we only use it as
+ // a fallback.
+ //
+ // FIXME: It isn't clear that this is reliable in the face of
+ // rematerialization in the register allocator. We somehow need to force
+ // that to not occur for this particular instruction, and instead to spill
+ // or otherwise preserve the value computed *prior* to the call.
+ //
+ // FIXME: It is even less clear why MachineCSE can't just fold this when we
+ // end up having to use identical instructions both before and after the
+ // call to feed the comparison.
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
+ .addSym(RetSymbol);
+ } else {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ }
+ }
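// Sketched in assembly, the expected return address is materialized before the
// call in one of two ways (register and label names illustrative):
//
//   movq $.Lslh_ret_addr0, %reg           # small code model, not PIC
//   leaq .Lslh_ret_addr0(%rip), %reg      # all other cases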
+
+ // Step past the call to handle when it returns.
++InsertPt;
+
+ // If we didn't pre-compute the expected return address into a register, then
+ // red zones are enabled and the return address is still intact on the stack
+ // immediately after the call (the red zone keeps it from being clobbered).
+ // We load it into a register as the very first instruction after the call.
+ if (!ExpectedRetAddrReg) {
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RSP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addImm(/*Displacement*/ -8) // The return address has been popped, so
+ // it now sits 8 bytes below the stack pointer.
+ .addReg(/*Segment*/ 0);
+ }
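// Sketched in assembly, this red-zone variant reloads the just-popped return
// address as the first instruction after the call (names illustrative):
//
//   callq callee
// .Lslh_ret_addr0:
//   movq -8(%rsp), %reg    # RET left the return address intact in the red zone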
+
+ // Now we extract the callee's predicate state from the stack pointer.
unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
- PS->SSA.AddAvailableValue(&MBB, NewStateReg);
+
+ // Test the expected return address against our actual address. If we can
+ // form this basic block's address as an immediate, this is easy. Otherwise
+ // we compute it.
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // FIXME: Could we fold this with the load? It would require careful EFLAGS
+ // management.
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addSym(RetSymbol);
+ } else {
+ unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addReg(ActualRetAddrReg, RegState::Kill);
+ }
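// Either way the check boils down to one of the following (names illustrative),
// leaving ZF clear (NE) exactly when we came back to an unexpected address:
//
//   cmpq $.Lslh_ret_addr0, %expected      # small code model, not PIC
//   cmpq %actual, %expected               # %actual from a RIP-relative LEA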
+
+ // Now conditionally update the predicate state we just extracted if we ended
+ // up at a different return address than expected.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
+ .addReg(NewStateReg, RegState::Kill)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
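// Putting the pieces together, a hardened non-tail call site (without
// FenceCallAndRet) looks roughly like the sketch below; the registers, label,
// and compare form vary with the code model and red-zone availability:
//
//   shlq $47, %state
//   orq  %state, %rsp             # pass the predicate state to the callee
//   callq callee
// .Lslh_ret_addr0:
//   movq -8(%rsp), %expected      # or materialized before the call
//   movq %rsp, %newstate
//   sarq $63, %newstate           # recover the callee's predicate state
//   cmpq $.Lslh_ret_addr0, %expected
//   cmovneq %poison, %newstate    # poison the state on a mispredicted return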
}
/// An attacker may speculatively store over a value that is then speculatively