Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp | 183
1 file changed, 151 insertions, 32 deletions
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 923d82f051f..e9b4032a199 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -1763,13 +1763,6 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
     // pass specifically so that we have the complete set of instructions for
     // which we will do post-load hardening and can defer it in certain
     // circumstances.
-    //
-    // FIXME: This could probably be made even more effective by doing it
-    // across the entire function. Rather than just walking the flat list
-    // backwards here, we could walk the function in PO and each block bottom
-    // up, allowing us to in some cases sink hardening across blocks. As
-    // long as the in-block predicate state is used at the eventual hardening
-    // site, this remains safe.
     for (MachineInstr &MI : MBB) {
       if (HardenLoads) {
         // We cannot both require hardening the def of a load and its address.
@@ -1851,8 +1844,8 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
       }
 
       // Otherwise we have a call. We need to handle transferring the predicate
-      // state into a call and recovering it after the call returns unless this
-      // is a tail call.
+      // state into a call and recovering it after the call returns (unless this
+      // is a tail call).
       assert(MI.isCall() && "Should only reach here for calls!");
       tracePredStateThroughCall(MI);
     }
@@ -2374,21 +2367,10 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
   DebugLoc Loc = MI.getDebugLoc();
   auto InsertPt = MI.getIterator();
 
-  if (FenceCallAndRet) {
-    // Simply forcibly block speculation of loads out of the function by using
-    // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
-    // should be secure, is simple from an ABI perspective, and the cost can be
-    // minimized through inlining.
-    //
-    // FIXME: We should investigate ways to establish a strong data-dependency
-    // on the return. However, poisoning the stack pointer is unlikely to work
-    // because the return is *predicted* rather than relying on the load of the
-    // return address to actually resolve.
-    BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
-    ++NumInstsInserted;
-    ++NumLFENCEsInserted;
+  if (FenceCallAndRet)
+    // No need to fence here as we'll fence at the return site itself. That
+    // handles more cases than we can handle here.
     return;
-  }
 
   // Take our predicate state, shift it to the high 17 bits (so that we keep
   // pointers canonical) and merge it into RSP. This will allow the caller to
@@ -2406,31 +2388,168 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
 ///
 /// For tail calls, this is all we need to do.
 ///
-/// For calls where we might return to control flow, we further need to extract
-/// the predicate state built up within that function from the high bits of the
-/// stack pointer, and make that the newly available predicate state.
+/// For calls where we might return and resume the control flow, we need to
+/// extract the predicate state from the high bits of the stack pointer after
+/// control returns from the called function.
+///
+/// We also need to verify that we intended to return to this location in the
+/// code. An attacker might arrange for the processor to mispredict the return
+/// to this valid but incorrect return address in the program rather than the
+/// correct one. See the paper on this attack, called "ret2spec" by the
+/// researchers, here:
+/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
+///
+/// The way we verify that we returned to the correct location is by preserving
+/// the expected return address across the call. One technique involves taking
+/// advantage of the red-zone to load the return address from `-8(%rsp)` where
+/// it was left by the RET instruction when it popped `%rsp`. Alternatively, we
+/// can directly save the address into a register that will be preserved across
+/// the call. We compare this intended return address against the address
+/// immediately following the call (the observed return address). If these
+/// mismatch, we have detected misspeculation and can poison our predicate
+/// state.
 void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
     MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
   auto InsertPt = MI.getIterator();
   DebugLoc Loc = MI.getDebugLoc();
 
+  if (FenceCallAndRet) {
+    if (MI.isReturn())
+      // Tail call, we don't return to this function.
+      // FIXME: We should also handle noreturn calls.
+      return;
+
+    // We don't need to fence before the call because the function should fence
+    // in its entry. However, we do need to fence after the call returns.
+    // Fencing before the return doesn't correctly handle cases where the return
+    // itself is mispredicted.
+    BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
+    ++NumInstsInserted;
+    ++NumLFENCEsInserted;
+    return;
+  }
+
   // First, we transfer the predicate state into the called function by merging
   // it into the stack pointer. This will kill the current def of the state.
   unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
   mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
 
   // If this call is also a return, it is a tail call and we don't need anything
-  // else to handle it so just continue.
-  // FIXME: We should also handle noreturn calls.
-  if (MI.isReturn())
+  // else to handle it so just return. Also, if there are no further
+  // instructions and no successors, this call does not return so we can also
+  // bail.
+  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
     return;
 
-  // We need to step past the call and recover the predicate state from SP after
-  // the return, and make this new state available.
+  // Create a symbol to track the return address and attach it to the call
+  // machine instruction. We will lower extra symbols attached to call
+  // instructions as labels immediately following the call.
+  MCSymbol *RetSymbol =
+      MF.getContext().createTempSymbol("slh_ret_addr",
+                                       /*AlwaysAddSuffix*/ true);
+  MI.setPostInstrSymbol(MF, RetSymbol);
+
+  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
+  unsigned ExpectedRetAddrReg = 0;
+
+  // If we have no red zones or if the function returns twice (possibly without
+  // using the `ret` instruction, as with setjmp), we need to save the expected
+  // return address prior to the call.
+  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) ||
+      MF.exposesReturnsTwice()) {
+    // If we don't have red zones, we need to compute the expected return
+    // address prior to the call and store it in a register that lives across
+    // the call.
+    //
+    // In some ways, this is doubly satisfying as a mitigation because it will
+    // also successfully detect stack smashing bugs in some cases (typically,
+    // when a callee-saved register is used and the callee doesn't push it onto
+    // the stack). But that isn't our primary goal, so we only use it as
+    // a fallback.
+    //
+    // FIXME: It isn't clear that this is reliable in the face of
+    // rematerialization in the register allocator. We somehow need to force
+    // that to not occur for this particular instruction, and instead to spill
+    // or otherwise preserve the value computed *prior* to the call.
+    //
+    // FIXME: It is even less clear why MachineCSE can't just fold this when we
+    // end up having to use identical instructions both before and after the
+    // call to feed the comparison.
+    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+    if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+        !Subtarget->isPositionIndependent()) {
+      BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
+          .addSym(RetSymbol);
+    } else {
+      BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
+          .addReg(/*Base*/ X86::RIP)
+          .addImm(/*Scale*/ 1)
+          .addReg(/*Index*/ 0)
+          .addSym(RetSymbol)
+          .addReg(/*Segment*/ 0);
+    }
+  }
+
+  // Step past the call to handle when it returns.
   ++InsertPt;
+
+  // If we didn't pre-compute the expected return address into a register, then
+  // red zones are enabled and the return address is still available on the
+  // stack immediately after the call. As the very first instruction, we load it
+  // into a register.
+  if (!ExpectedRetAddrReg) {
+    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
+        .addReg(/*Base*/ X86::RSP)
+        .addImm(/*Scale*/ 1)
+        .addReg(/*Index*/ 0)
+        .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
+                                     // the return address is 8 bytes past it.
+        .addReg(/*Segment*/ 0);
+  }
+
+  // Now we extract the callee's predicate state from the stack pointer.
   unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
-  PS->SSA.AddAvailableValue(&MBB, NewStateReg);
+
+  // Test the expected return address against our actual address. If we can
+  // form this basic block's address as an immediate, this is easy. Otherwise
+  // we compute it.
+  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+      !Subtarget->isPositionIndependent()) {
+    // FIXME: Could we fold this with the load? It would require careful EFLAGS
+    // management.
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
+        .addReg(ExpectedRetAddrReg, RegState::Kill)
+        .addSym(RetSymbol);
+  } else {
+    unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
+        .addReg(/*Base*/ X86::RIP)
+        .addImm(/*Scale*/ 1)
+        .addReg(/*Index*/ 0)
+        .addSym(RetSymbol)
+        .addReg(/*Segment*/ 0);
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
+        .addReg(ExpectedRetAddrReg, RegState::Kill)
+        .addReg(ActualRetAddrReg, RegState::Kill);
+  }
+
+  // Now conditionally update the predicate state we just extracted if we ended
+  // up at a different return address than expected.
+  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+  auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+
+  unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+  auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
+                   .addReg(NewStateReg, RegState::Kill)
+                   .addReg(PS->PoisonReg);
+  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+  ++NumInstsInserted;
+  LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+
+  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
 }
 
 /// An attacker may speculatively store over a value that is then speculatively
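To make the new hardening concrete, here is a rough sketch of the instruction sequence the hunk above might lower to at a call site, assuming the small, position-dependent code model with the red zone available. This is an illustration, not output of the pass: the registers (%rcx for the loaded return address, %rax for the extracted predicate state, %rdx standing in for the all-ones value in PS->PoisonReg) and the label name are hypothetical placeholders for what the register allocator and MCContext actually pick.

            callq   callee
    .Lslh_ret_addr0:                        # post-instruction symbol attached to the call
            movq    -8(%rsp), %rcx          # return address left in the red zone by the ret
            movq    %rsp, %rax              # extract the predicate state from %rsp:
            sarq    $63, %rax               #   smear the poison bit across all 64 bits
            cmpq    $.Lslh_ret_addr0, %rcx  # did we resume at the expected address?
            cmovneq %rdx, %rax              # mispredicted return: poison the state

In the position-independent or non-small code-model case the label is not encodable as an immediate, so the LEA64r/CMP64rr branch of the code instead materializes it with `leaq .Lslh_ret_addr0(%rip), %reg` and compares the two registers.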
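The mergePredStateIntoSP and extractPredStateFromSP helpers this code calls (defined elsewhere in the file) carry the predicate state through the callee via the stack pointer. As another hedged sketch, with the all-zeros/all-ones state in %rax, the round trip is roughly:

            shlq    $47, %rax               # move the state into the high 17 bits
            orq     %rax, %rsp              #   and merge it into %rsp (pointers stay canonical)
            callq   callee
            movq    %rsp, %rax              # after the return, copy %rsp back out
            sarq    $63, %rax               # arithmetic shift recovers all-ones or all-zeros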
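Finally, in the FenceCallAndRet mode the hunks above skip this data-dependent tracking entirely: hardenReturnInstr now emits nothing, and tracePredStateThroughCall instead places an LFENCE immediately after each non-tail call, which covers the return site even when the return itself was mispredicted:

            callq   callee
            lfence                          # block speculation past a mispredicted return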