Diffstat (limited to 'llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp | 271 |
1 file changed, 268 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 81bd9014a88..923d82f051f 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -179,6 +179,9 @@ private:
 
   void unfoldCallAndJumpLoads(MachineFunction &MF);
 
+  SmallVector<MachineInstr *, 16>
+  tracePredStateThroughIndirectBranches(MachineFunction &MF);
+
   void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
 
   unsigned saveEFLAGS(MachineBasicBlock &MBB,
@@ -522,11 +525,16 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
     }
   }
 
-  // If we are going to harden calls and jumps we need to unfold their memory
-  // operands.
-  if (HardenIndirectCallsAndJumps)
+  if (HardenIndirectCallsAndJumps) {
+    // If we are going to harden calls and jumps we need to unfold their memory
+    // operands.
     unfoldCallAndJumpLoads(MF);
 
+    // Then we trace predicate state through the indirect branches.
+    auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
+    CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
+  }
+
   // Now that we have the predicate state available at the start of each block
   // in the CFG, trace it through each block, hardening vulnerable instructions
   // as we go.
@@ -925,6 +933,263 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
   }
 }
 
+/// Trace the predicate state through indirect branches, instrumenting them to
+/// poison the state if a target is reached that does not match the expected
+/// target.
+///
+/// This is designed to mitigate Spectre variant 1 attacks where an indirect
+/// branch is trained to predict a particular target and then mispredicts that
+/// target in a way that can leak data. Despite using an indirect branch, this
+/// is really a variant 1 style attack: it does not steer execution to an
+/// arbitrary or attacker controlled address, and it does not require any
+/// special code executing next to the victim. This attack can also be
+/// mitigated through retpolines, but those require either replacing indirect
+/// branches with conditional direct branches or lowering them through a
+/// device that blocks speculation. This mitigation can replace these
+/// retpoline-style mitigations for jump tables and other indirect branches
+/// within a function when variant 2 isn't a risk while allowing limited
+/// speculation. Indirect calls, however, cannot be mitigated through this
+/// technique without changing the ABI in a fundamental way.
+SmallVector<MachineInstr *, 16>
+X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
+    MachineFunction &MF) {
+  // We use the SSAUpdater to insert PHI nodes for the target addresses of
+  // indirect branches. We don't actually need the full power of the SSA
+  // updater in this particular case as we always have immediately available
+  // values, but this avoids us having to re-implement the PHI construction
+  // logic.
+  MachineSSAUpdater TargetAddrSSA(MF);
+  TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
+
+  // Track which blocks were terminated with an indirect branch.
+  SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
+
+  // We need to know what blocks end up reached via indirect branches. We
+  // expect this to be a subset of those whose address is taken and so track it
+  // directly via the CFG.
+  SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
+
+  // Walk all the blocks which end in an indirect branch and make the target
+  // address available.
+  for (MachineBasicBlock &MBB : MF) {
+    // Find the last terminator.
+    auto MII = MBB.instr_rbegin();
+    while (MII != MBB.instr_rend() && MII->isDebugInstr())
+      ++MII;
+    if (MII == MBB.instr_rend())
+      continue;
+    MachineInstr &TI = *MII;
+    if (!TI.isTerminator() || !TI.isBranch())
+      // No terminator or non-branch terminator.
+      continue;
+
+    unsigned TargetReg;
+
+    switch (TI.getOpcode()) {
+    default:
+      // Direct branch or conditional branch (leading to fallthrough).
+      continue;
+
+    case X86::FARJMP16m:
+    case X86::FARJMP32m:
+    case X86::FARJMP64:
+      // We cannot mitigate far jumps or calls, but we also don't expect them
+      // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
+      continue;
+
+    case X86::JMP16m:
+    case X86::JMP16m_NT:
+    case X86::JMP32m:
+    case X86::JMP32m_NT:
+    case X86::JMP64m:
+    case X86::JMP64m_NT:
+      // Mostly as documentation.
+      report_fatal_error("Memory operand jumps should have been unfolded!");
+
+    case X86::JMP16r:
+      report_fatal_error(
+          "Support for 16-bit indirect branches is not implemented.");
+    case X86::JMP32r:
+      report_fatal_error(
+          "Support for 32-bit indirect branches is not implemented.");
+
+    case X86::JMP64r:
+      TargetReg = TI.getOperand(0).getReg();
+    }
+
+    // We have definitely found an indirect branch. Verify that there are no
+    // preceding conditional branches as we don't yet support that.
+    if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
+          return !OtherTI.isDebugInstr() && &OtherTI != &TI;
+        })) {
+      LLVM_DEBUG({
+        dbgs() << "ERROR: Found other terminators in a block with an indirect "
+                  "branch! This is not yet supported! Terminator sequence:\n";
+        for (MachineInstr &MI : MBB.terminators()) {
+          MI.dump();
+          dbgs() << '\n';
+        }
+      });
+      report_fatal_error("Unimplemented terminator sequence!");
+    }
+
+    // Make the target register an available value for this block.
+    TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
+    IndirectTerminatedMBBs.insert(&MBB);
+
+    // Add all the successors to our target candidates.
+    for (MachineBasicBlock *Succ : MBB.successors())
+      IndirectTargetMBBs.insert(Succ);
+  }
+
+  // Keep track of the cmov instructions we insert so we can return them.
+  SmallVector<MachineInstr *, 16> CMovs;
+
+  // If we didn't find any indirect branches with targets, nothing to do here.
+  if (IndirectTargetMBBs.empty())
+    return CMovs;
+
+  // We found indirect branches and targets that need to be instrumented to
+  // harden loads within them. Walk the blocks of the function (to get a stable
+  // ordering) and instrument each target of an indirect branch.
+  for (MachineBasicBlock &MBB : MF) {
+    // Skip the blocks that aren't candidate targets.
+    if (!IndirectTargetMBBs.count(&MBB))
+      continue;
+
+    // We don't expect EH pads to ever be reached via an indirect branch. If
+    // this is desired for some reason, we could simply skip them here rather
+    // than asserting.
+    assert(!MBB.isEHPad() &&
+           "Unexpected EH pad as target of an indirect branch!");
+
+    // We should never end up threading EFLAGS into a block to harden
+    // conditional jumps as there would be an additional successor via the
+    // indirect branch. As a consequence, all such edges would be split before
+    // reaching here, and the inserted block will handle the EFLAGS-based
+    // hardening.
+    assert(!MBB.isLiveIn(X86::EFLAGS) &&
+           "Cannot check within a block that already has live-in EFLAGS!");
+
+    // We can't handle having non-indirect edges into this block unless this is
+    // the only successor and we can synthesize the necessary target address.
+    for (MachineBasicBlock *Pred : MBB.predecessors()) {
+      // If we've already handled this by extracting the target directly,
+      // nothing to do.
+      if (IndirectTerminatedMBBs.count(Pred))
+        continue;
+
+      // Otherwise, we have to be the only successor. We generally expect this
+      // to be true as conditional branches should have had a critical edge
+      // split already. We don't however need to worry about EH pad successors
+      // as they'll happily ignore the target and their hardening strategy is
+      // resilient to all ways in which they could be reached speculatively.
+      if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
+            return Succ->isEHPad() || Succ == &MBB;
+          })) {
+        LLVM_DEBUG({
+          dbgs() << "ERROR: Found conditional entry to target of indirect "
+                    "branch!\n";
+          Pred->dump();
+          MBB.dump();
+        });
+        report_fatal_error("Cannot harden a conditional entry to a target of "
+                           "an indirect branch!");
+      }
+
+      // Now we need to compute the address of this block and install it as a
+      // synthetic target in the predecessor. We do this at the bottom of the
+      // predecessor.
+      auto InsertPt = Pred->getFirstTerminator();
+      unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+      if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+          !Subtarget->isPositionIndependent()) {
+        // Directly materialize it into an immediate.
+        auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
+                             TII->get(X86::MOV64ri32), TargetReg)
+                         .addMBB(&MBB);
+        ++NumInstsInserted;
+        (void)AddrI;
+        LLVM_DEBUG(dbgs() << "  Inserting mov: "; AddrI->dump();
+                   dbgs() << "\n");
+      } else {
+        auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
+                             TargetReg)
+                         .addReg(/*Base*/ X86::RIP)
+                         .addImm(/*Scale*/ 1)
+                         .addReg(/*Index*/ 0)
+                         .addMBB(&MBB)
+                         .addReg(/*Segment*/ 0);
+        ++NumInstsInserted;
+        (void)AddrI;
+        LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump();
+                   dbgs() << "\n");
+      }
+      // And make this available.
+      TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
+    }
+
+    // Materialize the needed SSA value of the target. Note that we need the
+    // middle of the block as this block might at the bottom have an indirect
+    // branch back to itself. We can do this here because at this point, every
+    // predecessor of this block has an available value. This is basically just
+    // automating the construction of a PHI node for this target.
+    unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
+
+    // Insert a comparison of the incoming target register with this block's
+    // address.
+    auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+    if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+        !Subtarget->isPositionIndependent()) {
+      // Check directly against a relocated immediate when we can.
+      auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
+                        .addReg(TargetReg, RegState::Kill)
+                        .addMBB(&MBB);
+      ++NumInstsInserted;
+      (void)CheckI;
+      LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+    } else {
+      // Otherwise compute the address into a register first.
+      unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+      auto AddrI =
+          BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
+              .addReg(/*Base*/ X86::RIP)
+              .addImm(/*Scale*/ 1)
+              .addReg(/*Index*/ 0)
+              .addMBB(&MBB)
+              .addReg(/*Segment*/ 0);
+      ++NumInstsInserted;
+      (void)AddrI;
+      LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump(); dbgs() << "\n");
+      auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
+                        .addReg(TargetReg, RegState::Kill)
+                        .addReg(AddrReg, RegState::Kill);
+      ++NumInstsInserted;
+      (void)CheckI;
+      LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+    }
+
+    // Now cmov over the predicate if the comparison wasn't equal.
+    int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+    auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+    unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+    auto CMovI =
+        BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
+            .addReg(PS->InitialReg)
+            .addReg(PS->PoisonReg);
+    CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+    ++NumInstsInserted;
+    LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+    CMovs.push_back(&*CMovI);
+
+    // And put the new value into the available values for SSA form of our
+    // predicate state.
+    PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
+  }
+
+  // Return all the newly inserted cmov instructions of the predicate state.
+  return CMovs;
+}
+
 /// Returns true if the instruction has no behavior (specified or otherwise)
 /// that is based on the value of any of its register operands
 ///
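For readers following the patch, here is a minimal C++ sketch (not part of the change) of what the inserted cmp plus cmovne sequence computes at the start of each indirect-branch target block: the block compares the address its predecessor intended to reach against its own address and, on a mismatch, folds the all-ones poison value into the predicate state so that loads later hardened against that state yield no usable speculative data. The names pred_state, expected_target, actual_block, POISON, and harden_indirect_target are hypothetical, chosen only for illustration.

// Illustrative sketch only: models the CMP64rr/CMP64ri32 + CMOVNE pattern the
// pass emits at an indirect-branch target. All names are hypothetical.
#include <cstdint>

static constexpr uint64_t POISON = ~0ULL; // all-ones poison predicate state

// expected_target: the address supplied by the predecessor's JMP64r operand
//                  (or a synthesized MOV64ri32/LEA64r of the block address),
//                  merged across predecessors via the SSA/PHI machinery.
// actual_block:    the address of the block that was actually reached.
inline uint64_t harden_indirect_target(uint64_t pred_state,
                                       uint64_t expected_target,
                                       uint64_t actual_block) {
  // cmp expected_target, actual_block ; cmovne pred_state, POISON
  return expected_target != actual_block ? POISON : pred_state;
}

Under correct (or correctly predicted) execution the two addresses match and the predicate state passes through unchanged; only a mispredicted indirect branch observes the poisoned state.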