| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64.h | 2 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FastISel.cpp | 7 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 18 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 34 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 4 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp | 368 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 11 |
| -rw-r--r-- | llvm/lib/Target/AArch64/CMakeLists.txt | 1 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/O0-pipeline.ll | 1 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll | 71 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/speculation-hardening.ll | 156 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/speculation-hardening.mir | 117 |
14 files changed, 789 insertions, 9 deletions
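In outline, the new AArch64SpeculationHardening pass tracks conditional-branch misspeculation in a reserved taint register (X16): conditional branches are forced to consume NZCV, the same flags are re-checked with a CSEL on each outgoing edge, and the taint is encoded into SP around calls and returns. The following hand-written sketch shows the pattern the tests below check for; register choices come from the pass, but the labels and surrounding code are illustrative, not verbatim compiler output:

    // Function entry (and landing pads): recover the caller's taint from SP.
    cmp   sp, #0                // SUBS xzr, sp, #0
    csetm x16, ne               // all-ones normally, all-zeros if SP encodes misspeculation

    // Conditional control flow: no CB(N)Z/TB(N)Z, so the branch flags can be
    // re-evaluated by a CSEL on each outgoing edge.
    cmp   w0, w1
    b.lt  .Ltaken
    csel  x16, x16, xzr, ge     // fall-through edge: zero the taint if reached by misspeculation
    ...
.Ltaken:
    csel  x16, x16, xzr, lt     // taken edge: same check with the branch's own condition
    ...

    // Before returns (and calls): fold the taint into SP, which becomes 0
    // under misspeculation.
    mov   x17, sp
    and   x17, x17, x16
    mov   sp, x17
    ret

Functions that already use X16/W16 themselves cannot reserve the taint register; for those the pass falls back to full DSB SY/ISB barriers, sketched after the tests at the end of this page.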
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index 2f0d0bf346d..c36d9354f3b 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -39,6 +39,7 @@ FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,                                   CodeGenOpt::Level OptLevel);  FunctionPass *createAArch64StorePairSuppressPass();  FunctionPass *createAArch64ExpandPseudoPass(); +FunctionPass *createAArch64SpeculationHardeningPass();  FunctionPass *createAArch64LoadStoreOptimizationPass();  FunctionPass *createAArch64SIMDInstrOptPass();  ModulePass *createAArch64PromoteConstantPass(); @@ -68,6 +69,7 @@ void initializeAArch64ConditionalComparesPass(PassRegistry&);  void initializeAArch64ConditionOptimizerPass(PassRegistry&);  void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);  void initializeAArch64ExpandPseudoPass(PassRegistry&); +void initializeAArch64SpeculationHardeningPass(PassRegistry&);  void initializeAArch64LoadStoreOptPass(PassRegistry&);  void initializeAArch64SIMDInstrOptPass(PassRegistry&);  void initializeAArch64PreLegalizerCombinerPass(PassRegistry&); diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 7a7b0dd20a4..47550cabb9f 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2258,6 +2258,13 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {  /// Try to emit a combined compare-and-branch instruction.  bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { +  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions +  // will not be produced, as they are conditional branch instructions that do +  // not set flags. +  if (FuncInfo.MF->getFunction().hasFnAttribute( +          Attribute::SpeculativeLoadHardening)) +    return false; +    assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");    const CmpInst *CI = cast<CmpInst>(BI->getCondition());    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7b539417941..cc10c9688e1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4343,6 +4343,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {    SDValue Dest = Op.getOperand(4);    SDLoc dl(Op); +  MachineFunction &MF = DAG.getMachineFunction(); +  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions +  // will not be produced, as they are conditional branch instructions that do +  // not set flags. +  bool ProduceNonFlagSettingCondBr = +      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); +    // Handle f128 first, since lowering it will result in comparing the return    // value of a libcall against zero, which is just what the rest of LowerBR_CC    // is expecting to deal with. @@ -4385,7 +4392,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {      // If the RHS of the comparison is zero, we can potentially fold this      // to a specialized branch.      
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS); -    if (RHSC && RHSC->getZExtValue() == 0) { +    if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {        if (CC == ISD::SETEQ) {          // See if we can use a TBZ to fold in an AND as well.          // TBZ has a smaller branch displacement than CBZ.  If the offset is @@ -4428,7 +4435,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {        }      }      if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT && -        LHS.getOpcode() != ISD::AND) { +        LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {        // Don't combine AND since emitComparison converts the AND to an ANDS        // (a.k.a. TST) and the test in the test bit and branch instruction        // becomes redundant.  This would also increase register pressure. @@ -10807,6 +10814,13 @@ SDValue performCONDCombine(SDNode *N,  static SDValue performBRCONDCombine(SDNode *N,                                      TargetLowering::DAGCombinerInfo &DCI,                                      SelectionDAG &DAG) { +  MachineFunction &MF = DAG.getMachineFunction(); +  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions +  // will not be produced, as they are conditional branch instructions that do +  // not set flags. +  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening)) +    return SDValue(); +    if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))      N = NV.getNode();    SDValue Chain = N->getOperand(0); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index eddb349f0bf..10464ea57bb 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -964,6 +964,13 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,                                              const MachineFunction &MF) const {    if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))      return true; +  switch (MI.getOpcode()) { +  case AArch64::DSB: +  case AArch64::ISB: +    // DSB and ISB also are scheduling barriers. +    return true; +  default:; +  }    return isSEHInstruction(MI);  } diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 90258cc1555..6cbfb6ab161 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -788,16 +788,36 @@ bool AArch64InstructionSelector::select(MachineInstr &I,      const unsigned CondReg = I.getOperand(0).getReg();      MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); -    if (selectCompareBranch(I, MF, MRI)) +    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z +    // instructions will not be produced, as they are conditional branch +    // instructions that do not set flags. 
+    bool ProduceNonFlagSettingCondBr = +        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); +    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))        return true; -    auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW)) -                   .addUse(CondReg) -                   .addImm(/*bit offset=*/0) -                   .addMBB(DestMBB); +    if (ProduceNonFlagSettingCondBr) { +      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW)) +                     .addUse(CondReg) +                     .addImm(/*bit offset=*/0) +                     .addMBB(DestMBB); -    I.eraseFromParent(); -    return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); +      I.eraseFromParent(); +      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); +    } else { +      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) +                     .addDef(AArch64::WZR) +                     .addUse(CondReg) +                     .addImm(1); +      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI); +      auto Bcc = +          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc)) +              .addImm(AArch64CC::EQ) +              .addMBB(DestMBB); + +      I.eraseFromParent(); +      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI); +    }    }    case TargetOpcode::G_BRINDIRECT: { diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 55631bcba23..96ae45ae3d0 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -203,6 +203,10 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {    if (hasBasePointer(MF))      markSuperRegs(Reserved, AArch64::W19); +  // SLH uses register W16/X16 as the taint register. +  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening)) +    markSuperRegs(Reserved, AArch64::W16); +    assert(checkAllSuperRegsMarked(Reserved));    return Reserved;  } diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp new file mode 100644 index 00000000000..1f8ef5ee6ea --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp @@ -0,0 +1,368 @@ +//===- AArch64SpeculationHardening.cpp - Harden Against Missspeculation  --===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass to insert code to mitigate against side channel +// vulnerabilities that may happen under control flow miss-speculation. +// +// The pass implements tracking of control flow miss-speculation into a "taint" +// register. That taint register can then be used to mask off registers with +// sensitive data when executing under miss-speculation, a.k.a. "transient +// execution". +// This pass is aimed at mitigating against SpectreV1-style vulnarabilities. +// +// At the moment, it implements the tracking of miss-speculation of control +// flow into a taint register, but doesn't implement a mechanism yet to then +// use that taint register to mask of vulnerable data in registers (something +// for a follow-on improvement). 
Possible strategies to mask out vulnerable +// data that can be implemented on top of this are: +// - speculative load hardening to automatically mask of data loaded +//   in registers. +// - using intrinsics to mask of data in registers as indicated by the +//   programmer (see https://lwn.net/Articles/759423/). +// +// For AArch64, the following implementation choices are made below. +// Some of these are different than the implementation choices made in +// the similar pass implemented in X86SpeculativeLoadHardening.cpp, as +// the instruction set characteristics result in different trade-offs. +// - The speculation hardening is done after register allocation. With a +//   relative abundance of registers, one register is reserved (X16) to be +//   the taint register. X16 is expected to not clash with other register +//   reservation mechanisms with very high probability because: +//   . The AArch64 ABI doesn't guarantee X16 to be retained across any call. +//   . The only way to request X16 to be used as a programmer is through +//     inline assembly. In the rare case a function explicitly demands to +//     use X16/W16, this pass falls back to hardening against speculation +//     by inserting a DSB SYS/ISB barrier pair which will prevent control +//     flow speculation. +// - It is easy to insert mask operations at this late stage as we have +//   mask operations available that don't set flags. +// - The taint variable contains all-ones when no miss-speculation is detected, +//   and contains all-zeros when miss-speculation is detected. Therefore, when +//   masking, an AND instruction (which only changes the register to be masked, +//   no other side effects) can easily be inserted anywhere that's needed. +// - The tracking of miss-speculation is done by using a data-flow conditional +//   select instruction (CSEL) to evaluate the flags that were also used to +//   make conditional branch direction decisions. Speculation of the CSEL +//   instruction can be limited with a CSDB instruction - so the combination of +//   CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL +//   aren't speculated. When conditional branch direction gets miss-speculated, +//   the semantics of the inserted CSEL instruction is such that the taint +//   register will contain all zero bits. +//   One key requirement for this to work is that the conditional branch is +//   followed by an execution of the CSEL instruction, where the CSEL +//   instruction needs to use the same flags status as the conditional branch. +//   This means that the conditional branches must not be implemented as one +//   of the AArch64 conditional branches that do not use the flags as input +//   (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction +//   selectors to not produce these instructions when speculation hardening +//   is enabled. This pass will assert if it does encounter such an instruction. +// - On function call boundaries, the miss-speculation state is transferred from +//   the taint register X16 to be encoded in the SP register as value 0. +// +// Future extensions/improvements could be: +// - Implement this functionality using full speculation barriers, akin to the +//   x86-slh-lfence option. This may be more useful for the intrinsics-based +//   approach than for the SLH approach to masking. +//   Note that this pass already inserts the full speculation barriers if the +//   function for some niche reason makes use of X16/W16. 
+// - no indirect branch misprediction gets protected/instrumented; but this +//   could be done for some indirect branches, such as switch jump tables. +//===----------------------------------------------------------------------===// + +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-speculation-hardening" + +#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass" + +namespace { + +class AArch64SpeculationHardening : public MachineFunctionPass { +public: +  const TargetInstrInfo *TII; +  const TargetRegisterInfo *TRI; + +  static char ID; + +  AArch64SpeculationHardening() : MachineFunctionPass(ID) { +    initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry()); +  } + +  bool runOnMachineFunction(MachineFunction &Fn) override; + +  StringRef getPassName() const override { +    return AARCH64_SPECULATION_HARDENING_NAME; +  } + +private: +  unsigned MisspeculatingTaintReg; +  bool UseControlFlowSpeculationBarrier; + +  bool functionUsesHardeningRegister(MachineFunction &MF) const; +  bool instrumentControlFlow(MachineBasicBlock &MBB); +  bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, +                               MachineBasicBlock *&FBB, +                               AArch64CC::CondCode &CondCode) const; +  void insertTrackingCode(MachineBasicBlock &SplitEdgeBB, +                          AArch64CC::CondCode &CondCode, DebugLoc DL) const; +  void insertSPToRegTaintPropagation(MachineBasicBlock *MBB, +                                     MachineBasicBlock::iterator MBBI) const; +  void insertRegToSPTaintPropagation(MachineBasicBlock *MBB, +                                     MachineBasicBlock::iterator MBBI, +                                     unsigned TmpReg) const; +}; + +} // end anonymous namespace + +char AArch64SpeculationHardening::ID = 0; + +INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening", +                AARCH64_SPECULATION_HARDENING_NAME, false, false) + +bool AArch64SpeculationHardening::endsWithCondControlFlow( +    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, +    AArch64CC::CondCode &CondCode) const { +  SmallVector<MachineOperand, 1> analyzeBranchCondCode; +  if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false)) +    return false; + +  // Ignore if the BB ends in an unconditional branch/fall-through. +  if (analyzeBranchCondCode.empty()) +    return false; + +  // If the BB ends with a single conditional branch, FBB will be set to +  // nullptr (see API docs for TII->analyzeBranch). For the rest of the +  // analysis we want the FBB block to be set always. 
+  assert(TBB != nullptr); +  if (FBB == nullptr) +    FBB = MBB.getFallThrough(); + +  // If both the true and the false condition jump to the same basic block, +  // there isn't need for any protection - whether the branch is speculated +  // correctly or not, we end up executing the architecturally correct code. +  if (TBB == FBB) +    return false; + +  assert(MBB.succ_size() == 2); +  // translate analyzeBranchCondCode to CondCode. +  assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format"); +  CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm()); +  return true; +} + +void AArch64SpeculationHardening::insertTrackingCode( +    MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode, +    DebugLoc DL) const { +  if (UseControlFlowSpeculationBarrier) { +    // insert full control flow speculation barrier (DSB SYS + ISB) +    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB)) +        .addImm(0xf); +    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB)) +        .addImm(0xf); +  } else { +    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr)) +        .addDef(MisspeculatingTaintReg) +        .addUse(MisspeculatingTaintReg) +        .addUse(AArch64::XZR) +        .addImm(CondCode); +    SplitEdgeBB.addLiveIn(AArch64::NZCV); +  } +} + +bool AArch64SpeculationHardening::instrumentControlFlow( +    MachineBasicBlock &MBB) { +  LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB); + +  bool Modified = false; +  MachineBasicBlock *TBB = nullptr; +  MachineBasicBlock *FBB = nullptr; +  AArch64CC::CondCode CondCode; + +  if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) { +    LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n"); +  } else { +    // Now insert: +    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and +    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False +    // edge. +    AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode); + +    MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this); +    MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this); + +    assert(SplitEdgeTBB != nullptr); +    assert(SplitEdgeFBB != nullptr); + +    DebugLoc DL; +    if (MBB.instr_end() != MBB.instr_begin()) +      DL = (--MBB.instr_end())->getDebugLoc(); + +    insertTrackingCode(*SplitEdgeTBB, CondCode, DL); +    insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL); + +    LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n"); +    LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n"); +    Modified = true; +  } + +  // Perform correct code generation around function calls and before returns. 
+  { +    SmallVector<MachineInstr *, 4> ReturnInstructions; +    SmallVector<MachineInstr *, 4> CallInstructions; + +    for (MachineInstr &MI : MBB) { +      if (MI.isReturn()) +        ReturnInstructions.push_back(&MI); +      else if (MI.isCall()) +        CallInstructions.push_back(&MI); +    } + +    Modified |= +        (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0); + +    for (MachineInstr *Return : ReturnInstructions) +      insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17); +    for (MachineInstr *Call : CallInstructions) { +      // Just after the call: +      MachineBasicBlock::iterator i = Call; +      i++; +      insertSPToRegTaintPropagation(Call->getParent(), i); +      // Just before the call: +      insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17); +    } +  } + +  return Modified; +} + +void AArch64SpeculationHardening::insertSPToRegTaintPropagation( +    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const { +  // If full control flow speculation barriers are used, emit a control flow +  // barrier to block potential miss-speculation in flight coming in to this +  // function. +  if (UseControlFlowSpeculationBarrier) { +    // insert full control flow speculation barrier (DSB SYS + ISB) +    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf); +    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf); +    return; +  } + +  // CMP   SP, #0   === SUBS   xzr, SP, #0 +  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri)) +      .addDef(AArch64::XZR) +      .addUse(AArch64::SP) +      .addImm(0) +      .addImm(0); // no shift +  // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ +  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr)) +      .addDef(MisspeculatingTaintReg) +      .addUse(AArch64::XZR) +      .addUse(AArch64::XZR) +      .addImm(AArch64CC::EQ); +} + +void AArch64SpeculationHardening::insertRegToSPTaintPropagation( +    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, +    unsigned TmpReg) const { +  // If full control flow speculation barriers are used, there will not be +  // miss-speculation when returning from this function, and therefore, also +  // no need to encode potential miss-speculation into the stack pointer. +  if (UseControlFlowSpeculationBarrier) +    return; + +  // mov   Xtmp, SP  === ADD  Xtmp, SP, #0 +  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) +      .addDef(TmpReg) +      .addUse(AArch64::SP) +      .addImm(0) +      .addImm(0); // no shift +  // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0 +  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs)) +      .addDef(TmpReg, RegState::Renamable) +      .addUse(TmpReg, RegState::Kill | RegState::Renamable) +      .addUse(MisspeculatingTaintReg, RegState::Kill) +      .addImm(0); +  // mov   SP, Xtmp === ADD SP, Xtmp, #0 +  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) +      .addDef(AArch64::SP) +      .addUse(TmpReg, RegState::Kill) +      .addImm(0) +      .addImm(0); // no shift +} + +bool AArch64SpeculationHardening::functionUsesHardeningRegister( +    MachineFunction &MF) const { +  for (MachineBasicBlock &MBB : MF) { +    for (MachineInstr &MI : MBB) { +      // treat function calls specially, as the hardening register does not +      // need to remain live across function calls. 
+      if (MI.isCall()) +        continue; +      if (MI.readsRegister(MisspeculatingTaintReg, TRI) || +          MI.modifiesRegister(MisspeculatingTaintReg, TRI)) +        return true; +    } +  } +  return false; +} + +bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) { +  if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening)) +    return false; + +  MisspeculatingTaintReg = AArch64::X16; +  TII = MF.getSubtarget().getInstrInfo(); +  TRI = MF.getSubtarget().getRegisterInfo(); +  bool Modified = false; + +  UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF); + +  // Instrument control flow speculation tracking, if requested. +  LLVM_DEBUG( +      dbgs() +      << "***** AArch64SpeculationHardening - track control flow *****\n"); + +  // 1. Add instrumentation code to function entry and exits. +  SmallVector<MachineBasicBlock *, 2> EntryBlocks; +  EntryBlocks.push_back(&MF.front()); +  for (const LandingPadInfo &LPI : MF.getLandingPads()) +    EntryBlocks.push_back(LPI.LandingPadBlock); +  for (auto Entry : EntryBlocks) +    insertSPToRegTaintPropagation( +        Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin())); + +  // 2. Add instrumentation code to every basic block. +  for (auto &MBB : MF) +    Modified |= instrumentControlFlow(MBB); + +  return Modified; +} + +/// \brief Returns an instance of the pseudo instruction expansion pass. +FunctionPass *llvm::createAArch64SpeculationHardeningPass() { +  return new AArch64SpeculationHardening(); +} diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 32c853483e3..4e016525f7e 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -177,6 +177,7 @@ extern "C" void LLVMInitializeAArch64Target() {    initializeFalkorHWPFFixPass(*PR);    initializeFalkorMarkStridedAccessesLegacyPass(*PR);    initializeLDTLSCleanupPass(*PR); +  initializeAArch64SpeculationHardeningPass(*PR);  }  //===----------------------------------------------------------------------===// @@ -550,6 +551,16 @@ void AArch64PassConfig::addPreSched2() {    if (TM->getOptLevel() != CodeGenOpt::None) {      if (EnableLoadStoreOpt)        addPass(createAArch64LoadStoreOptimizationPass()); +  } + +  // The AArch64SpeculationHardeningPass destroys dominator tree and natural +  // loop info, which is needed for the FalkorHWPFFixPass and also later on. +  // Therefore, run the AArch64SpeculationHardeningPass before the +  // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop +  // info. 
+  addPass(createAArch64SpeculationHardeningPass()); + +  if (TM->getOptLevel() != CodeGenOpt::None) {      if (EnableFalkorHWPFFix)        addPass(createFalkorHWPFFixPass());    } diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index 9c8c1d0e0ff..7778882d491 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -52,6 +52,7 @@ add_llvm_target(AArch64CodeGen    AArch64RegisterBankInfo.cpp    AArch64RegisterInfo.cpp    AArch64SelectionDAGInfo.cpp +  AArch64SpeculationHardening.cpp    AArch64StorePairSuppress.cpp    AArch64Subtarget.cpp    AArch64TargetMachine.cpp diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index d85d126883c..6d0aa91272b 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -50,6 +50,7 @@  ; CHECK-NEXT:       Prologue/Epilogue Insertion & Frame Finalization  ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass  ; CHECK-NEXT:       AArch64 pseudo instruction expansion pass +; CHECK-NEXT:       AArch64 speculation hardening pass  ; CHECK-NEXT:       Analyze Machine Code For Garbage Collection  ; CHECK-NEXT:       Branch relaxation pass  ; CHECK-NEXT:       AArch64 Branch Targets diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 29682b7b2d1..98cef01b6a9 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -146,6 +146,7 @@  ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass  ; CHECK-NEXT:       AArch64 pseudo instruction expansion pass  ; CHECK-NEXT:       AArch64 load / store optimization pass +; CHECK-NEXT:       AArch64 speculation hardening pass  ; CHECK-NEXT:       MachineDominator Tree Construction  ; CHECK-NEXT:       Machine Natural Loop Construction  ; CHECK-NEXT:       Falkor HW Prefetch Fix Late Phase diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll new file mode 100644 index 00000000000..4d13d98441e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll @@ -0,0 +1,71 @@ +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure + +declare i64 @g(i64, i64) local_unnamed_addr +define i64 @f_using_reserved_reg_x16(i64 %a, i64 %b) local_unnamed_addr SLHATTR { +; CHECK-LABEL: f_using_reserved_reg_x16 +; SLH: dsb sy +; SLH: isb +; NOSLH-NOT: dsb sy +; NOSLH-NOT: isb +entry: +  %cmp = icmp ugt i64 %a, %b +  br i1 %cmp, label %if.then, label %cleanup + +; CHECK: b.ls +; SLH: dsb sy +; SLH: isb +; NOSLH-NOT: dsb sy +; NOSLH-NOT: isb +if.then: +  %0 = tail call i64 asm "autia1716", "={x17},{x16},0"(i64 %b, i64 %a) +; CHECK: bl g +; SLH: dsb sy +; SLH: isb +; NOSLH-NOT: dsb sy +; NOSLH-NOT: isb +; CHECK: ret +  %call = tail call i64 @g(i64 %a, i64 %b) #3 +  %add = add i64 %call, %0 +  br label %cleanup + +cleanup: +; SLH: dsb sy +; SLH: isb +; NOSLH-NOT: dsb sy +; NOSLH-NOT: isb +; SLH: ret +  %retval.0 = phi i64 [ %add, %if.then ], [ %b, %entry ] +  ret i64 %retval.0 +} + +define i32 @f_clobbered_reg_w16(i32 %a, i32 %b) local_unnamed_addr SLHATTR { +; CHECK-LABEL: 
f_clobbered_reg_w16 +entry: +; SLH: dsb sy +; SLH: isb +; NOSLH-NOT: dsb sy +; NOSLH-NOT: isb +  %cmp = icmp sgt i32 %a, %b +  br i1 %cmp, label %if.then, label %if.end +; CHECK: b.le + +if.then: +; SLH: dsb sy +; SLH: isb +; NOSLH-NOT: dsb sy +; NOSLH-NOT: isb +; CHECK: mov w16, w0 +  tail call void asm sideeffect "mov w16, ${0:w}", "r,~{w16}"(i32 %a) +  br label %if.end +; SLH: ret + +if.end: +  %add = add nsw i32 %b, %a +  ret i32 %add +; SLH: dsb sy +; SLH: isb +; NOSLH-NOT: dsb sy +; NOSLH-NOT: isb +; SLH: ret +} diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll new file mode 100644 index 00000000000..3535b63c32c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll @@ -0,0 +1,156 @@ +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure +; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure + +define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR { +; CHECK-LABEL: f +entry: +; SLH:  cmp sp, #0 +; SLH:  csetm x16, ne +; NOSLH-NOT:  cmp sp, #0 +; NOSLH-NOT:  csetm x16, ne + +; SLH:  mov x17, sp +; SLH:  and x17, x17, x16 +; SLH:  mov sp, x17 +; NOSLH-NOT:  mov x17, sp +; NOSLH-NOT:  and x17, x17, x16 +; NOSLH-NOT:  mov sp, x17 +  %call = tail call i32 @tail_callee(i32 %i) +; SLH:  cmp sp, #0 +; SLH:  csetm x16, ne +; NOSLH-NOT:  cmp sp, #0 +; NOSLH-NOT:  csetm x16, ne +  %cmp = icmp slt i32 %call, %N +  br i1 %cmp, label %if.then, label %return +; GlobalISel lowers the branch to a b.ne sometimes instead of b.ge as expected.. +; CHECK: b.[[COND:(ge)|(lt)|(ne)]] + +if.then:                                          ; preds = %entry +; NOSLH-NOT: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}} +; SLH-DAG: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}} +  %idxprom = sext i32 %i to i64 +  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom +  %0 = load i8, i8* %arrayidx, align 1 +; CHECK-DAG:      ldrb [[LOADED:w[0-9]+]], +  %conv = zext i8 %0 to i32 +  br label %return + +; SLH-DAG: csel x16, x16, xzr, [[COND]] +; NOSLH-NOT: csel x16, x16, xzr, [[COND]] +return:                                           ; preds = %entry, %if.then +  %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ] +; SLH:  mov x17, sp +; SLH:  and x17, x17, x16 +; SLH:  mov sp, x17 +; NOSLH-NOT:  mov x17, sp +; NOSLH-NOT:  and x17, x17, x16 +; NOSLH-NOT:  mov sp, x17 +  ret i32 %retval.0 +} + +; Make sure that for a tail call, taint doesn't get put into SP twice. 
+define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR { +; CHECK-LABEL: tail_caller: +; SLH:     mov     x17, sp +; SLH:     and     x17, x17, x16 +; SLH:     mov     sp, x17 +; NOSLH-NOT:     mov     x17, sp +; NOSLH-NOT:     and     x17, x17, x16 +; NOSLH-NOT:     mov     sp, x17 +;  GlobalISel doesn't optimize tail calls (yet?), so only check that +;  cross-call taint register setup code is missing if a tail call was +;  actually produced. +; SLH:     {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}} +; SLH-NOT: cmp sp, #0 +  %call = tail call i32 @tail_callee(i32 %a) +  ret i32 %call +} + +declare i32 @tail_callee(i32) local_unnamed_addr + +; Verify that no cb(n)z/tb(n)z instructions are produced when implementing +; SLH +define i32 @compare_branch_zero(i32, i32) SLHATTR { +; CHECK-LABEL: compare_branch_zero +  %3 = icmp eq i32 %0, 0 +  br i1 %3, label %then, label %else +;SLH-NOT:   cb{{n?}}z +;NOSLH:     cb{{n?}}z +then: +  %4 = sdiv i32 5, %1 +  ret i32 %4 +else: +  %5 = sdiv i32 %1, %0 +  ret i32 %5 +} + +define i32 @test_branch_zero(i32, i32) SLHATTR { +; CHECK-LABEL: test_branch_zero +  %3 = and i32 %0, 16 +  %4 = icmp eq i32 %3, 0 +  br i1 %4, label %then, label %else +;SLH-NOT:   tb{{n?}}z +;NOSLH:     tb{{n?}}z +then: +  %5 = sdiv i32 5, %1 +  ret i32 %5 +else: +  %6 = sdiv i32 %1, %0 +  ret i32 %6 +} + +define i32 @landingpad(i32 %l0, i32 %l1) SLHATTR personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: landingpad +entry: +; SLH:  cmp sp, #0 +; SLH:  csetm x16, ne +; NOSLH-NOT:  cmp sp, #0 +; NOSLH-NOT:  csetm x16, ne +; CHECK: bl _Z10throwing_fv +  invoke void @_Z10throwing_fv() +          to label %exit unwind label %lpad +; SLH:  cmp sp, #0 +; SLH:  csetm x16, ne + +lpad: +  %l4 = landingpad { i8*, i32 } +          catch i8* null +; SLH:  cmp sp, #0 +; SLH:  csetm x16, ne +; NOSLH-NOT:  cmp sp, #0 +; NOSLH-NOT:  csetm x16, ne +  %l5 = extractvalue { i8*, i32 } %l4, 0 +  %l6 = tail call i8* @__cxa_begin_catch(i8* %l5) +  %l7 = icmp sgt i32 %l0, %l1 +  br i1 %l7, label %then, label %else +; GlobalISel lowers the branch to a b.ne sometimes instead of b.ge as expected.. +; CHECK: b.[[COND:(le)|(gt)|(ne)]] + +then: +; SLH-DAG: csel x16, x16, xzr, [[COND]] +  %l9 = sdiv i32 %l0, %l1 +  br label %postif + +else: +; SLH-DAG: csel x16, x16, xzr, {{(gt)|(le)|(eq)}} +  %l11 = sdiv i32 %l1, %l0 +  br label %postif + +postif: +  %l13 = phi i32 [ %l9, %then ], [ %l11, %else ] +  tail call void @__cxa_end_catch() +  br label %exit + +exit: +  %l15 = phi i32 [ %l13, %postif ], [ 0, %entry ] +  ret i32 %l15 +} + +declare i32 @__gxx_personality_v0(...) 
+declare void @_Z10throwing_fv() local_unnamed_addr +declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr +declare void @__cxa_end_catch() local_unnamed_addr diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir new file mode 100644 index 00000000000..cf8357d9558 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir @@ -0,0 +1,117 @@ +# RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \ +# RUN:     -start-before aarch64-speculation-hardening -o - %s \ +# RUN:   | FileCheck %s --dump-input-on-failure + +# Check that the speculation hardening pass generates code as expected for +# basic blocks ending with a variety of branch patterns: +# - (1) no branches (fallthrough) +# - (2) one unconditional branch +# - (3) one conditional branch + fall-through +# - (4) one conditional branch + one unconditional branch +# - other direct branches don't seem to be generated by the AArch64 codegen +--- | +  define void @nobranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening { +   ret void +  } +  define void @uncondbranch(i32 %a, i32 %b) speculative_load_hardening { +   ret void +  } +  define void @condbranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening { +   ret void +  } +  define void @condbranch_uncondbranch(i32 %a, i32 %b) speculative_load_hardening { +   ret void +  } +  define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening { +   ret void +  } +... +--- +name:            nobranch_fallthrough +tracksRegLiveness: true +body:             | +  ; CHECK-LABEL: nobranch_fallthrough +  bb.0: +    successors: %bb.1 +    liveins: $w0, $w1 +  ; CHECK-NOT: csel +  bb.1: +    liveins: $w0 +   RET undef $lr, implicit $w0 +... +--- +name:            uncondbranch +tracksRegLiveness: true +body:             | +  ; CHECK-LABEL: uncondbranch +  bb.0: +    successors: %bb.1 +    liveins: $w0, $w1 +    B %bb.1 +  ; CHECK-NOT: csel +  bb.1: +   liveins: $w0 +   RET undef $lr, implicit $w0 +... +--- +name:            condbranch_fallthrough +tracksRegLiveness: true +body:             | +  ; CHECK-LABEL: condbranch_fallthrough +  bb.0: +    successors: %bb.1, %bb.2 +    liveins: $w0, $w1 +    $wzr = SUBSWrs renamable $w0, renamable $w1, 0, implicit-def $nzcv, implicit-def $nzcv +    Bcc 11, %bb.2, implicit $nzcv +  ; CHECK: b.lt [[BB_LT_T:\.LBB[0-9_]+]] + +  bb.1: +    liveins: $nzcv, $w0 +  ; CHECK: csel x16, x16, xzr, ge +    RET undef $lr, implicit $w0 +  bb.2: +    liveins: $nzcv, $w0 +  ; CHECK: csel x16, x16, xzr, lt +    RET undef $lr, implicit $w0 +... +--- +name:            condbranch_uncondbranch +tracksRegLiveness: true +body:             | +  ; CHECK-LABEL: condbranch_uncondbranch +  bb.0: +    successors: %bb.1, %bb.2 +    liveins: $w0, $w1 +    $wzr = SUBSWrs renamable $w0, renamable $w1, 0, implicit-def $nzcv, implicit-def $nzcv +    Bcc 11, %bb.2, implicit $nzcv +    B %bb.1, implicit $nzcv +  ; CHECK: b.lt [[BB_LT_T:\.LBB[0-9_]+]] + +  bb.1: +    liveins: $nzcv, $w0 +  ; CHECK: csel x16, x16, xzr, ge +    RET undef $lr, implicit $w0 +  bb.2: +    liveins: $nzcv, $w0 +  ; CHECK: csel x16, x16, xzr, lt +    RET undef $lr, implicit $w0 +... +--- +name:            indirectbranch +tracksRegLiveness: true +body:             | +  ; Check that no instrumentation is done on indirect branches (for now). 
+  ; CHECK-LABEL: indirectbranch
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $x0
+    BR $x0
+  bb.1:
+   liveins: $x0
+  ; CHECK-NOT: csel
+   RET undef $lr, implicit $x0
+  bb.2:
+   liveins: $x0
+  ; CHECK-NOT: csel
+   RET undef $lr, implicit $x0
+...
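When a function reads or writes X16/W16 itself (detected by functionUsesHardeningRegister above and exercised by speculation-hardening-dagisel.ll), the pass falls back to full control-flow speculation barriers instead of CSEL-based tracking. A simplified, illustrative sketch of that fallback shape, not verbatim compiler output:

    f_using_reserved_reg_x16:   // shape of the dagisel test function, simplified
        dsb   sy                // barrier pair at function entry
        isb
        cmp   x0, x1
        b.ls  .Lcleanup         // conditional branches still consume flags
        dsb   sy                // barrier pair on each outgoing edge of the branch
        isb
        ...
        bl    g
        dsb   sy                // re-established right after each call returns
        isb
        ...
    .Lcleanup:
        dsb   sy                // the other edge of the conditional branch
        isb
        ret

This is considerably more expensive than the CSEL-based tracking, which is why the header comment treats it purely as a fallback for the rare functions that demand X16/W16 through inline assembly.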

