-rw-r--r--  llvm/lib/Target/AArch64/AArch64.h                          |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FastISel.cpp                |   7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp            |  18
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp               |   7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp     |  34
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp            |   4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp    | 368
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp           |  11
-rw-r--r--  llvm/lib/Target/AArch64/CMakeLists.txt                     |   1
-rw-r--r--  llvm/test/CodeGen/AArch64/O0-pipeline.ll                   |   1
-rw-r--r--  llvm/test/CodeGen/AArch64/O3-pipeline.ll                   |   1
-rw-r--r--  llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll |  71
-rw-r--r--  llvm/test/CodeGen/AArch64/speculation-hardening.ll         | 156
-rw-r--r--  llvm/test/CodeGen/AArch64/speculation-hardening.mir        | 117
14 files changed, 789 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 2f0d0bf346d..c36d9354f3b 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -39,6 +39,7 @@ FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64SpeculationHardeningPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
FunctionPass *createAArch64SIMDInstrOptPass();
ModulePass *createAArch64PromoteConstantPass();
@@ -68,6 +69,7 @@ void initializeAArch64ConditionalComparesPass(PassRegistry&);
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
+void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 7a7b0dd20a4..47550cabb9f 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2258,6 +2258,13 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (FuncInfo.MF->getFunction().hasFnAttribute(
+ Attribute::SpeculativeLoadHardening))
+ return false;
+
assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
const CmpInst *CI = cast<CmpInst>(BI->getCondition());
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b539417941..cc10c9688e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4343,6 +4343,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
@@ -4385,7 +4392,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if (RHSC && RHSC->getZExtValue() == 0) {
+ if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
@@ -4428,7 +4435,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
- LHS.getOpcode() != ISD::AND) {
+ LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
@@ -10807,6 +10814,13 @@ SDValue performCONDCombine(SDNode *N,
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return SDValue();
+
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index eddb349f0bf..10464ea57bb 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -964,6 +964,13 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineFunction &MF) const {
if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
return true;
+ switch (MI.getOpcode()) {
+ case AArch64::DSB:
+ case AArch64::ISB:
+ // DSB and ISB are also scheduling barriers.
+ return true;
+ default: break;
+ }
return isSEHInstruction(MI);
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 90258cc1555..6cbfb6ab161 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -788,16 +788,36 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- if (selectCompareBranch(I, MF, MRI))
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+ if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
return true;
- auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
- .addUse(CondReg)
- .addImm(/*bit offset=*/0)
- .addMBB(DestMBB);
+ if (ProduceNonFlagSettingCondBr) {
+ auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
+ .addUse(CondReg)
+ .addImm(/*bit offset=*/0)
+ .addMBB(DestMBB);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ } else {
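+ // SLH requires conditional branches to consume the flags, so instead of
+ // a TB(N)Z on bit 0 of the condition, emit a flag-setting test of bit 0
+ // (ANDS against 1) followed by a B.ne that is taken when the condition
+ // bit is set.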
+ auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
+ .addDef(AArch64::WZR)
+ .addUse(CondReg)
+ .addImm(1);
+ constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
+ auto Bcc =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
+ .addImm(AArch64CC::NE)
+ .addMBB(DestMBB);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
+ }
}
case TargetOpcode::G_BRINDIRECT: {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 55631bcba23..96ae45ae3d0 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -203,6 +203,10 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
+ // SLH uses register W16/X16 as the taint register.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ markSuperRegs(Reserved, AArch64::W16);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
new file mode 100644
index 00000000000..1f8ef5ee6ea
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -0,0 +1,368 @@
+//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass to insert code to mitigate against side channel
+// vulnerabilities that may happen under control flow miss-speculation.
+//
+// The pass implements tracking of control flow miss-speculation into a "taint"
+// register. That taint register can then be used to mask off registers with
+// sensitive data when executing under miss-speculation, a.k.a. "transient
+// execution".
+// This pass is aimed at mitigating SpectreV1-style vulnerabilities.
+//
+// At the moment, it implements the tracking of miss-speculation of control
+// flow into a taint register, but doesn't yet implement a mechanism to use
+// that taint register to mask off vulnerable data in registers (something
+// for a follow-on improvement). Possible strategies to mask out vulnerable
+// data that can be implemented on top of this are:
+// - speculative load hardening to automatically mask off data loaded
+// in registers.
+// - using intrinsics to mask off data in registers as indicated by the
+// programmer (see https://lwn.net/Articles/759423/).
+//
+// For AArch64, the following implementation choices are made below.
+// Some of these differ from the implementation choices made in
+// the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
+// the instruction set characteristics result in different trade-offs.
+// - The speculation hardening is done after register allocation. With a
+// relative abundance of registers, one register is reserved (X16) to be
+// the taint register. X16 is expected to not clash with other register
+// reservation mechanisms with very high probability because:
+// . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
+// . The only way for a programmer to request that X16 be used is through
+// inline assembly. In the rare case a function explicitly demands to
+// use X16/W16, this pass falls back to hardening against speculation
+// by inserting a DSB SYS/ISB barrier pair, which will prevent control
+// flow speculation.
+// - It is easy to insert mask operations at this late stage as we have
+// mask operations available that don't set flags.
+// - The taint variable contains all-ones when no miss-speculation is detected,
+// and contains all-zeros when miss-speculation is detected. Therefore, when
+// masking, an AND instruction (which only changes the register to be masked,
+// no other side effects) can easily be inserted anywhere that's needed.
+// - The tracking of miss-speculation is done by using a data-flow conditional
+// select instruction (CSEL) to evaluate the flags that were also used to
+// make conditional branch direction decisions. Speculation of the CSEL
+// instruction can be limited with a CSDB instruction - so the combination of
+// CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
+// aren't speculated. When conditional branch direction gets miss-speculated,
+// the semantics of the inserted CSEL instruction is such that the taint
+// register will contain all zero bits.
+// One key requirement for this to work is that the conditional branch is
+// followed by an execution of the CSEL instruction, where the CSEL
+// instruction needs to use the same flags status as the conditional branch.
+// This means that the conditional branches must not be implemented as one
+// of the AArch64 conditional branches that do not use the flags as input
+// (CB(N)Z and TB(N)Z). This is implemented by ensuring that the instruction
+// selectors do not produce these instructions when speculation hardening
+// is enabled. This pass will assert if it does encounter such an instruction.
+// A sketch of the inserted instruction sequences is given below, after this
+// header comment.
+// - On function call boundaries, the miss-speculation state is transferred from
+// the taint register X16 to be encoded in the SP register as value 0.
+//
+// Future extensions/improvements could be:
+// - Implement this functionality using full speculation barriers, akin to the
+// x86-slh-lfence option. This may be more useful for the intrinsics-based
+// approach than for the SLH approach to masking.
+// Note that this pass already inserts the full speculation barriers if the
+// function for some niche reason makes use of X16/W16.
+// - No indirect branch misprediction gets protected/instrumented yet; this
+// could be done for some indirect branches, such as switch jump tables.
+//===----------------------------------------------------------------------===//
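+
+// As an illustration (a sketch only: exact registers, block layout and
+// scheduling may differ), the inserted instruction sequences look like:
+//
+// Control flow tracking on a conditional branch:
+//   cmp x0, x1
+//   b.lt .Lthen
+//   csel x16, x16, xzr, ge  // fall-through edge: keep the taint only if
+//                           // the flags confirm this direction (ge)
+//   ...
+// .Lthen:
+//   csel x16, x16, xzr, lt  // taken edge: keep the taint only if the
+//                           // flags confirm this direction (lt)
+//
+// Taint transfer across function boundaries:
+//   cmp sp, #0              // at entry and just after calls: SP was set to
+//   csetm x16, ne           // 0 under miss-speculation, so x16 := all-ones
+//                           // if SP != 0 and all-zeros if SP == 0
+//   ...
+//   mov x17, sp             // before returns and calls: AND the taint into
+//   and x17, x17, x16       // SP, making SP == 0 precisely when
+//   mov sp, x17             // miss-speculation was detected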
+
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-speculation-hardening"
+
+#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
+
+namespace {
+
+class AArch64SpeculationHardening : public MachineFunctionPass {
+public:
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ static char ID;
+
+ AArch64SpeculationHardening() : MachineFunctionPass(ID) {
+ initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return AARCH64_SPECULATION_HARDENING_NAME;
+ }
+
+private:
+ unsigned MisspeculatingTaintReg;
+ bool UseControlFlowSpeculationBarrier;
+
+ bool functionUsesHardeningRegister(MachineFunction &MF) const;
+ bool instrumentControlFlow(MachineBasicBlock &MBB);
+ bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const;
+ void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
+ AArch64CC::CondCode &CondCode, DebugLoc DL) const;
+ void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI) const;
+ void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const;
+};
+
+} // end anonymous namespace
+
+char AArch64SpeculationHardening::ID = 0;
+
+INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
+ AARCH64_SPECULATION_HARDENING_NAME, false, false)
+
+bool AArch64SpeculationHardening::endsWithCondControlFlow(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const {
+ SmallVector<MachineOperand, 1> analyzeBranchCondCode;
+ if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
+ return false;
+
+ // Ignore if the BB ends in an unconditional branch/fall-through.
+ if (analyzeBranchCondCode.empty())
+ return false;
+
+ // If the BB ends with a single conditional branch, FBB will be set to
+ // nullptr (see API docs for TII->analyzeBranch). For the rest of the
+ // analysis we want the FBB block to be set always.
+ assert(TBB != nullptr);
+ if (FBB == nullptr)
+ FBB = MBB.getFallThrough();
+
+ // If both the true and the false condition jump to the same basic block,
+ // there is no need for any protection - whether the branch is speculated
+ // correctly or not, we end up executing the architecturally correct code.
+ if (TBB == FBB)
+ return false;
+
+ assert(MBB.succ_size() == 2);
+ // Translate analyzeBranchCondCode to CondCode.
+ assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
+ CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
+ return true;
+}
+
+void AArch64SpeculationHardening::insertTrackingCode(
+ MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
+ DebugLoc DL) const {
+ if (UseControlFlowSpeculationBarrier) {
+ // Insert full control flow speculation barrier (DSB SYS + ISB).
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
+ .addImm(0xf);
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
+ .addImm(0xf);
+ } else {
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addImm(CondCode);
+ SplitEdgeBB.addLiveIn(AArch64::NZCV);
+ }
+}
+
+bool AArch64SpeculationHardening::instrumentControlFlow(
+ MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
+
+ bool Modified = false;
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ AArch64CC::CondCode CondCode;
+
+ if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
+ LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
+ } else {
+ // Now insert:
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
+ // edge.
+ AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
+
+ MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
+ MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
+
+ assert(SplitEdgeTBB != nullptr);
+ assert(SplitEdgeFBB != nullptr);
+
+ DebugLoc DL;
+ if (MBB.instr_end() != MBB.instr_begin())
+ DL = (--MBB.instr_end())->getDebugLoc();
+
+ insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
+ insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
+
+ LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
+ LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
+ Modified = true;
+ }
+
+ // Perform correct code generation around function calls and before returns.
+ {
+ SmallVector<MachineInstr *, 4> ReturnInstructions;
+ SmallVector<MachineInstr *, 4> CallInstructions;
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.isReturn())
+ ReturnInstructions.push_back(&MI);
+ else if (MI.isCall())
+ CallInstructions.push_back(&MI);
+ }
+
+ Modified |=
+ (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+
+ for (MachineInstr *Return : ReturnInstructions)
+ insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
+ for (MachineInstr *Call : CallInstructions) {
+ // Just after the call:
+ MachineBasicBlock::iterator i = Call;
+ i++;
+ insertSPToRegTaintPropagation(Call->getParent(), i);
+ // Just before the call:
+ insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+ }
+ }
+
+ return Modified;
+}
+
+void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
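+ // On entry to a function (and just after calls), SP is expected to be 0
+ // if and only if control flow miss-speculated its way here: the taint
+ // register gets encoded into SP before every call and return (see
+ // insertRegToSPTaintPropagation below).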
+ // If full control flow speculation barriers are used, emit a control flow
+ // barrier to block potential miss-speculation in flight coming in to this
+ // function.
+ if (UseControlFlowSpeculationBarrier) {
+ // Insert full control flow speculation barrier (DSB SYS + ISB).
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+ return;
+ }
+
+ // CMP SP, #0 === SUBS xzr, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+ .addDef(AArch64::XZR)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+ // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addUse(AArch64::XZR)
+ .addImm(AArch64CC::EQ);
+}
+
+void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const {
+ // If full control flow speculation barriers are used, there will not be
+ // miss-speculation when returning from this function, and therefore, also
+ // no need to encode potential miss-speculation into the stack pointer.
+ if (UseControlFlowSpeculationBarrier)
+ return;
+
+ // mov Xtmp, SP === ADD Xtmp, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(TmpReg)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+ // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, LSL #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+ .addDef(TmpReg, RegState::Renamable)
+ .addUse(TmpReg, RegState::Kill | RegState::Renamable)
+ .addUse(MisspeculatingTaintReg, RegState::Kill)
+ .addImm(0);
+ // mov SP, Xtmp === ADD SP, Xtmp, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(AArch64::SP)
+ .addUse(TmpReg, RegState::Kill)
+ .addImm(0)
+ .addImm(0); // no shift
+}
+
+bool AArch64SpeculationHardening::functionUsesHardeningRegister(
+ MachineFunction &MF) const {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // Treat function calls specially, as the hardening register does not
+ // need to remain live across function calls.
+ if (MI.isCall())
+ continue;
+ if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
+ MI.modifiesRegister(MisspeculatingTaintReg, TRI))
+ return true;
+ }
+ }
+ return false;
+}
+
+bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return false;
+
+ MisspeculatingTaintReg = AArch64::X16;
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ bool Modified = false;
+
+ UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
+
+ // Instrument control flow speculation tracking, if requested.
+ LLVM_DEBUG(
+ dbgs()
+ << "***** AArch64SpeculationHardening - track control flow *****\n");
+
+ // 1. Add instrumentation code to function entry and exits.
+ SmallVector<MachineBasicBlock *, 2> EntryBlocks;
+ EntryBlocks.push_back(&MF.front());
+ for (const LandingPadInfo &LPI : MF.getLandingPads())
+ EntryBlocks.push_back(LPI.LandingPadBlock);
+ for (auto Entry : EntryBlocks)
+ insertSPToRegTaintPropagation(
+ Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
+
+ // 2. Add instrumentation code to every basic block.
+ for (auto &MBB : MF)
+ Modified |= instrumentControlFlow(MBB);
+
+ return Modified;
+}
+
+/// \brief Returns an instance of the AArch64 speculation hardening pass.
+FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
+ return new AArch64SpeculationHardening();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 32c853483e3..4e016525f7e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -177,6 +177,7 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeFalkorHWPFFixPass(*PR);
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
+ initializeAArch64SpeculationHardeningPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -550,6 +551,16 @@ void AArch64PassConfig::addPreSched2() {
if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
+ }
+
+ // The AArch64SpeculationHardeningPass destroys dominator tree and natural
+ // loop info, which is needed for the FalkorHWPFFixPass and also later on.
+ // Therefore, run the AArch64SpeculationHardeningPass before the
+ // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
+ // info.
+ addPass(createAArch64SpeculationHardeningPass());
+
+ if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableFalkorHWPFFix)
addPass(createFalkorHWPFFixPass());
}
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 9c8c1d0e0ff..7778882d491 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_target(AArch64CodeGen
AArch64RegisterBankInfo.cpp
AArch64RegisterInfo.cpp
AArch64SelectionDAGInfo.cpp
+ AArch64SpeculationHardening.cpp
AArch64StorePairSuppress.cpp
AArch64Subtarget.cpp
AArch64TargetMachine.cpp
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index d85d126883c..6d0aa91272b 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -50,6 +50,7 @@
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
; CHECK-NEXT: AArch64 pseudo instruction expansion pass
+; CHECK-NEXT: AArch64 speculation hardening pass
; CHECK-NEXT: Analyze Machine Code For Garbage Collection
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: AArch64 Branch Targets
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 29682b7b2d1..98cef01b6a9 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -146,6 +146,7 @@
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
; CHECK-NEXT: AArch64 pseudo instruction expansion pass
; CHECK-NEXT: AArch64 load / store optimization pass
+; CHECK-NEXT: AArch64 speculation hardening pass
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Falkor HW Prefetch Fix Late Phase
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll
new file mode 100644
index 00000000000..4d13d98441e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll
@@ -0,0 +1,71 @@
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+
+declare i64 @g(i64, i64) local_unnamed_addr
+define i64 @f_using_reserved_reg_x16(i64 %a, i64 %b) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: f_using_reserved_reg_x16
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+entry:
+ %cmp = icmp ugt i64 %a, %b
+ br i1 %cmp, label %if.then, label %cleanup
+
+; CHECK: b.ls
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+if.then:
+ %0 = tail call i64 asm "autia1716", "={x17},{x16},0"(i64 %b, i64 %a)
+; CHECK: bl g
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; CHECK: ret
+ %call = tail call i64 @g(i64 %a, i64 %b)
+ %add = add i64 %call, %0
+ br label %cleanup
+
+cleanup:
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; SLH: ret
+ %retval.0 = phi i64 [ %add, %if.then ], [ %b, %entry ]
+ ret i64 %retval.0
+}
+
+define i32 @f_clobbered_reg_w16(i32 %a, i32 %b) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: f_clobbered_reg_w16
+entry:
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+ %cmp = icmp sgt i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.end
+; CHECK: b.le
+
+if.then:
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; CHECK: mov w16, w0
+ tail call void asm sideeffect "mov w16, ${0:w}", "r,~{w16}"(i32 %a)
+ br label %if.end
+; SLH: ret
+
+if.end:
+ %add = add nsw i32 %b, %a
+ ret i32 %add
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; SLH: ret
+}
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll
new file mode 100644
index 00000000000..3535b63c32c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll
@@ -0,0 +1,156 @@
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+
+define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: f
+entry:
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+
+; SLH: mov x17, sp
+; SLH: and x17, x17, x16
+; SLH: mov sp, x17
+; NOSLH-NOT: mov x17, sp
+; NOSLH-NOT: and x17, x17, x16
+; NOSLH-NOT: mov sp, x17
+ %call = tail call i32 @tail_callee(i32 %i)
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+ %cmp = icmp slt i32 %call, %N
+ br i1 %cmp, label %if.then, label %return
+; GlobalISel sometimes lowers the branch to a b.ne instead of the expected b.ge.
+; CHECK: b.[[COND:(ge)|(lt)|(ne)]]
+
+if.then: ; preds = %entry
+; NOSLH-NOT: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}}
+; SLH-DAG: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}}
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
+ %0 = load i8, i8* %arrayidx, align 1
+; CHECK-DAG: ldrb [[LOADED:w[0-9]+]],
+ %conv = zext i8 %0 to i32
+ br label %return
+
+; SLH-DAG: csel x16, x16, xzr, [[COND]]
+; NOSLH-NOT: csel x16, x16, xzr, [[COND]]
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ]
+; SLH: mov x17, sp
+; SLH: and x17, x17, x16
+; SLH: mov sp, x17
+; NOSLH-NOT: mov x17, sp
+; NOSLH-NOT: and x17, x17, x16
+; NOSLH-NOT: mov sp, x17
+ ret i32 %retval.0
+}
+
+; Make sure that for a tail call, taint doesn't get put into SP twice.
+define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: tail_caller:
+; SLH: mov x17, sp
+; SLH: and x17, x17, x16
+; SLH: mov sp, x17
+; NOSLH-NOT: mov x17, sp
+; NOSLH-NOT: and x17, x17, x16
+; NOSLH-NOT: mov sp, x17
+; GlobalISel doesn't optimize tail calls (yet?), so only check that
+; cross-call taint register setup code is missing if a tail call was
+; actually produced.
+; SLH: {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}}
+; SLH-NOT: cmp sp, #0
+ %call = tail call i32 @tail_callee(i32 %a)
+ ret i32 %call
+}
+
+declare i32 @tail_callee(i32) local_unnamed_addr
+
+; Verify that no cb(n)z/tb(n)z instructions are produced when implementing
+; SLH
+define i32 @compare_branch_zero(i32, i32) SLHATTR {
+; CHECK-LABEL: compare_branch_zero
+ %3 = icmp eq i32 %0, 0
+ br i1 %3, label %then, label %else
+; SLH-NOT: cb{{n?}}z
+; NOSLH: cb{{n?}}z
+then:
+ %4 = sdiv i32 5, %1
+ ret i32 %4
+else:
+ %5 = sdiv i32 %1, %0
+ ret i32 %5
+}
+
+define i32 @test_branch_zero(i32, i32) SLHATTR {
+; CHECK-LABEL: test_branch_zero
+ %3 = and i32 %0, 16
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %then, label %else
+; SLH-NOT: tb{{n?}}z
+; NOSLH: tb{{n?}}z
+then:
+ %5 = sdiv i32 5, %1
+ ret i32 %5
+else:
+ %6 = sdiv i32 %1, %0
+ ret i32 %6
+}
+
+define i32 @landingpad(i32 %l0, i32 %l1) SLHATTR personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: landingpad
+entry:
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+; CHECK: bl _Z10throwing_fv
+ invoke void @_Z10throwing_fv()
+ to label %exit unwind label %lpad
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+
+lpad:
+ %l4 = landingpad { i8*, i32 }
+ catch i8* null
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+ %l5 = extractvalue { i8*, i32 } %l4, 0
+ %l6 = tail call i8* @__cxa_begin_catch(i8* %l5)
+ %l7 = icmp sgt i32 %l0, %l1
+ br i1 %l7, label %then, label %else
+; GlobalISel sometimes lowers the branch to a b.ne instead of the expected b.le.
+; CHECK: b.[[COND:(le)|(gt)|(ne)]]
+
+then:
+; SLH-DAG: csel x16, x16, xzr, [[COND]]
+ %l9 = sdiv i32 %l0, %l1
+ br label %postif
+
+else:
+; SLH-DAG: csel x16, x16, xzr, {{(gt)|(le)|(eq)}}
+ %l11 = sdiv i32 %l1, %l0
+ br label %postif
+
+postif:
+ %l13 = phi i32 [ %l9, %then ], [ %l11, %else ]
+ tail call void @__cxa_end_catch()
+ br label %exit
+
+exit:
+ %l15 = phi i32 [ %l13, %postif ], [ 0, %entry ]
+ ret i32 %l15
+}
+
+declare i32 @__gxx_personality_v0(...)
+declare void @_Z10throwing_fv() local_unnamed_addr
+declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr
+declare void @__cxa_end_catch() local_unnamed_addr
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir
new file mode 100644
index 00000000000..cf8357d9558
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir
@@ -0,0 +1,117 @@
+# RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \
+# RUN: -start-before aarch64-speculation-hardening -o - %s \
+# RUN: | FileCheck %s --dump-input-on-failure
+
+# Check that the speculation hardening pass generates code as expected for
+# basic blocks ending with a variety of branch patterns:
+# - (1) no branches (fallthrough)
+# - (2) one unconditional branch
+# - (3) one conditional branch + fall-through
+# - (4) one conditional branch + one unconditional branch
+# - other direct branches don't seem to be generated by the AArch64 codegen
+--- |
+ define void @nobranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening {
+ ret void
+ }
+ define void @uncondbranch(i32 %a, i32 %b) speculative_load_hardening {
+ ret void
+ }
+ define void @condbranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening {
+ ret void
+ }
+ define void @condbranch_uncondbranch(i32 %a, i32 %b) speculative_load_hardening {
+ ret void
+ }
+ define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening {
+ ret void
+ }
+...
+---
+name: nobranch_fallthrough
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: nobranch_fallthrough
+ bb.0:
+ successors: %bb.1
+ liveins: $w0, $w1
+ ; CHECK-NOT: csel
+ bb.1:
+ liveins: $w0
+ RET undef $lr, implicit $w0
+...
+---
+name: uncondbranch
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: uncondbranch
+ bb.0:
+ successors: %bb.1
+ liveins: $w0, $w1
+ B %bb.1
+ ; CHECK-NOT: csel
+ bb.1:
+ liveins: $w0
+ RET undef $lr, implicit $w0
+...
+---
+name: condbranch_fallthrough
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: condbranch_fallthrough
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $w0, $w1
+ $wzr = SUBSWrs renamable $w0, renamable $w1, 0, implicit-def $nzcv, implicit-def $nzcv
+ Bcc 11, %bb.2, implicit $nzcv
+ ; CHECK: b.lt [[BB_LT_T:\.LBB[0-9_]+]]
+
+ bb.1:
+ liveins: $nzcv, $w0
+ ; CHECK: csel x16, x16, xzr, ge
+ RET undef $lr, implicit $w0
+ bb.2:
+ liveins: $nzcv, $w0
+ ; CHECK: csel x16, x16, xzr, lt
+ RET undef $lr, implicit $w0
+...
+---
+name: condbranch_uncondbranch
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: condbranch_uncondbranch
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $w0, $w1
+ $wzr = SUBSWrs renamable $w0, renamable $w1, 0, implicit-def $nzcv, implicit-def $nzcv
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1, implicit $nzcv
+ ; CHECK: b.lt [[BB_LT_T:\.LBB[0-9_]+]]
+
+ bb.1:
+ liveins: $nzcv, $w0
+ ; CHECK: csel x16, x16, xzr, ge
+ RET undef $lr, implicit $w0
+ bb.2:
+ liveins: $nzcv, $w0
+ ; CHECK: csel x16, x16, xzr, lt
+ RET undef $lr, implicit $w0
+...
+---
+name: indirectbranch
+tracksRegLiveness: true
+body: |
+ ; Check that no instrumentation is done on indirect branches (for now).
+ ; CHECK-LABEL: indirectbranch
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $x0
+ BR $x0
+ bb.1:
+ liveins: $x0
+ ; CHECK-NOT: csel
+ RET undef $lr, implicit $x0
+ bb.2:
+ liveins: $x0
+ ; CHECK-NOT: csel
+ RET undef $lr, implicit $x0
+...