-rw-r--r--   llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp           | 271
-rw-r--r--   llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll  | 439
2 files changed, 686 insertions, 24 deletions
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 81bd9014a88..923d82f051f 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -179,6 +179,9 @@ private:
void unfoldCallAndJumpLoads(MachineFunction &MF);
+ SmallVector<MachineInstr *, 16>
+ tracePredStateThroughIndirectBranches(MachineFunction &MF);
+
void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
unsigned saveEFLAGS(MachineBasicBlock &MBB,
@@ -522,11 +525,16 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
}
}
- // If we are going to harden calls and jumps we need to unfold their memory
- // operands.
- if (HardenIndirectCallsAndJumps)
+ if (HardenIndirectCallsAndJumps) {
+ // If we are going to harden calls and jumps we need to unfold their memory
+ // operands.
unfoldCallAndJumpLoads(MF);
+ // Then we trace predicate state through the indirect branches.
+ auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
+ CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
+ }
+
// Now that we have the predicate state available at the start of each block
// in the CFG, trace it through each block, hardening vulnerable instructions
// as we go.
@@ -925,6 +933,263 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
}
}
+/// Trace the predicate state through indirect branches, instrumenting them to
+/// poison the state if a target is reached that does not match the expected
+/// target.
+///
+/// This is designed to mitigate Spectre variant 1 attacks where an indirect
+/// branch is trained to predict a particular target and then mispredicts that
+/// target in a way that can leak data. Despite using an indirect branch, this
+/// is really a variant 1 style attack: it does not steer execution to an
+/// arbitrary or attacker controlled address, and it does not require any
+/// special code executing next to the victim. This attack can also be mitigated
+/// through retpolines, but those require either replacing indirect branches
+/// with conditional direct branches or lowering them through a device that
+/// blocks speculation. This mitigation can replace these retpoline-style
+/// mitigations for jump tables and other indirect branches within a function
+/// when variant 2 isn't a risk, while still allowing limited speculation. Indirect
+/// calls, however, cannot be mitigated through this technique without changing
+/// the ABI in a fundamental way.
+SmallVector<MachineInstr *, 16>
+X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
+ MachineFunction &MF) {
+ // We use the SSAUpdater to insert PHI nodes for the target addresses of
+ // indirect branches. We don't actually need the full power of the SSA updater
+ // in this particular case as we always have immediately available values, but
+ // this avoids us having to re-implement the PHI construction logic.
+ MachineSSAUpdater TargetAddrSSA(MF);
+ TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
+
+ // Track which blocks were terminated with an indirect branch.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
+
+ // We need to know what blocks end up reached via indirect branches. We
+ // expect this to be a subset of those whose address is taken and so track it
+ // directly via the CFG.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
+
+ // Walk all the blocks which end in an indirect branch and make the
+ // target address available.
+ for (MachineBasicBlock &MBB : MF) {
+ // Find the last terminator.
+ auto MII = MBB.instr_rbegin();
+ while (MII != MBB.instr_rend() && MII->isDebugInstr())
+ ++MII;
+ if (MII == MBB.instr_rend())
+ continue;
+ MachineInstr &TI = *MII;
+ if (!TI.isTerminator() || !TI.isBranch())
+ // No terminator or non-branch terminator.
+ continue;
+
+ unsigned TargetReg;
+
+ switch (TI.getOpcode()) {
+ default:
+ // Direct branch or conditional branch (leading to fallthrough).
+ continue;
+
+ case X86::FARJMP16m:
+ case X86::FARJMP32m:
+ case X86::FARJMP64:
+ // We cannot mitigate far jumps or calls, but we also don't expect them
+ // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
+ continue;
+
+ case X86::JMP16m:
+ case X86::JMP16m_NT:
+ case X86::JMP32m:
+ case X86::JMP32m_NT:
+ case X86::JMP64m:
+ case X86::JMP64m_NT:
+ // Mostly as documentation.
+ report_fatal_error("Memory operand jumps should have been unfolded!");
+
+ case X86::JMP16r:
+ report_fatal_error(
+ "Support for 16-bit indirect branches is not implemented.");
+ case X86::JMP32r:
+ report_fatal_error(
+ "Support for 32-bit indirect branches is not implemented.");
+
+ case X86::JMP64r:
+ TargetReg = TI.getOperand(0).getReg();
+ }
+
+ // We have definitely found an indirect branch. Verify that there are no
+ // preceding conditional branches as we don't yet support that.
+ if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
+ return !OtherTI.isDebugInstr() && &OtherTI != &TI;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found other terminators in a block with an indirect "
+ "branch! This is not yet supported! Terminator sequence:\n";
+ for (MachineInstr &MI : MBB.terminators()) {
+ MI.dump();
+ dbgs() << '\n';
+ }
+ });
+ report_fatal_error("Unimplemented terminator sequence!");
+ }
+
+ // Make the target register an available value for this block.
+ TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
+ IndirectTerminatedMBBs.insert(&MBB);
+
+ // Add all the successors to our target candidates.
+ for (MachineBasicBlock *Succ : MBB.successors())
+ IndirectTargetMBBs.insert(Succ);
+ }
+
+ // Keep track of the cmov instructions we insert so we can return them.
+ SmallVector<MachineInstr *, 16> CMovs;
+
+ // If we didn't find any indirect branches with targets, nothing to do here.
+ if (IndirectTargetMBBs.empty())
+ return CMovs;
+
+ // We found indirect branches and targets that need to be instrumented to
+ // harden loads within them. Walk the blocks of the function (to get a stable
+ // ordering) and instrument each target of an indirect branch.
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip the blocks that aren't candidate targets.
+ if (!IndirectTargetMBBs.count(&MBB))
+ continue;
+
+ // We don't expect EH pads to ever be reached via an indirect branch. If
+ // this is desired for some reason, we could simply skip them here rather
+ // than asserting.
+ assert(!MBB.isEHPad() &&
+ "Unexpected EH pad as target of an indirect branch!");
+
+ // We should never end up threading EFLAGS into a block to harden
+ // conditional jumps as there would be an additional successor via the
+ // indirect branch. As a consequence, all such edges would be split before
+ // reaching here, and the inserted block will handle the EFLAGS-based
+ // hardening.
+ assert(!MBB.isLiveIn(X86::EFLAGS) &&
+ "Cannot check within a block that already has live-in EFLAGS!");
+
+ // We can't handle having non-indirect edges into this block unless this is
+ // the only successor and we can synthesize the necessary target address.
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ // If we've already handled this by extracting the target directly,
+ // nothing to do.
+ if (IndirectTerminatedMBBs.count(Pred))
+ continue;
+
+ // Otherwise, we have to be the only successor. We generally expect this
+ // to be true as conditional branches should have had a critical edge
+ // split already. We don't however need to worry about EH pad successors
+ // as they'll happily ignore the target and their hardening strategy is
+ // resilient to all ways in which they could be reached speculatively.
+ if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
+ return Succ->isEHPad() || Succ == &MBB;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found conditional entry to target of indirect "
+ "branch!\n";
+ Pred->dump();
+ MBB.dump();
+ });
+ report_fatal_error("Cannot harden a conditional entry to a target of "
+ "an indirect branch!");
+ }
+
+ // Now we need to compute the address of this block and install it as a
+ // synthetic target in the predecessor. We do this at the bottom of the
+ // predecessor.
+ auto InsertPt = Pred->getFirstTerminator();
+ unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Directly materialize it into an immediate.
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
+ TII->get(X86::MOV64ri32), TargetReg)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
+ dbgs() << "\n");
+ } else {
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
+ TargetReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
+ dbgs() << "\n");
+ }
+ // And make this available.
+ TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
+ }
+
+ // Materialize the needed SSA value of the target. Note that we need the
+ // middle of the block as this block might at the bottom have an indirect
+ // branch back to itself. We can do this here because at this point, every
+ // predecessor of this block has an available value. This is basically just
+ // automating the construction of a PHI node for this target.
+ unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
+
+ // Insert a comparison of the incoming target register with this block's
+ // address.
+ auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Check directly against a relocated immediate when we can.
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
+ .addReg(TargetReg, RegState::Kill)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ } else {
+ // Otherwise compute the address into a register first.
+ unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ auto AddrI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
+ .addReg(TargetReg, RegState::Kill)
+ .addReg(AddrReg, RegState::Kill);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ }
+
+ // Now cmov over the predicate if the comparison wasn't equal.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
+ .addReg(PS->InitialReg)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+ CMovs.push_back(&*CMovI);
+
+ // And put the new value into the available values for SSA form of our
+ // predicate state.
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
+ }
+
+ // Return all the newly inserted cmov instructions of the predicate state.
+ return CMovs;
+}
+
/// Returns true if the instruction has no behavior (specified or otherwise)
/// that is based on the value of any of its register operands
///
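Note on the PHI construction used by the new tracePredStateThroughIndirectBranches above: each block ending in an indirect branch (and each predecessor in which a synthetic block address is materialized) publishes its target-address register to a MachineSSAUpdater, and querying the value "in the middle" of a target block then builds the PHI joining those values. The following is a minimal sketch of that pattern only, not code from the patch; the function name, the Pred1/Pred2/Target blocks, and the register arguments are illustrative assumptions.

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/MachineSSAUpdater.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"

    using namespace llvm;

    // Sketch only: joins per-predecessor target-address registers into one
    // SSA value in the successor, the way the pass does for indirect-branch
    // targets. All parameter names here are hypothetical.
    unsigned buildTargetAddressPHI(MachineFunction &MF,
                                   const TargetRegisterClass *AddrRC,
                                   MachineBasicBlock *Pred1, unsigned TargetReg1,
                                   MachineBasicBlock *Pred2, unsigned TargetReg2,
                                   MachineBasicBlock *Target) {
      MachineSSAUpdater AddrSSA(MF);
      // Seed the updater with a virtual register of the address class
      // (GR64 in the patch) so it knows what kind of value it is joining.
      AddrSSA.Initialize(MF.getRegInfo().createVirtualRegister(AddrRC));

      // Each predecessor publishes the value it knows: the register its
      // indirect branch jumped through, or a freshly materialized block
      // address.
      AddrSSA.AddAvailableValue(Pred1, TargetReg1);
      AddrSSA.AddAvailableValue(Pred2, TargetReg2);

      // Asking for the value in the middle of the successor inserts the PHI
      // node joining the per-predecessor values; the pass then compares this
      // register against the block's own address and cmov-poisons the
      // predicate state on a mismatch.
      return AddrSSA.GetValueInMiddleOfBlock(Target);
    }

The patch uses this pattern twice: once for the branch-target addresses (TargetAddrSSA) and once for the predicate state itself (PS->SSA), which is how the newly inserted cmov results flow into the rest of the hardening.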
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll b/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
index be8db624572..a94dc9219e3 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -data-sections | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -relocation-model pic -data-sections | FileCheck %s --check-prefix=X64-PIC
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -data-sections -mattr=+retpoline | FileCheck %s --check-prefix=X64-RETPOLINE
;
; FIXME: Add support for 32-bit.
@@ -32,6 +33,24 @@ define i32 @test_indirect_call(i32 ()** %ptr) nounwind {
; X64-NEXT: popq %rcx
; X64-NEXT: retq
;
+; X64-PIC-LABEL: test_indirect_call:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: pushq %rax
+; X64-PIC-NEXT: movq %rsp, %rax
+; X64-PIC-NEXT: movq $-1, %rcx
+; X64-PIC-NEXT: sarq $63, %rax
+; X64-PIC-NEXT: movq (%rdi), %rcx
+; X64-PIC-NEXT: orq %rax, %rcx
+; X64-PIC-NEXT: shlq $47, %rax
+; X64-PIC-NEXT: orq %rax, %rsp
+; X64-PIC-NEXT: callq *%rcx
+; X64-PIC-NEXT: movq %rsp, %rcx
+; X64-PIC-NEXT: sarq $63, %rcx
+; X64-PIC-NEXT: shlq $47, %rcx
+; X64-PIC-NEXT: orq %rcx, %rsp
+; X64-PIC-NEXT: popq %rcx
+; X64-PIC-NEXT: retq
+;
; X64-RETPOLINE-LABEL: test_indirect_call:
; X64-RETPOLINE: # %bb.0: # %entry
; X64-RETPOLINE-NEXT: pushq %rax
@@ -67,6 +86,17 @@ define i32 @test_indirect_tail_call(i32 ()** %ptr) nounwind {
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: jmpq *%rcx # TAILCALL
;
+; X64-PIC-LABEL: test_indirect_tail_call:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: movq %rsp, %rax
+; X64-PIC-NEXT: movq $-1, %rcx
+; X64-PIC-NEXT: sarq $63, %rax
+; X64-PIC-NEXT: movq (%rdi), %rcx
+; X64-PIC-NEXT: orq %rax, %rcx
+; X64-PIC-NEXT: shlq $47, %rax
+; X64-PIC-NEXT: orq %rax, %rsp
+; X64-PIC-NEXT: jmpq *%rcx # TAILCALL
+;
; X64-RETPOLINE-LABEL: test_indirect_tail_call:
; X64-RETPOLINE: # %bb.0: # %entry
; X64-RETPOLINE-NEXT: movq %rsp, %rax
@@ -90,7 +120,7 @@ define i32 @test_indirect_call_global() nounwind {
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %rcx
; X64-NEXT: sarq $63, %rax
-; X64-NEXT: movq {{.*}}(%rip), %rcx
+; X64-NEXT: movq global_fnptr(%rip), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
@@ -102,13 +132,32 @@ define i32 @test_indirect_call_global() nounwind {
; X64-NEXT: popq %rcx
; X64-NEXT: retq
;
+; X64-PIC-LABEL: test_indirect_call_global:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: pushq %rax
+; X64-PIC-NEXT: movq %rsp, %rax
+; X64-PIC-NEXT: movq $-1, %rcx
+; X64-PIC-NEXT: sarq $63, %rax
+; X64-PIC-NEXT: movq global_fnptr@GOTPCREL(%rip), %rcx
+; X64-PIC-NEXT: movq (%rcx), %rcx
+; X64-PIC-NEXT: orq %rax, %rcx
+; X64-PIC-NEXT: shlq $47, %rax
+; X64-PIC-NEXT: orq %rax, %rsp
+; X64-PIC-NEXT: callq *%rcx
+; X64-PIC-NEXT: movq %rsp, %rcx
+; X64-PIC-NEXT: sarq $63, %rcx
+; X64-PIC-NEXT: shlq $47, %rcx
+; X64-PIC-NEXT: orq %rcx, %rsp
+; X64-PIC-NEXT: popq %rcx
+; X64-PIC-NEXT: retq
+;
; X64-RETPOLINE-LABEL: test_indirect_call_global:
; X64-RETPOLINE: # %bb.0: # %entry
; X64-RETPOLINE-NEXT: pushq %rax
; X64-RETPOLINE-NEXT: movq %rsp, %rax
; X64-RETPOLINE-NEXT: movq $-1, %rcx
; X64-RETPOLINE-NEXT: sarq $63, %rax
-; X64-RETPOLINE-NEXT: movq {{.*}}(%rip), %r11
+; X64-RETPOLINE-NEXT: movq global_fnptr(%rip), %r11
; X64-RETPOLINE-NEXT: shlq $47, %rax
; X64-RETPOLINE-NEXT: orq %rax, %rsp
; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11
@@ -130,18 +179,30 @@ define i32 @test_indirect_tail_call_global() nounwind {
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %rcx
; X64-NEXT: sarq $63, %rax
-; X64-NEXT: movq {{.*}}(%rip), %rcx
+; X64-NEXT: movq global_fnptr(%rip), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: jmpq *%rcx # TAILCALL
;
+; X64-PIC-LABEL: test_indirect_tail_call_global:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: movq %rsp, %rax
+; X64-PIC-NEXT: movq $-1, %rcx
+; X64-PIC-NEXT: sarq $63, %rax
+; X64-PIC-NEXT: movq global_fnptr@GOTPCREL(%rip), %rcx
+; X64-PIC-NEXT: movq (%rcx), %rcx
+; X64-PIC-NEXT: orq %rax, %rcx
+; X64-PIC-NEXT: shlq $47, %rax
+; X64-PIC-NEXT: orq %rax, %rsp
+; X64-PIC-NEXT: jmpq *%rcx # TAILCALL
+;
; X64-RETPOLINE-LABEL: test_indirect_tail_call_global:
; X64-RETPOLINE: # %bb.0: # %entry
; X64-RETPOLINE-NEXT: movq %rsp, %rax
; X64-RETPOLINE-NEXT: movq $-1, %rcx
; X64-RETPOLINE-NEXT: sarq $63, %rax
-; X64-RETPOLINE-NEXT: movq {{.*}}(%rip), %r11
+; X64-RETPOLINE-NEXT: movq global_fnptr(%rip), %r11
; X64-RETPOLINE-NEXT: shlq $47, %rax
; X64-RETPOLINE-NEXT: orq %rax, %rsp
; X64-RETPOLINE-NEXT: jmp __llvm_retpoline_r11 # TAILCALL
@@ -157,25 +218,69 @@ define i32 @test_indirectbr(i8** %ptr) nounwind {
; X64-NEXT: movq %rsp, %rcx
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: jmpq *%rax
+; X64-NEXT: movq (%rdi), %rdx
+; X64-NEXT: orq %rcx, %rdx
+; X64-NEXT: jmpq *%rdx
; X64-NEXT: .LBB4_1: # %bb0
+; X64-NEXT: cmpq $.LBB4_1, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $2, %eax
; X64-NEXT: jmp .LBB4_2
; X64-NEXT: .LBB4_4: # %bb2
+; X64-NEXT: cmpq $.LBB4_4, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $13, %eax
; X64-NEXT: jmp .LBB4_2
; X64-NEXT: .LBB4_5: # %bb3
+; X64-NEXT: cmpq $.LBB4_5, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $42, %eax
; X64-NEXT: jmp .LBB4_2
; X64-NEXT: .LBB4_3: # %bb1
+; X64-NEXT: cmpq $.LBB4_3, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $7, %eax
; X64-NEXT: .LBB4_2: # %bb0
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: orq %rcx, %rsp
; X64-NEXT: retq
;
+; X64-PIC-LABEL: test_indirectbr:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: movq %rsp, %rcx
+; X64-PIC-NEXT: movq $-1, %rax
+; X64-PIC-NEXT: sarq $63, %rcx
+; X64-PIC-NEXT: movq (%rdi), %rdx
+; X64-PIC-NEXT: orq %rcx, %rdx
+; X64-PIC-NEXT: jmpq *%rdx
+; X64-PIC-NEXT: .LBB4_1: # %bb0
+; X64-PIC-NEXT: leaq .LBB4_1(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $2, %eax
+; X64-PIC-NEXT: jmp .LBB4_2
+; X64-PIC-NEXT: .LBB4_4: # %bb2
+; X64-PIC-NEXT: leaq .LBB4_4(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $13, %eax
+; X64-PIC-NEXT: jmp .LBB4_2
+; X64-PIC-NEXT: .LBB4_5: # %bb3
+; X64-PIC-NEXT: leaq .LBB4_5(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $42, %eax
+; X64-PIC-NEXT: jmp .LBB4_2
+; X64-PIC-NEXT: .LBB4_3: # %bb1
+; X64-PIC-NEXT: leaq .LBB4_3(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $7, %eax
+; X64-PIC-NEXT: .LBB4_2: # %bb0
+; X64-PIC-NEXT: shlq $47, %rcx
+; X64-PIC-NEXT: orq %rcx, %rsp
+; X64-PIC-NEXT: retq
+;
; X64-RETPOLINE-LABEL: test_indirectbr:
; X64-RETPOLINE: # %bb.0: # %entry
entry:
@@ -201,30 +306,80 @@ define i32 @test_indirectbr_global(i32 %idx) nounwind {
; X64-NEXT: movq %rsp, %rcx
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: movslq %edi, %rax
-; X64-NEXT: movq global_blockaddrs(,%rax,8), %rax
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: jmpq *%rax
+; X64-NEXT: movslq %edi, %rdx
+; X64-NEXT: movq global_blockaddrs(,%rdx,8), %rdx
+; X64-NEXT: orq %rcx, %rdx
+; X64-NEXT: jmpq *%rdx
; X64-NEXT: .Ltmp0: # Block address taken
; X64-NEXT: .LBB5_1: # %bb0
+; X64-NEXT: cmpq $.LBB5_1, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $2, %eax
; X64-NEXT: jmp .LBB5_2
; X64-NEXT: .Ltmp1: # Block address taken
; X64-NEXT: .LBB5_4: # %bb2
+; X64-NEXT: cmpq $.LBB5_4, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $13, %eax
; X64-NEXT: jmp .LBB5_2
; X64-NEXT: .Ltmp2: # Block address taken
; X64-NEXT: .LBB5_5: # %bb3
+; X64-NEXT: cmpq $.LBB5_5, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $42, %eax
; X64-NEXT: jmp .LBB5_2
; X64-NEXT: .Ltmp3: # Block address taken
; X64-NEXT: .LBB5_3: # %bb1
+; X64-NEXT: cmpq $.LBB5_3, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $7, %eax
; X64-NEXT: .LBB5_2: # %bb0
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: orq %rcx, %rsp
; X64-NEXT: retq
;
+; X64-PIC-LABEL: test_indirectbr_global:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: movq %rsp, %rcx
+; X64-PIC-NEXT: movq $-1, %rax
+; X64-PIC-NEXT: sarq $63, %rcx
+; X64-PIC-NEXT: movslq %edi, %rdx
+; X64-PIC-NEXT: movq global_blockaddrs@GOTPCREL(%rip), %rsi
+; X64-PIC-NEXT: movq (%rsi,%rdx,8), %rdx
+; X64-PIC-NEXT: orq %rcx, %rdx
+; X64-PIC-NEXT: jmpq *%rdx
+; X64-PIC-NEXT: .Ltmp0: # Block address taken
+; X64-PIC-NEXT: .LBB5_1: # %bb0
+; X64-PIC-NEXT: leaq .LBB5_1(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $2, %eax
+; X64-PIC-NEXT: jmp .LBB5_2
+; X64-PIC-NEXT: .Ltmp1: # Block address taken
+; X64-PIC-NEXT: .LBB5_4: # %bb2
+; X64-PIC-NEXT: leaq .LBB5_4(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $13, %eax
+; X64-PIC-NEXT: jmp .LBB5_2
+; X64-PIC-NEXT: .Ltmp2: # Block address taken
+; X64-PIC-NEXT: .LBB5_5: # %bb3
+; X64-PIC-NEXT: leaq .LBB5_5(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $42, %eax
+; X64-PIC-NEXT: jmp .LBB5_2
+; X64-PIC-NEXT: .Ltmp3: # Block address taken
+; X64-PIC-NEXT: .LBB5_3: # %bb1
+; X64-PIC-NEXT: leaq .LBB5_3(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $7, %eax
+; X64-PIC-NEXT: .LBB5_2: # %bb0
+; X64-PIC-NEXT: shlq $47, %rcx
+; X64-PIC-NEXT: orq %rcx, %rsp
+; X64-PIC-NEXT: retq
+;
; X64-RETPOLINE-LABEL: test_indirectbr_global:
; X64-RETPOLINE: # %bb.0: # %entry
; X64-RETPOLINE-NEXT: movq %rsp, %rcx
@@ -296,30 +451,85 @@ define i32 @test_switch_jumptable(i32 %idx) nounwind {
; X64-NEXT: ja .LBB6_2
; X64-NEXT: # %bb.1: # %entry
; X64-NEXT: cmovaq %rax, %rcx
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: movq .LJTI6_0(,%rax,8), %rax
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: jmpq *%rax
-; X64-NEXT: .LBB6_3: # %bb1
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: movq .LJTI6_0(,%rdx,8), %rdx
+; X64-NEXT: orq %rcx, %rdx
+; X64-NEXT: jmpq *%rdx
+; X64-NEXT: .LBB6_4: # %bb1
+; X64-NEXT: cmpq $.LBB6_4, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $7, %eax
-; X64-NEXT: jmp .LBB6_4
+; X64-NEXT: jmp .LBB6_3
; X64-NEXT: .LBB6_2: # %bb0
; X64-NEXT: cmovbeq %rax, %rcx
; X64-NEXT: movl $2, %eax
-; X64-NEXT: jmp .LBB6_4
+; X64-NEXT: jmp .LBB6_3
; X64-NEXT: .LBB6_5: # %bb2
+; X64-NEXT: cmpq $.LBB6_5, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $13, %eax
-; X64-NEXT: jmp .LBB6_4
+; X64-NEXT: jmp .LBB6_3
; X64-NEXT: .LBB6_6: # %bb3
+; X64-NEXT: cmpq $.LBB6_6, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $42, %eax
-; X64-NEXT: jmp .LBB6_4
+; X64-NEXT: jmp .LBB6_3
; X64-NEXT: .LBB6_7: # %bb5
+; X64-NEXT: cmpq $.LBB6_7, %rdx
+; X64-NEXT: cmovneq %rax, %rcx
; X64-NEXT: movl $11, %eax
-; X64-NEXT: .LBB6_4: # %bb1
+; X64-NEXT: .LBB6_3: # %bb0
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: orq %rcx, %rsp
; X64-NEXT: retq
;
+; X64-PIC-LABEL: test_switch_jumptable:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: movq %rsp, %rcx
+; X64-PIC-NEXT: movq $-1, %rax
+; X64-PIC-NEXT: sarq $63, %rcx
+; X64-PIC-NEXT: cmpl $3, %edi
+; X64-PIC-NEXT: ja .LBB6_2
+; X64-PIC-NEXT: # %bb.1: # %entry
+; X64-PIC-NEXT: cmovaq %rax, %rcx
+; X64-PIC-NEXT: movl %edi, %edx
+; X64-PIC-NEXT: leaq .LJTI6_0(%rip), %rsi
+; X64-PIC-NEXT: movslq (%rsi,%rdx,4), %rdx
+; X64-PIC-NEXT: addq %rsi, %rdx
+; X64-PIC-NEXT: orq %rcx, %rdx
+; X64-PIC-NEXT: jmpq *%rdx
+; X64-PIC-NEXT: .LBB6_4: # %bb1
+; X64-PIC-NEXT: leaq .LBB6_4(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $7, %eax
+; X64-PIC-NEXT: jmp .LBB6_3
+; X64-PIC-NEXT: .LBB6_2: # %bb0
+; X64-PIC-NEXT: cmovbeq %rax, %rcx
+; X64-PIC-NEXT: movl $2, %eax
+; X64-PIC-NEXT: jmp .LBB6_3
+; X64-PIC-NEXT: .LBB6_5: # %bb2
+; X64-PIC-NEXT: leaq .LBB6_5(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $13, %eax
+; X64-PIC-NEXT: jmp .LBB6_3
+; X64-PIC-NEXT: .LBB6_6: # %bb3
+; X64-PIC-NEXT: leaq .LBB6_6(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $42, %eax
+; X64-PIC-NEXT: jmp .LBB6_3
+; X64-PIC-NEXT: .LBB6_7: # %bb5
+; X64-PIC-NEXT: leaq .LBB6_7(%rip), %rsi
+; X64-PIC-NEXT: cmpq %rsi, %rdx
+; X64-PIC-NEXT: cmovneq %rax, %rcx
+; X64-PIC-NEXT: movl $11, %eax
+; X64-PIC-NEXT: .LBB6_3: # %bb0
+; X64-PIC-NEXT: shlq $47, %rcx
+; X64-PIC-NEXT: orq %rcx, %rsp
+; X64-PIC-NEXT: retq
+;
; X64-RETPOLINE-LABEL: test_switch_jumptable:
; X64-RETPOLINE: # %bb.0: # %entry
; X64-RETPOLINE-NEXT: movq %rsp, %rcx
@@ -389,3 +599,190 @@ bb3:
bb5:
ret i32 11
}
+
+; This function's switch is crafted to trigger jump-table lowering in the x86
+; backend so that we can test how the exact jump table lowering behaves, but
+; also arranges for fallthroughs from case to case to ensure that this pattern
+; too can be handled.
+define i32 @test_switch_jumptable_fallthrough(i32 %idx, i32* %a.ptr, i32* %b.ptr, i32* %c.ptr, i32* %d.ptr) nounwind {
+; X64-LABEL: test_switch_jumptable_fallthrough:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rsp, %r9
+; X64-NEXT: movq $-1, %r10
+; X64-NEXT: sarq $63, %r9
+; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: ja .LBB7_2
+; X64-NEXT: # %bb.1: # %entry
+; X64-NEXT: cmovaq %r10, %r9
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: movl %edi, %esi
+; X64-NEXT: movq .LJTI7_0(,%rsi,8), %rsi
+; X64-NEXT: orq %r9, %rsi
+; X64-NEXT: jmpq *%rsi
+; X64-NEXT: .LBB7_2: # %bb0
+; X64-NEXT: cmovbeq %r10, %r9
+; X64-NEXT: movl (%rsi), %eax
+; X64-NEXT: orl %r9d, %eax
+; X64-NEXT: movq $.LBB7_3, %rsi
+; X64-NEXT: .LBB7_3: # %bb1
+; X64-NEXT: cmpq $.LBB7_3, %rsi
+; X64-NEXT: cmovneq %r10, %r9
+; X64-NEXT: addl (%rdx), %eax
+; X64-NEXT: orl %r9d, %eax
+; X64-NEXT: movq $.LBB7_4, %rsi
+; X64-NEXT: .LBB7_4: # %bb2
+; X64-NEXT: cmpq $.LBB7_4, %rsi
+; X64-NEXT: cmovneq %r10, %r9
+; X64-NEXT: addl (%rcx), %eax
+; X64-NEXT: orl %r9d, %eax
+; X64-NEXT: movq $.LBB7_5, %rsi
+; X64-NEXT: .LBB7_5: # %bb3
+; X64-NEXT: cmpq $.LBB7_5, %rsi
+; X64-NEXT: cmovneq %r10, %r9
+; X64-NEXT: addl (%r8), %eax
+; X64-NEXT: orl %r9d, %eax
+; X64-NEXT: movq $.LBB7_6, %rsi
+; X64-NEXT: .LBB7_6: # %bb4
+; X64-NEXT: cmpq $.LBB7_6, %rsi
+; X64-NEXT: cmovneq %r10, %r9
+; X64-NEXT: shlq $47, %r9
+; X64-NEXT: orq %r9, %rsp
+; X64-NEXT: retq
+;
+; X64-PIC-LABEL: test_switch_jumptable_fallthrough:
+; X64-PIC: # %bb.0: # %entry
+; X64-PIC-NEXT: movq %rsp, %r9
+; X64-PIC-NEXT: movq $-1, %r10
+; X64-PIC-NEXT: sarq $63, %r9
+; X64-PIC-NEXT: cmpl $3, %edi
+; X64-PIC-NEXT: ja .LBB7_2
+; X64-PIC-NEXT: # %bb.1: # %entry
+; X64-PIC-NEXT: cmovaq %r10, %r9
+; X64-PIC-NEXT: xorl %eax, %eax
+; X64-PIC-NEXT: movl %edi, %esi
+; X64-PIC-NEXT: leaq .LJTI7_0(%rip), %rdi
+; X64-PIC-NEXT: movslq (%rdi,%rsi,4), %rsi
+; X64-PIC-NEXT: addq %rdi, %rsi
+; X64-PIC-NEXT: orq %r9, %rsi
+; X64-PIC-NEXT: jmpq *%rsi
+; X64-PIC-NEXT: .LBB7_2: # %bb0
+; X64-PIC-NEXT: cmovbeq %r10, %r9
+; X64-PIC-NEXT: movl (%rsi), %eax
+; X64-PIC-NEXT: orl %r9d, %eax
+; X64-PIC-NEXT: leaq .LBB7_3(%rip), %rsi
+; X64-PIC-NEXT: .LBB7_3: # %bb1
+; X64-PIC-NEXT: leaq .LBB7_3(%rip), %rdi
+; X64-PIC-NEXT: cmpq %rdi, %rsi
+; X64-PIC-NEXT: cmovneq %r10, %r9
+; X64-PIC-NEXT: addl (%rdx), %eax
+; X64-PIC-NEXT: orl %r9d, %eax
+; X64-PIC-NEXT: leaq .LBB7_4(%rip), %rsi
+; X64-PIC-NEXT: .LBB7_4: # %bb2
+; X64-PIC-NEXT: leaq .LBB7_4(%rip), %rdx
+; X64-PIC-NEXT: cmpq %rdx, %rsi
+; X64-PIC-NEXT: cmovneq %r10, %r9
+; X64-PIC-NEXT: addl (%rcx), %eax
+; X64-PIC-NEXT: orl %r9d, %eax
+; X64-PIC-NEXT: leaq .LBB7_5(%rip), %rsi
+; X64-PIC-NEXT: .LBB7_5: # %bb3
+; X64-PIC-NEXT: leaq .LBB7_5(%rip), %rcx
+; X64-PIC-NEXT: cmpq %rcx, %rsi
+; X64-PIC-NEXT: cmovneq %r10, %r9
+; X64-PIC-NEXT: addl (%r8), %eax
+; X64-PIC-NEXT: orl %r9d, %eax
+; X64-PIC-NEXT: leaq .LBB7_6(%rip), %rsi
+; X64-PIC-NEXT: .LBB7_6: # %bb4
+; X64-PIC-NEXT: leaq .LBB7_6(%rip), %rcx
+; X64-PIC-NEXT: cmpq %rcx, %rsi
+; X64-PIC-NEXT: cmovneq %r10, %r9
+; X64-PIC-NEXT: shlq $47, %r9
+; X64-PIC-NEXT: orq %r9, %rsp
+; X64-PIC-NEXT: retq
+;
+; X64-RETPOLINE-LABEL: test_switch_jumptable_fallthrough:
+; X64-RETPOLINE: # %bb.0: # %entry
+; X64-RETPOLINE-NEXT: movq %rsp, %r9
+; X64-RETPOLINE-NEXT: movq $-1, %r10
+; X64-RETPOLINE-NEXT: sarq $63, %r9
+; X64-RETPOLINE-NEXT: xorl %eax, %eax
+; X64-RETPOLINE-NEXT: cmpl $1, %edi
+; X64-RETPOLINE-NEXT: jg .LBB8_5
+; X64-RETPOLINE-NEXT: # %bb.1: # %entry
+; X64-RETPOLINE-NEXT: cmovgq %r10, %r9
+; X64-RETPOLINE-NEXT: testl %edi, %edi
+; X64-RETPOLINE-NEXT: je .LBB8_2
+; X64-RETPOLINE-NEXT: # %bb.3: # %entry
+; X64-RETPOLINE-NEXT: cmoveq %r10, %r9
+; X64-RETPOLINE-NEXT: cmpl $1, %edi
+; X64-RETPOLINE-NEXT: jne .LBB8_8
+; X64-RETPOLINE-NEXT: # %bb.4:
+; X64-RETPOLINE-NEXT: cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT: jmp .LBB8_10
+; X64-RETPOLINE-NEXT: .LBB8_5: # %entry
+; X64-RETPOLINE-NEXT: cmovleq %r10, %r9
+; X64-RETPOLINE-NEXT: cmpl $2, %edi
+; X64-RETPOLINE-NEXT: je .LBB8_6
+; X64-RETPOLINE-NEXT: # %bb.7: # %entry
+; X64-RETPOLINE-NEXT: cmoveq %r10, %r9
+; X64-RETPOLINE-NEXT: cmpl $3, %edi
+; X64-RETPOLINE-NEXT: jne .LBB8_8
+; X64-RETPOLINE-NEXT: # %bb.13:
+; X64-RETPOLINE-NEXT: cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT: jmp .LBB8_12
+; X64-RETPOLINE-NEXT: .LBB8_8:
+; X64-RETPOLINE-NEXT: cmoveq %r10, %r9
+; X64-RETPOLINE-NEXT: movl (%rsi), %eax
+; X64-RETPOLINE-NEXT: orl %r9d, %eax
+; X64-RETPOLINE-NEXT: jmp .LBB8_9
+; X64-RETPOLINE-NEXT: .LBB8_2:
+; X64-RETPOLINE-NEXT: cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT: .LBB8_9: # %bb1
+; X64-RETPOLINE-NEXT: addl (%rdx), %eax
+; X64-RETPOLINE-NEXT: orl %r9d, %eax
+; X64-RETPOLINE-NEXT: .LBB8_10: # %bb2
+; X64-RETPOLINE-NEXT: addl (%rcx), %eax
+; X64-RETPOLINE-NEXT: orl %r9d, %eax
+; X64-RETPOLINE-NEXT: jmp .LBB8_11
+; X64-RETPOLINE-NEXT: .LBB8_6:
+; X64-RETPOLINE-NEXT: cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT: .LBB8_11: # %bb3
+; X64-RETPOLINE-NEXT: addl (%r8), %eax
+; X64-RETPOLINE-NEXT: orl %r9d, %eax
+; X64-RETPOLINE-NEXT: .LBB8_12: # %bb4
+; X64-RETPOLINE-NEXT: shlq $47, %r9
+; X64-RETPOLINE-NEXT: orq %r9, %rsp
+; X64-RETPOLINE-NEXT: retq
+entry:
+ switch i32 %idx, label %bb0 [
+ i32 0, label %bb1
+ i32 1, label %bb2
+ i32 2, label %bb3
+ i32 3, label %bb4
+ ]
+
+bb0:
+ %a = load i32, i32* %a.ptr
+ br label %bb1
+
+bb1:
+ %b.phi = phi i32 [ 0, %entry ], [ %a, %bb0 ]
+ %b = load i32, i32* %b.ptr
+ %b.sum = add i32 %b.phi, %b
+ br label %bb2
+
+bb2:
+ %c.phi = phi i32 [ 0, %entry ], [ %b.sum, %bb1 ]
+ %c = load i32, i32* %c.ptr
+ %c.sum = add i32 %c.phi, %c
+ br label %bb3
+
+bb3:
+ %d.phi = phi i32 [ 0, %entry ], [ %c.sum, %bb2 ]
+ %d = load i32, i32* %d.ptr
+ %d.sum = add i32 %d.phi, %d
+ br label %bb4
+
+bb4:
+ %e.phi = phi i32 [ 0, %entry ], [ %d.sum, %bb3 ]
+ ret i32 %e.phi
+}