Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86.td | 9
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 5
-rw-r--r-- | llvm/lib/Target/X86/X86IndirectThunks.cpp | 49
-rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 13
-rw-r--r-- | llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll | 281
5 files changed, 349 insertions, 8 deletions
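
In short, this patch adds an x86 subtarget feature, -mattr=+lvi-cfi, that routes every indirect call and branch through a serializing thunk, __llvm_lvi_thunk_r11, instead of emitting the transfer directly from a register or memory operand. As a minimal illustrative sketch of the effect (not part of the patch; the function name is hypothetical and the checks are modeled on the new test at the end of this diff, not verified output):

; Hypothetical example, modeled on llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll below.
; RUN: llc -mtriple=x86_64-unknown -mattr=+lvi-cfi < %s | FileCheck %s

define void @call_through_pointer(void ()* %fp) {
entry:
  ; With +lvi-cfi the backend avoids emitting `callq *%rdi` (or a
  ; memory-operand form); the target is copied into %r11 and control flow
  ; goes through the serializing thunk instead.
  call void %fp()
  ret void
}

; CHECK-LABEL: call_through_pointer:
; CHECK: movq %rdi, %r11
; CHECK: {{(callq|jmp)}} __llvm_lvi_thunk_r11

; The thunk itself is emitted once per module and simply fences before
; jumping through %r11:
; CHECK: __llvm_lvi_thunk_r11:
; CHECK: lfence
; CHECK: jmpq *%r11

The lowering reuses the existing retpoline thunk machinery, which is why the diff mostly extends X86IndirectThunks.cpp and the thunk-related Subtarget getters rather than adding a separate pass.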
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index a2b11d55f65..edc47482576 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -426,6 +426,15 @@ def FeatureRetpolineExternalThunk "ourselves. Only has effect when combined with some other retpoline " "feature", [FeatureRetpolineIndirectCalls]>; +// Mitigate LVI attacks against indirect calls/branches and call returns +def FeatureLVIControlFlowIntegrity + : SubtargetFeature< + "lvi-cfi", "UseLVIControlFlowIntegrity", "true", + "Prevent indirect calls/branches from using a memory operand, and " + "precede all indirect calls/branches from a register with an " + "LFENCE instruction to serialize control flow. Also decompose RET " + "instructions into a POP+LFENCE+JMP sequence.">; + // Direct Move instructions. def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", "Support movdiri instruction">; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index dbcebd54f73..c8720d9ae3a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31417,6 +31417,11 @@ static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget, } llvm_unreachable("unexpected reg for retpoline"); } + + if (Subtarget.useLVIControlFlowIntegrity()) { + assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); + return "__llvm_lvi_thunk_r11"; + } llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature"); } diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp index e6408e986f1..36b9c3ccc95 100644 --- a/llvm/lib/Target/X86/X86IndirectThunks.cpp +++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -14,6 +14,8 @@ /// /// Currently supported thunks include: /// - Retpoline -- A RET-implemented trampoline that lowers indirect calls +/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization +/// before making an indirect call/jump /// /// Note that the reason that this is implemented as a MachineFunctionPass and /// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline @@ -44,11 +46,14 @@ using namespace llvm; #define DEBUG_TYPE "x86-retpoline-thunks" static const char RetpolineNamePrefix[] = "__llvm_retpoline_"; -static const char R11RetpolineName[] = "__llvm_retpoline_r11"; -static const char EAXRetpolineName[] = "__llvm_retpoline_eax"; -static const char ECXRetpolineName[] = "__llvm_retpoline_ecx"; -static const char EDXRetpolineName[] = "__llvm_retpoline_edx"; -static const char EDIRetpolineName[] = "__llvm_retpoline_edi"; +static const char R11RetpolineName[] = "__llvm_retpoline_r11"; +static const char EAXRetpolineName[] = "__llvm_retpoline_eax"; +static const char ECXRetpolineName[] = "__llvm_retpoline_ecx"; +static const char EDXRetpolineName[] = "__llvm_retpoline_edx"; +static const char EDIRetpolineName[] = "__llvm_retpoline_edi"; + +static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_"; +static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11"; namespace { template <typename Derived> class ThunkInserter { @@ -80,6 +85,38 @@ struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> { void populateThunk(MachineFunction &MF); }; +struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> { + const char *getThunkPrefix() { return LVIThunkNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity(); + } + void 
insertThunks(MachineModuleInfo &MMI) { + createThunkFunction(MMI, R11LVIThunkName); + } + void populateThunk(MachineFunction &MF) { + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two bbs for the entry block. + MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); + + // This code mitigates LVI by replacing each indirect call/jump with a + // direct call/jump to a thunk that looks like: + // ``` + // lfence + // jmpq *%r11 + // ``` + // This ensures that if the value in register %r11 was loaded from memory, + // then the value in %r11 is (architecturally) correct prior to the jump. + const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11); + MF.front().addLiveIn(X86::R11); + return; + } +}; + class X86IndirectThunks : public MachineFunctionPass { public: static char ID; @@ -98,7 +135,7 @@ public: } private: - std::tuple<RetpolineThunkInserter> TIs; + std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs; // FIXME: When LLVM moves to C++17, these can become folds template <typename... ThunkInserterT> diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 35983797bd3..eb5c293e5cb 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -421,6 +421,12 @@ protected: /// than emitting one inside the compiler. bool UseRetpolineExternalThunk = false; + /// Prevent generation of indirect call/branch instructions from memory, + /// and force all indirect call/branch instructions from a register to be + /// preceded by an LFENCE. Also decompose RET instructions into a + /// POP+LFENCE+JMP sequence. + bool UseLVIControlFlowIntegrity = false; + /// Use software floating point for code generation. bool UseSoftFloat = false; @@ -711,13 +717,16 @@ public: // These are generic getters that OR together all of the thunk types // supported by the subtarget. Therefore useIndirectThunk*() will return true // if any respective thunk feature is enabled. - bool useIndirectThunkCalls() const { return useRetpolineIndirectCalls(); } + bool useIndirectThunkCalls() const { + return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity(); + } bool useIndirectThunkBranches() const { - return useRetpolineIndirectBranches(); + return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity(); } bool preferMaskRegisters() const { return PreferMaskRegisters; } bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; } + bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; } unsigned getPreferVectorWidth() const { return PreferVectorWidth; } unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; } diff --git a/llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll b/llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll new file mode 100644 index 00000000000..d2caf6e1e9e --- /dev/null +++ b/llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll @@ -0,0 +1,281 @@ +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi < %s | FileCheck %s --check-prefix=X64 +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi -O0 < %s | FileCheck %s --check-prefix=X64FAST +; +; Note that a lot of this code was lifted from retpoline.ll. + +declare void @bar(i32) + +; Test a simple indirect call and tail call. 
+define void @icall_reg(void (i32)* %fp, i32 %x) { +entry: + tail call void @bar(i32 %x) + tail call void %fp(i32 %x) + tail call void @bar(i32 %x) + tail call void %fp(i32 %x) + ret void +} + +; X64-LABEL: icall_reg: +; X64-DAG: movq %rdi, %[[fp:[^ ]*]] +; X64-DAG: movl %esi, %[[x:[^ ]*]] +; X64: movl %esi, %edi +; X64: callq bar +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq %[[fp]], %r11 +; X64: callq __llvm_lvi_thunk_r11 +; X64: movl %[[x]], %edi +; X64: callq bar +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq %[[fp]], %r11 +; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL + +; X64FAST-LABEL: icall_reg: +; X64FAST: callq bar +; X64FAST: callq __llvm_lvi_thunk_r11 +; X64FAST: callq bar +; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL + + +@global_fp = external global void (i32)* + +; Test an indirect call through a global variable. +define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 { + %fp1 = load void (i32)*, void (i32)** @global_fp + call void %fp1(i32 %x) + %fp2 = load void (i32)*, void (i32)** @global_fp + tail call void %fp2(i32 %x) + ret void +} + +; X64-LABEL: icall_global_fp: +; X64-DAG: movl %edi, %[[x:[^ ]*]] +; X64-DAG: movq global_fp(%rip), %r11 +; X64: callq __llvm_lvi_thunk_r11 +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq global_fp(%rip), %r11 +; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL + +; X64FAST-LABEL: icall_global_fp: +; X64FAST: movq global_fp(%rip), %r11 +; X64FAST: callq __llvm_lvi_thunk_r11 +; X64FAST: movq global_fp(%rip), %r11 +; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL + + +%struct.Foo = type { void (%struct.Foo*)** } + +; Test an indirect call through a vtable. +define void @vcall(%struct.Foo* %obj) #0 { + %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0 + %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field + %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1 + %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot + tail call void %fp(%struct.Foo* %obj) + tail call void %fp(%struct.Foo* %obj) + ret void +} + +; X64-LABEL: vcall: +; X64: movq %rdi, %[[obj:[^ ]*]] +; X64: movq (%rdi), %[[vptr:[^ ]*]] +; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]] +; X64: movq %[[fp]], %r11 +; X64: callq __llvm_lvi_thunk_r11 +; X64-DAG: movq %[[obj]], %rdi +; X64-DAG: movq %[[fp]], %r11 +; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL + +; X64FAST-LABEL: vcall: +; X64FAST: callq __llvm_lvi_thunk_r11 +; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL + + +declare void @direct_callee() + +define void @direct_tail() #0 { + tail call void @direct_callee() + ret void +} + +; X64-LABEL: direct_tail: +; X64: jmp direct_callee # TAILCALL +; X64FAST-LABEL: direct_tail: +; X64FAST: jmp direct_callee # TAILCALL + + +declare void @nonlazybind_callee() #1 + +define void @nonlazybind_caller() #0 { + call void @nonlazybind_callee() + tail call void @nonlazybind_callee() + ret void +} + +; X64-LABEL: nonlazybind_caller: +; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]] +; X64: movq %[[REG]], %r11 +; X64: callq __llvm_lvi_thunk_r11 +; X64: movq %[[REG]], %r11 +; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL +; X64FAST-LABEL: nonlazybind_caller: +; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11 +; X64FAST: callq __llvm_lvi_thunk_r11 +; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11 +; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL + + +; Check that a switch gets lowered using a jump table +define void @switch_jumptable(i32* %ptr, i64* %sink) #0 { +; X64-LABEL: switch_jumptable: +; X64_NOT: jmpq * +entry: + br 
label %header + +header: + %i = load volatile i32, i32* %ptr + switch i32 %i, label %bb0 [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + ] + +bb0: + store volatile i64 0, i64* %sink + br label %header + +bb1: + store volatile i64 1, i64* %sink + br label %header + +bb2: + store volatile i64 2, i64* %sink + br label %header + +bb3: + store volatile i64 3, i64* %sink + br label %header + +bb4: + store volatile i64 4, i64* %sink + br label %header + +bb5: + store volatile i64 5, i64* %sink + br label %header + +bb6: + store volatile i64 6, i64* %sink + br label %header + +bb7: + store volatile i64 7, i64* %sink + br label %header + +bb8: + store volatile i64 8, i64* %sink + br label %header + +bb9: + store volatile i64 9, i64* %sink + br label %header +} + + +@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0), + i8* blockaddress(@indirectbr_rewrite, %bb1), + i8* blockaddress(@indirectbr_rewrite, %bb2), + i8* blockaddress(@indirectbr_rewrite, %bb3), + i8* blockaddress(@indirectbr_rewrite, %bb4), + i8* blockaddress(@indirectbr_rewrite, %bb5), + i8* blockaddress(@indirectbr_rewrite, %bb6), + i8* blockaddress(@indirectbr_rewrite, %bb7), + i8* blockaddress(@indirectbr_rewrite, %bb8), + i8* blockaddress(@indirectbr_rewrite, %bb9)] + +; Check that when thunks are enabled the indirectbr instruction gets +; rewritten to use switch, and that in turn doesn't get lowered as a jump +; table. +define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 { +; X64-LABEL: indirectbr_rewrite: +; X64-NOT: jmpq * +entry: + %i0 = load i64, i64* %p + %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0 + %target0 = load i8*, i8** %target.i0 + indirectbr i8* %target0, [label %bb1, label %bb3] + +bb0: + store volatile i64 0, i64* %sink + br label %latch + +bb1: + store volatile i64 1, i64* %sink + br label %latch + +bb2: + store volatile i64 2, i64* %sink + br label %latch + +bb3: + store volatile i64 3, i64* %sink + br label %latch + +bb4: + store volatile i64 4, i64* %sink + br label %latch + +bb5: + store volatile i64 5, i64* %sink + br label %latch + +bb6: + store volatile i64 6, i64* %sink + br label %latch + +bb7: + store volatile i64 7, i64* %sink + br label %latch + +bb8: + store volatile i64 8, i64* %sink + br label %latch + +bb9: + store volatile i64 9, i64* %sink + br label %latch + +latch: + %i.next = load i64, i64* %p + %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next + %target.next = load i8*, i8** %target.i.next + ; Potentially hit a full 10 successors here so that even if we rewrite as + ; a switch it will try to be lowered with a jump table. + indirectbr i8* %target.next, [label %bb0, + label %bb1, + label %bb2, + label %bb3, + label %bb4, + label %bb5, + label %bb6, + label %bb7, + label %bb8, + label %bb9] +} + +; Lastly check that the necessary thunks were emitted. +; +; X64-LABEL: .section .text.__llvm_lvi_thunk_r11,{{.*}},__llvm_lvi_thunk_r11,comdat +; X64-NEXT: .hidden __llvm_lvi_thunk_r11 +; X64-NEXT: .weak __llvm_lvi_thunk_r11 +; X64: __llvm_lvi_thunk_r11: +; X64-NEXT: # {{.*}} # %entry +; X64-NEXT: lfence +; X64-NEXT: jmpq *%r11 + +attributes #1 = { nonlazybind } |