-rw-r--r-- | llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp |  38
-rw-r--r-- | llvm/test/CodeGen/X86/speculative-load-hardening.ll | 272
2 files changed, 297 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index cb8de34cf05..a4e9ead0070 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -173,6 +173,7 @@ private:
   MachineInstr *
   sinkPostLoadHardenedInst(MachineInstr &MI,
                            SmallPtrSetImpl<MachineInstr *> &HardenedLoads);
+  bool canHardenPostLoad(MachineInstr &MI);
   void hardenPostLoad(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkReturnInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkCallInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
@@ -1115,12 +1116,12 @@ void X86SpeculativeLoadHardeningPass::checkAllLoads(
           (IndexReg && LoadDepRegs.test(IndexReg)))
         continue;
 
-      // If post-load hardening is enabled, this load is known to be
-      // data-invariant, and we aren't already going to harden one of the
+      // If post-load hardening is enabled, this load is compatible with
+      // post-load hardening, and we aren't already going to harden one of the
       // address registers, queue it up to be hardened post-load. Notably, even
       // once hardened this won't introduce a useful dependency that could prune
       // out subsequent loads.
-      if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
+      if (EnablePostLoadHardening && canHardenPostLoad(MI) &&
           !HardenedAddrRegs.count(BaseReg) &&
           !HardenedAddrRegs.count(IndexReg)) {
         HardenPostLoad.insert(&MI);
@@ -1602,6 +1603,25 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
   return MI;
 }
 
+bool X86SpeculativeLoadHardeningPass::canHardenPostLoad(MachineInstr &MI) {
+  if (!isDataInvariantLoad(MI))
+    return false;
+
+  auto &DefOp = MI.getOperand(0);
+  unsigned OldDefReg = DefOp.getReg();
+
+  auto *DefRC = MRI->getRegClass(OldDefReg);
+  int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;
+  if (DefRegBytes > 8)
+    // We don't support post-load hardening of vectors.
+    return false;
+
+  const TargetRegisterClass *GPRRegClasses[] = {
+      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
+      &X86::GR64RegClass};
+  return DefRC->hasSuperClassEq(GPRRegClasses[Log2_32(DefRegBytes)]);
+}
+
 // We can harden non-leaking loads into register without touching the address
 // by just hiding all of the loaded bits. We use an `or` instruction to do
 // this because having the poison value be all ones allows us to use the same
@@ -1609,8 +1629,8 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
 // execution and coercing them to one is sufficient.
 void X86SpeculativeLoadHardeningPass::hardenPostLoad(
     MachineInstr &MI, MachineSSAUpdater &PredStateSSA) {
-  assert(isDataInvariantLoad(MI) &&
-         "Cannot get here with a non-invariant load!");
+  assert(canHardenPostLoad(MI) &&
+         "Invalid instruction for post-load hardening!");
 
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc Loc = MI.getDebugLoc();
@@ -1625,14 +1645,6 @@ void X86SpeculativeLoadHardeningPass::hardenPostLoad(
   unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
   unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
 
-#ifndef NDEBUG
-  const TargetRegisterClass *OrRegClasses[] = {
-      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
-      &X86::GR64RegClass};
-  assert(DefRC->hasSuperClassEq(OrRegClasses[Log2_32(DefRegBytes)]) &&
-         "Cannot define this register with OR instruction!");
-#endif
-
   unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
 
   auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
index e160ce1715d..db94bae5d47 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@@ -590,3 +590,275 @@ lpad:
   call void @sink(i32 %leak)
   unreachable
 }
+
+declare void @sink_float(float)
+declare void @sink_double(double)
+
+; Test direct and converting loads of floating point values.
+define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind {
+; X64-LABEL: test_fp_loads:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: movq %rcx, %r15
+; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq %rdi, %r12
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r12
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %rbx
+; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: cvtsd2ss %xmm0, %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: cvtss2sd %xmm0, %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r14
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2ssl (%r14), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r15
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2sdq (%r15), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2ssq (%r15), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2sdl (%r14), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: retq
+;
+; X64-LFENCE-LABEL: test_fp_loads:
+; X64-LFENCE: # %bb.0: # %entry
+; X64-LFENCE-NEXT: pushq %r15
+; X64-LFENCE-NEXT: pushq %r14
+; X64-LFENCE-NEXT: pushq %r12
+; X64-LFENCE-NEXT: pushq %rbx
+; X64-LFENCE-NEXT: pushq %rax
+; X64-LFENCE-NEXT: movq %rcx, %r15
+; X64-LFENCE-NEXT: movq %rdx, %r14
+; X64-LFENCE-NEXT: movq %rsi, %rbx
+; X64-LFENCE-NEXT: movq %rdi, %r12
+; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT: cvtsd2ss %xmm0, %xmm0
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT: cvtss2sd %xmm0, %xmm0
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2ssl (%r14), %xmm0
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2sdq (%r15), %xmm0
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2ssq (%r15), %xmm0
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2sdl (%r14), %xmm0
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: addq $8, %rsp
+; X64-LFENCE-NEXT: popq %rbx
+; X64-LFENCE-NEXT: popq %r12
+; X64-LFENCE-NEXT: popq %r14
+; X64-LFENCE-NEXT: popq %r15
+; X64-LFENCE-NEXT: retq
+entry:
+  %f1 = load float, float* %fptr
+  call void @sink_float(float %f1)
+  %d1 = load double, double* %dptr
+  call void @sink_double(double %d1)
+  %f2.d = load double, double* %dptr
+  %f2 = fptrunc double %f2.d to float
+  call void @sink_float(float %f2)
+  %d2.f = load float, float* %fptr
+  %d2 = fpext float %d2.f to double
+  call void @sink_double(double %d2)
+  %f3.i = load i32, i32* %i32ptr
+  %f3 = sitofp i32 %f3.i to float
+  call void @sink_float(float %f3)
+  %d3.i = load i64, i64* %i64ptr
+  %d3 = sitofp i64 %d3.i to double
+  call void @sink_double(double %d3)
+  %f4.i = load i64, i64* %i64ptr
+  %f4 = sitofp i64 %f4.i to float
+  call void @sink_float(float %f4)
+  %d4.i = load i32, i32* %i32ptr
+  %d4 = sitofp i32 %d4.i to double
+  call void @sink_double(double %d4)
+  ret void
+}
+
+declare void @sink_v4f32(<4 x float>)
+declare void @sink_v2f64(<2 x double>)
+declare void @sink_v16i8(<16 x i8>)
+declare void @sink_v8i16(<8 x i16>)
+declare void @sink_v4i32(<4 x i32>)
+declare void @sink_v2i64(<2 x i64>)
+
+; Test loads of vectors.
+define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind {
+; X64-LABEL: test_vec_loads:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: movq %r9, %r14
+; X64-NEXT: movq %r8, %r15
+; X64-NEXT: movq %rcx, %r12
+; X64-NEXT: movq %rdx, %r13
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %rdi
+; X64-NEXT: movaps (%rdi), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v4f32
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %rbx
+; X64-NEXT: movaps (%rbx), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v2f64
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r13
+; X64-NEXT: movaps (%r13), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v16i8
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r12
+; X64-NEXT: movaps (%r12), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v8i16
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r15
+; X64-NEXT: movaps (%r15), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v4i32
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r14
+; X64-NEXT: movaps (%r14), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v2i64
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: retq
+;
+; X64-LFENCE-LABEL: test_vec_loads:
+; X64-LFENCE: # %bb.0: # %entry
+; X64-LFENCE-NEXT: pushq %r15
+; X64-LFENCE-NEXT: pushq %r14
+; X64-LFENCE-NEXT: pushq %r13
+; X64-LFENCE-NEXT: pushq %r12
+; X64-LFENCE-NEXT: pushq %rbx
+; X64-LFENCE-NEXT: movq %r9, %r14
+; X64-LFENCE-NEXT: movq %r8, %r15
+; X64-LFENCE-NEXT: movq %rcx, %r12
+; X64-LFENCE-NEXT: movq %rdx, %r13
+; X64-LFENCE-NEXT: movq %rsi, %rbx
+; X64-LFENCE-NEXT: movaps (%rdi), %xmm0
+; X64-LFENCE-NEXT: callq sink_v4f32
+; X64-LFENCE-NEXT: movaps (%rbx), %xmm0
+; X64-LFENCE-NEXT: callq sink_v2f64
+; X64-LFENCE-NEXT: movaps (%r13), %xmm0
+; X64-LFENCE-NEXT: callq sink_v16i8
+; X64-LFENCE-NEXT: movaps (%r12), %xmm0
+; X64-LFENCE-NEXT: callq sink_v8i16
+; X64-LFENCE-NEXT: movaps (%r15), %xmm0
+; X64-LFENCE-NEXT: callq sink_v4i32
+; X64-LFENCE-NEXT: movaps (%r14), %xmm0
+; X64-LFENCE-NEXT: callq sink_v2i64
+; X64-LFENCE-NEXT: popq %rbx
+; X64-LFENCE-NEXT: popq %r12
+; X64-LFENCE-NEXT: popq %r13
+; X64-LFENCE-NEXT: popq %r14
+; X64-LFENCE-NEXT: popq %r15
+; X64-LFENCE-NEXT: retq
+entry:
+  %x1 = load <4 x float>, <4 x float>* %v4f32ptr
+  call void @sink_v4f32(<4 x float> %x1)
+  %x2 = load <2 x double>, <2 x double>* %v2f64ptr
+  call void @sink_v2f64(<2 x double> %x2)
+  %x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
+  call void @sink_v16i8(<16 x i8> %x3)
+  %x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
+  call void @sink_v8i16(<8 x i16> %x4)
+  %x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
+  call void @sink_v4i32(<4 x i32> %x5)
+  %x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
+  call void @sink_v2i64(<2 x i64> %x6)
+  ret void
+}
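
For reference, a minimal standalone sketch of the size-indexed selection this patch relies on: a data-invariant load qualifies for post-load hardening only when its destination is a 1-, 2-, 4-, or 8-byte general-purpose register, and the matching OR flavor is picked by indexing a table with log2 of the byte width, mirroring canHardenPostLoad() and the OrOpCodes lookup in hardenPostLoad(). This is plain C++, not part of the patch; the names pickPostLoadOr, kOrMnemonics, and log2Bytes are invented for illustration.

// Illustrative sketch only (not LLVM code): models the size-indexed lookup
// used by canHardenPostLoad() and the OrOpCodes table in hardenPostLoad().
#include <iostream>
#include <optional>
#include <string>

// Stand-ins for the OR8rr/OR16rr/OR32rr/OR64rr opcodes, indexed by log2 of
// the destination register width in bytes (1, 2, 4, 8).
static const char *const kOrMnemonics[] = {"or8rr", "or16rr", "or32rr",
                                           "or64rr"};

static int log2Bytes(unsigned Bytes) {
  int L = 0;
  while ((1u << (L + 1)) <= Bytes)
    ++L;
  return L; // 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3
}

// Returns the OR flavor to use for post-load hardening, or std::nullopt when
// the load cannot be hardened this way (non-GPR destination or a def wider
// than 8 bytes, i.e. vectors), mirroring canHardenPostLoad() returning false.
std::optional<std::string> pickPostLoadOr(unsigned DefRegBytes,
                                          bool DefIsGPRClass) {
  if (DefRegBytes > 8 || !DefIsGPRClass)
    return std::nullopt;
  return std::string(kOrMnemonics[log2Bytes(DefRegBytes)]);
}

int main() {
  // A 4-byte GPR load is hardened with the 32-bit OR against the predicate
  // state; a 16-byte vector load is skipped and left to address hardening,
  // which is what the new test_vec_loads checks above exercise.
  std::cout << pickPostLoadOr(4, true).value_or("<not hardened post-load>")
            << "\n";
  std::cout << pickPostLoadOr(16, false).value_or("<not hardened post-load>")
            << "\n";
}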