[x86/SLH] Fix a bug where we would try to post-load harden non-GPRs.

Found cases that hit the assert I added. This patch factors the validity checking into a nice helper routine and calls it when deciding to harden post-load, and asserts it when doing so later.

I've added tests for the various ways of loading a floating point type, as well as loading all vector permutations. Even though many of these go to identical instructions, it seems good to somewhat comprehensively test them.

I'm confident there will be more fixes needed here; I'll try to add tests each time as I get this predicate adjusted.

llvm-svn: 337160
author: Chandler Carruth <chandlerc@gmail.com> 2018-07-16 11:38:48 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2018-07-16 11:38:48 +0000
commit: e66a6f48e314c84d00e9580fa3d30fadb683dd11 (patch)
tree: d7dbb651a86fb1c127fc5ee206fc15212a2d7118
parent: d1a381b17a4f0ecbd3bbd6c33db9c541037429eb (diff)
download: bcm5719-llvm-e66a6f48e314c84d00e9580fa3d30fadb683dd11.tar.gz
bcm5719-llvm-e66a6f48e314c84d00e9580fa3d30fadb683dd11.zip
2 files changed, 297 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index cb8de34cf05..a4e9ead0070 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -173,6 +173,7 @@ private:
   MachineInstr *
   sinkPostLoadHardenedInst(MachineInstr &MI,
                            SmallPtrSetImpl<MachineInstr *> &HardenedLoads);
+  bool canHardenPostLoad(MachineInstr &MI);
   void hardenPostLoad(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkReturnInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkCallInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
@@ -1115,12 +1116,12 @@ void X86SpeculativeLoadHardeningPass::checkAllLoads(
           (IndexReg && LoadDepRegs.test(IndexReg)))
         continue;
 
-      // If post-load hardening is enabled, this load is known to be
-      // data-invariant, and we aren't already going to harden one of the
+      // If post-load hardening is enabled, this load is compatible with
+      // post-load hardening, and we aren't already going to harden one of the
       // address registers, queue it up to be hardened post-load. Notably, even
       // once hardened this won't introduce a useful dependency that could prune
       // out subsequent loads.
-      if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
+      if (EnablePostLoadHardening && canHardenPostLoad(MI) &&
           !HardenedAddrRegs.count(BaseReg) &&
           !HardenedAddrRegs.count(IndexReg)) {
         HardenPostLoad.insert(&MI);
@@ -1602,6 +1603,25 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
   return MI;
 }
 
+bool X86SpeculativeLoadHardeningPass::canHardenPostLoad(MachineInstr &MI) {
+  if (!isDataInvariantLoad(MI))
+    return false;
+
+  auto &DefOp = MI.getOperand(0);
+  unsigned OldDefReg = DefOp.getReg();
+
+  auto *DefRC = MRI->getRegClass(OldDefReg);
+  int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;
+  if (DefRegBytes > 8)
+    // We don't support post-load hardening of vectors.
+    return false;
+
+  const TargetRegisterClass *GPRRegClasses[] = {
+      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
+      &X86::GR64RegClass};
+  return DefRC->hasSuperClassEq(GPRRegClasses[Log2_32(DefRegBytes)]);
+}
+
 // We can harden non-leaking loads into register without touching the address
 // by just hiding all of the loaded bits. We use an `or` instruction to do
 // this because having the poison value be all ones allows us to use the same
@@ -1609,8 +1629,8 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
 // execution and coercing them to one is sufficient.
 void X86SpeculativeLoadHardeningPass::hardenPostLoad(
     MachineInstr &MI, MachineSSAUpdater &PredStateSSA) {
-  assert(isDataInvariantLoad(MI) &&
-         "Cannot get here with a non-invariant load!");
+  assert(canHardenPostLoad(MI) &&
+         "Invalid instruction for post-load hardening!");
 
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc Loc = MI.getDebugLoc();
@@ -1625,14 +1645,6 @@ void X86SpeculativeLoadHardeningPass::hardenPostLoad(
   unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
   unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
 
-#ifndef NDEBUG
-  const TargetRegisterClass *OrRegClasses[] = {
-      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
-      &X86::GR64RegClass};
-  assert(DefRC->hasSuperClassEq(OrRegClasses[Log2_32(DefRegBytes)]) &&
-         "Cannot define this register with OR instruction!");
-#endif
-
   unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
 
   auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
index e160ce1715d..db94bae5d47 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@@ -590,3 +590,275 @@ lpad:
   call void @sink(i32 %leak)
   unreachable
 }
+
+declare void @sink_float(float)
+declare void @sink_double(double)
+
+; Test direct and converting loads of floating point values.
+define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind {
+; X64-LABEL: test_fp_loads:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    movq %rdi, %r12
+; X64-NEXT:    movq $-1, %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r12
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    cvtsd2ss %xmm0, %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    cvtss2sd %xmm0, %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r14
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2ssl (%r14), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r15
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2sdq (%r15), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2ssq (%r15), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2sdl (%r14), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    retq
+;
+; X64-LFENCE-LABEL: test_fp_loads:
+; X64-LFENCE:       # %bb.0: # %entry
+; X64-LFENCE-NEXT:    pushq %r15
+; X64-LFENCE-NEXT:    pushq %r14
+; X64-LFENCE-NEXT:    pushq %r12
+; X64-LFENCE-NEXT:    pushq %rbx
+; X64-LFENCE-NEXT:    pushq %rax
+; X64-LFENCE-NEXT:    movq %rcx, %r15
+; X64-LFENCE-NEXT:    movq %rdx, %r14
+; X64-LFENCE-NEXT:    movq %rsi, %rbx
+; X64-LFENCE-NEXT:    movq %rdi, %r12
+; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT:    cvtsd2ss %xmm0, %xmm0
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT:    cvtss2sd %xmm0, %xmm0
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2ssl (%r14), %xmm0
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2sdq (%r15), %xmm0
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2ssq (%r15), %xmm0
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2sdl (%r14), %xmm0
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    addq $8, %rsp
+; X64-LFENCE-NEXT:    popq %rbx
+; X64-LFENCE-NEXT:    popq %r12
+; X64-LFENCE-NEXT:    popq %r14
+; X64-LFENCE-NEXT:    popq %r15
+; X64-LFENCE-NEXT:    retq
+entry:
+  %f1 = load float, float* %fptr
+  call void @sink_float(float %f1)
+  %d1 = load double, double* %dptr
+  call void @sink_double(double %d1)
+  %f2.d = load double, double* %dptr
+  %f2 = fptrunc double %f2.d to float
+  call void @sink_float(float %f2)
+  %d2.f = load float, float* %fptr
+  %d2 = fpext float %d2.f to double
+  call void @sink_double(double %d2)
+  %f3.i = load i32, i32* %i32ptr
+  %f3 = sitofp i32 %f3.i to float
+  call void @sink_float(float %f3)
+  %d3.i = load i64, i64* %i64ptr
+  %d3 = sitofp i64 %d3.i to double
+  call void @sink_double(double %d3)
+  %f4.i = load i64, i64* %i64ptr
+  %f4 = sitofp i64 %f4.i to float
+  call void @sink_float(float %f4)
+  %d4.i = load i32, i32* %i32ptr
+  %d4 = sitofp i32 %d4.i to double
+  call void @sink_double(double %d4)
+  ret void
+}
+
+declare void @sink_v4f32(<4 x float>)
+declare void @sink_v2f64(<2 x double>)
+declare void @sink_v16i8(<16 x i8>)
+declare void @sink_v8i16(<8 x i16>)
+declare void @sink_v4i32(<4 x i32>)
+declare void @sink_v2i64(<2 x i64>)
+
+; Test loads of vectors.
+define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind {
+; X64-LABEL: test_vec_loads:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq %r9, %r14
+; X64-NEXT:    movq %r8, %r15
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    movq $-1, %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %rdi
+; X64-NEXT:    movaps (%rdi), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v4f32
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    movaps (%rbx), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v2f64
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r13
+; X64-NEXT:    movaps (%r13), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v16i8
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r12
+; X64-NEXT:    movaps (%r12), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v8i16
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r15
+; X64-NEXT:    movaps (%r15), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v4i32
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r14
+; X64-NEXT:    movaps (%r14), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v2i64
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    retq
+;
+; X64-LFENCE-LABEL: test_vec_loads:
+; X64-LFENCE:       # %bb.0: # %entry
+; X64-LFENCE-NEXT:    pushq %r15
+; X64-LFENCE-NEXT:    pushq %r14
+; X64-LFENCE-NEXT:    pushq %r13
+; X64-LFENCE-NEXT:    pushq %r12
+; X64-LFENCE-NEXT:    pushq %rbx
+; X64-LFENCE-NEXT:    movq %r9, %r14
+; X64-LFENCE-NEXT:    movq %r8, %r15
+; X64-LFENCE-NEXT:    movq %rcx, %r12
+; X64-LFENCE-NEXT:    movq %rdx, %r13
+; X64-LFENCE-NEXT:    movq %rsi, %rbx
+; X64-LFENCE-NEXT:    movaps (%rdi), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v4f32
+; X64-LFENCE-NEXT:    movaps (%rbx), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v2f64
+; X64-LFENCE-NEXT:    movaps (%r13), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v16i8
+; X64-LFENCE-NEXT:    movaps (%r12), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v8i16
+; X64-LFENCE-NEXT:    movaps (%r15), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v4i32
+; X64-LFENCE-NEXT:    movaps (%r14), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v2i64
+; X64-LFENCE-NEXT:    popq %rbx
+; X64-LFENCE-NEXT:    popq %r12
+; X64-LFENCE-NEXT:    popq %r13
+; X64-LFENCE-NEXT:    popq %r14
+; X64-LFENCE-NEXT:    popq %r15
+; X64-LFENCE-NEXT:    retq
+entry:
+  %x1 = load <4 x float>, <4 x float>* %v4f32ptr
+  call void @sink_v4f32(<4 x float> %x1)
+  %x2 = load <2 x double>, <2 x double>* %v2f64ptr
+  call void @sink_v2f64(<2 x double> %x2)
+  %x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
+  call void @sink_v16i8(<16 x i8> %x3)
+  %x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
+  call void @sink_v8i16(<8 x i16> %x4)
+  %x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
+  call void @sink_v4i32(<4 x i32> %x5)
+  %x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
+  call void @sink_v2i64(<2 x i64> %x6)
+  ret void
+}
author	Chandler Carruth <chandlerc@gmail.com>	2018-07-16 11:38:48 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2018-07-16 11:38:48 +0000
commit	e66a6f48e314c84d00e9580fa3d30fadb683dd11 (patch)
tree	d7dbb651a86fb1c127fc5ee206fc15212a2d7118
parent	d1a381b17a4f0ecbd3bbd6c33db9c541037429eb (diff)
download	bcm5719-llvm-e66a6f48e314c84d00e9580fa3d30fadb683dd11.tar.gz bcm5719-llvm-e66a6f48e314c84d00e9580fa3d30fadb683dd11.zip