author     Craig Topper <craig.topper@intel.com>    2017-11-01 18:10:06 +0000
committer  Craig Topper <craig.topper@intel.com>    2017-11-01 18:10:06 +0000
commit     ca1aa83cbe22e5b3aa7345a6fd7d0e4d4b1f1d64 (patch)
tree       6f6e721f11d79808c898ac7c43493005625463c1
parent     671526148c547c453d320075ec619f5519499de2 (diff)
download   bcm5719-llvm-ca1aa83cbe22e5b3aa7345a6fd7d0e4d4b1f1d64.tar.gz
           bcm5719-llvm-ca1aa83cbe22e5b3aa7345a6fd7d0e4d4b1f1d64.zip
[X86] Prevent fast isel from folding loads into the instructions listed in hasPartialRegUpdate.
This patch moves the check for optsize and hasPartialRegUpdate into the lower-level implementation of foldMemoryOperandImpl, so that it also covers the entry point that fast isel uses.

We are still folding loads into AVX instructions with undef register updates; those should probably be disabled too, but that's a problem for another patch.

Unfortunately, this requires reordering a bunch of functions, which is why the diff is so large. I can do the function reordering separately if we want.

Differential Revision: https://reviews.llvm.org/D39402

llvm-svn: 317112
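For readers skimming the diff: the substance of the change is a single early-out, now duplicated at each foldMemoryOperandImpl entry point so that the fast-isel path is covered as well. A minimal sketch of that guard, condensed from the hunks below (surrounding context elided):

    // Folding a load into an instruction that only partially updates its
    // destination register creates a false dependency on the register's
    // previous contents, which can stall the pipeline. Refuse to fold
    // unless we are optimizing for size.
    // TODO (from the patch): undef register updates should be blocked too.
    if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
      return nullptr; // fold fails; the load stays a separate instruction

The test updates below show the effect: without optsize, fast isel now emits a separate load followed by a register-to-register convert (for the scalar float loads, movss/movsd overwrite the whole XMM register, as the "mem[0],zero,zero,zero" check lines indicate, so no stale upper bits are carried in), while optsize functions keep the smaller folded memory-operand form.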
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp                             7
-rw-r--r--  llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll                40
-rw-r--r--  llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll  38
-rw-r--r--  llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll         98
4 files changed, 177 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 276ceae7cdc..a2ec1f4f469 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8389,6 +8389,11 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MI.getOpcode() == X86::PUSH64r))
return nullptr;
+ // Avoid partial register update stalls unless optimizing for size.
+ // TODO: we should block undef reg update as well.
+ if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
+ return nullptr;
+
unsigned NumOps = MI.getDesc().getNumOperands();
bool isTwoAddr =
NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -8554,6 +8559,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
+ // TODO: we should block undef reg update as well.
if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
return nullptr;
@@ -8752,6 +8758,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (NoFusing) return nullptr;
// Avoid partial register update stalls unless optimizing for size.
+ // TODO: we should block undef reg update as well.
if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
return nullptr;
diff --git a/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
index 1035c256790..4a3337554b6 100644
--- a/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
@@ -55,7 +55,8 @@ entry:
define double @single_to_double_rm(float* %x) {
; SSE-LABEL: single_to_double_rm:
; SSE: # BB#0: # %entry
-; SSE-NEXT: cvtss2sd (%rdi), %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: cvtss2sd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: single_to_double_rm:
@@ -69,10 +70,28 @@ entry:
ret double %conv
}
+define double @single_to_double_rm_optsize(float* %x) optsize {
+; SSE-LABEL: single_to_double_rm_optsize:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: cvtss2sd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: single_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load float, float* %x, align 4
+ %conv = fpext float %0 to double
+ ret double %conv
+}
+
define float @double_to_single_rm(double* %x) {
; SSE-LABEL: double_to_single_rm:
; SSE: # BB#0: # %entry
-; SSE-NEXT: cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: cvtsd2ss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: double_to_single_rm:
@@ -85,3 +104,20 @@ entry:
%conv = fptrunc double %0 to float
ret float %conv
}
+
+define float @double_to_single_rm_optsize(double* %x) optsize {
+; SSE-LABEL: double_to_single_rm_optsize:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: double_to_single_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load double, double* %x, align 8
+ %conv = fptrunc double %0 to float
+ ret float %conv
+}
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
index 432e190a745..50eddab2b45 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
@@ -21,7 +21,8 @@ entry:
define double @long_to_double_rm(i64* %a) {
; SSE2-LABEL: long_to_double_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_double_rm:
@@ -34,6 +35,22 @@ entry:
ret double %1
}
+define double @long_to_double_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_double_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: long_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load i64, i64* %a
+ %1 = sitofp i64 %0 to double
+ ret double %1
+}
+
define float @long_to_float_rr(i64 %a) {
; SSE2-LABEL: long_to_float_rr:
; SSE2: # BB#0: # %entry
@@ -52,7 +69,8 @@ entry:
define float @long_to_float_rm(i64* %a) {
; SSE2-LABEL: long_to_float_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_float_rm:
@@ -64,3 +82,19 @@ entry:
%1 = sitofp i64 %0 to float
ret float %1
}
+
+define float @long_to_float_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_float_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: long_to_float_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load i64, i64* %a
+ %1 = sitofp i64 %0 to float
+ ret float %1
+}
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
index d722d37ee6b..57b50abab53 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -58,7 +58,8 @@ entry:
define double @int_to_double_rm(i32* %a) {
; SSE2-LABEL: int_to_double_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT: movl (%rdi), %eax
+; SSE2-NEXT: cvtsi2sdl %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_double_rm:
@@ -107,6 +108,58 @@ entry:
ret double %1
}
+define double @int_to_double_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_double_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: int_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; SSE2_X86-LABEL: int_to_double_rm_optsize:
+; SSE2_X86: # BB#0: # %entry
+; SSE2_X86-NEXT: pushl %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT: .cfi_offset %ebp, -8
+; SSE2_X86-NEXT: movl %esp, %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE2_X86-NEXT: andl $-8, %esp
+; SSE2_X86-NEXT: subl $8, %esp
+; SSE2_X86-NEXT: movl 8(%ebp), %eax
+; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0
+; SSE2_X86-NEXT: movsd %xmm0, (%esp)
+; SSE2_X86-NEXT: fldl (%esp)
+; SSE2_X86-NEXT: movl %ebp, %esp
+; SSE2_X86-NEXT: popl %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE2_X86-NEXT: retl
+;
+; AVX_X86-LABEL: int_to_double_rm_optsize:
+; AVX_X86: # BB#0: # %entry
+; AVX_X86-NEXT: pushl %ebp
+; AVX_X86-NEXT: .cfi_def_cfa_offset 8
+; AVX_X86-NEXT: .cfi_offset %ebp, -8
+; AVX_X86-NEXT: movl %esp, %ebp
+; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT: andl $-8, %esp
+; AVX_X86-NEXT: subl $8, %esp
+; AVX_X86-NEXT: movl 8(%ebp), %eax
+; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT: fldl (%esp)
+; AVX_X86-NEXT: movl %ebp, %esp
+; AVX_X86-NEXT: popl %ebp
+; AVX_X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT: retl
+entry:
+ %0 = load i32, i32* %a
+ %1 = sitofp i32 %0 to double
+ ret double %1
+}
+
define float @int_to_float_rr(i32 %a) {
; SSE2-LABEL: int_to_float_rr:
; SSE2: # BB#0: # %entry
@@ -148,7 +201,8 @@ entry:
define float @int_to_float_rm(i32* %a) {
; SSE2-LABEL: int_to_float_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT: movl (%rdi), %eax
+; SSE2-NEXT: cvtsi2ssl %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_float_rm:
@@ -184,3 +238,43 @@ entry:
%1 = sitofp i32 %0 to float
ret float %1
}
+
+define float @int_to_float_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_float_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: int_to_float_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; SSE2_X86-LABEL: int_to_float_rm_optsize:
+; SSE2_X86: # BB#0: # %entry
+; SSE2_X86-NEXT: pushl %eax
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0
+; SSE2_X86-NEXT: movss %xmm0, (%esp)
+; SSE2_X86-NEXT: flds (%esp)
+; SSE2_X86-NEXT: popl %eax
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 4
+; SSE2_X86-NEXT: retl
+;
+; AVX_X86-LABEL: int_to_float_rm_optsize:
+; AVX_X86: # BB#0: # %entry
+; AVX_X86-NEXT: pushl %eax
+; AVX_X86-NEXT: .cfi_def_cfa_offset 8
+; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT: vmovss %xmm0, (%esp)
+; AVX_X86-NEXT: flds (%esp)
+; AVX_X86-NEXT: popl %eax
+; AVX_X86-NEXT: .cfi_def_cfa_offset 4
+; AVX_X86-NEXT: retl
+entry:
+ %0 = load i32, i32* %a
+ %1 = sitofp i32 %0 to float
+ ret float %1
+}