[x86] limit transform for select-of-fp-constants

This hook should likely be adjusted to limit the transform further, but the test diffs in this patch should be clear wins. If we have blendv or a conditional move, then we should assume those are cheap ops. The constant loads become independent of the compare, so they can be speculated before their values are needed by the blend/mov.

llvm-svn: 347526
author: Sanjay Patel <spatel@rotateright.com> 2018-11-25 17:27:02 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-11-25 17:27:02 +0000
commit: 7336e7c67a5decf608c7b8f4ef0da571fb505313 (patch)
tree: 8a77d614f67b6ec24be6112279aeebdea0896482
parent: 2e5a25c1708260cfbadb4e799ee86202b0105f74 (diff)
download: bcm5719-llvm-7336e7c67a5decf608c7b8f4ef0da571fb505313.tar.gz
bcm5719-llvm-7336e7c67a5decf608c7b8f4ef0da571fb505313.zip
8 files changed, 39 insertions, 16 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e610ee28abe..8a971f6fc57 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -269,6 +269,14 @@ public:
     return true;
   }
 
+  /// Return true if it is profitable to convert a select of FP constants into
+  /// a constant pool load whose address depends on the select condition.
+  /// \p IsFPSetCC lets a target distinguish a select fed by an FP compare from
+  /// one fed by an integer compare. The default implementation returns true.
+  virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+    return true;
+  }
+
   /// Return true if multiple condition registers are available.
   bool hasMultipleConditionRegisters() const {
     return HasMultipleConditionRegisters;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bce188e399a..db9a1048876 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18129,6 +18129,9 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
     ISD::CondCode CC) {
+  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+    return SDValue();
+
   // If we are before legalize types, we want the other legalization to happen
   // first (for example, to avoid messing with soft float).
   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c2e13e95ad5..c11598f2eb4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4767,6 +4767,14 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   return true;
 }
 
+bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+  // Return false (keep the select) only when all of these hold: the condition
+  // is a floating-point compare, the ABI uses XMM registers (64-bit LP64), and
+  // AVX provides blendv or a conditional move. Selecting is then cheaper than a
+  // cross-register move plus a load that depends on the compare result.
+  return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
+}
+
 bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
   // TODO: It might be a win to ease or lift this restriction, but the generic
   // folds in DAGCombiner conflict with vector folds for an AVX512 target.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d95241237ab..fe151d89dd2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1041,6 +1041,8 @@ namespace llvm {
     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                            Type *Ty) const override;
 
+    bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
+
     bool convertSelectOfConstantsToMath(EVT VT) const override;
 
     bool decomposeMulByConstant(EVT VT, SDValue C) const override;
diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll
index 89d811f8681..ecdca99bea3 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -69,10 +69,10 @@ define float @test5(float %p) #0 {
 ; ALL-NEXT:  ## %bb.2: ## %return
 ; ALL-NEXT:    retq
 ; ALL-NEXT:  LBB3_1: ## %if.end
-; ALL-NEXT:    seta %al
-; ALL-NEXT:    movzbl %al, %eax
-; ALL-NEXT:    leaq {{.*}}(%rip), %rcx
+; ALL-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ALL-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; ALL-NEXT:    retq
 entry:
   %cmp = fcmp oeq float %p, 0.000000e+00
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 35a50acd3c8..3bfe088a426 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -1148,9 +1148,10 @@ define float @test5(float %p) #0 {
 ; GENERIC-NEXT:  # %bb.2: # %return
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB67_1: # %if.end
-; GENERIC-NEXT:    seta %al # sched: [2:1.00]
-; GENERIC-NEXT:    movzbl %al, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test5:
@@ -1162,9 +1163,10 @@ define float @test5(float %p) #0 {
 ; SKX-NEXT:  # %bb.2: # %return
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ; SKX-NEXT:  .LBB67_1: # %if.end
-; SKX-NEXT:    seta %al # sched: [2:1.00]
-; SKX-NEXT:    movzbl %al, %eax # sched: [1:0.25]
+; SKX-NEXT:    vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 entry:
   %cmp = fcmp oeq float %p, 0.000000e+00
diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
index 84b0bc13441..9ab12bc89b1 100644
--- a/llvm/test/CodeGen/X86/select-of-fp-constants.ll
+++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
@@ -76,18 +76,18 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone {
 ;
 ; X64_AVX2-LABEL: fcmp_select_fp_constants:
 ; X64_AVX2:       # %bb.0:
+; X64_AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X64_AVX2-NEXT:    vcmpneqss {{.*}}(%rip), %xmm0, %xmm0
-; X64_AVX2-NEXT:    vmovd %xmm0, %eax
-; X64_AVX2-NEXT:    andl $1, %eax
-; X64_AVX2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX2-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
 ; X64_AVX2-NEXT:    retq
 ;
 ; X64_AVX512F-LABEL: fcmp_select_fp_constants:
 ; X64_AVX512F:       # %bb.0:
-; X64_AVX512F-NEXT:    vcmpneqss {{.*}}(%rip), %xmm0, %k0
-; X64_AVX512F-NEXT:    kmovw %k0, %eax
-; X64_AVX512F-NEXT:    movzwl %ax, %eax
+; X64_AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX512F-NEXT:    vcmpneqss {{.*}}(%rip), %xmm0, %k1
 ; X64_AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX512F-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; X64_AVX512F-NEXT:    retq
  %c = fcmp une float %x, -4.0
  %r = select i1 %c, float 42.0, float 23.0
diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll
index 722136d663c..70998b92bbb 100644
--- a/llvm/test/CodeGen/X86/vselect-zero.ll
+++ b/llvm/test/CodeGen/X86/vselect-zero.ll
@@ -129,9 +129,9 @@ define double @fsel_nonzero_constants(double %x, double %y) {
 ; AVX-LABEL: fsel_nonzero_constants:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    andl $1, %eax
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
 ; AVX-NEXT:    retq
   %cond = fcmp oeq double %x, %y
   %r = select i1 %cond, double 12.0, double 42.0
author	Sanjay Patel <spatel@rotateright.com>	2018-11-25 17:27:02 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2018-11-25 17:27:02 +0000
commit	7336e7c67a5decf608c7b8f4ef0da571fb505313 (patch)
tree	8a77d614f67b6ec24be6112279aeebdea0896482
parent	2e5a25c1708260cfbadb4e799ee86202b0105f74 (diff)
download	bcm5719-llvm-7336e7c67a5decf608c7b8f4ef0da571fb505313.tar.gz bcm5719-llvm-7336e7c67a5decf608c7b8f4ef0da571fb505313.zip