summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/TargetLowering.h 8
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 3
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 8
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h 2
-rw-r--r-- llvm/test/CodeGen/X86/avx512-cmp.ll 6
-rwxr-xr-x llvm/test/CodeGen/X86/avx512-schedule.ll 10
-rw-r--r-- llvm/test/CodeGen/X86/select-of-fp-constants.ll 12
-rw-r--r-- llvm/test/CodeGen/X86/vselect-zero.ll 6
8 files changed, 39 insertions, 16 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e610ee28abe..8a971f6fc57 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -269,6 +269,14 @@ public:
return true;
}
+ /// Return true if it is profitable to convert a select of FP constants into
+ /// a constant pool load whose address depends on the select condition.
+ /// \p IsFPSetCC may be used to differentiate a select with an FP compare from
+ /// one with an integer compare. This default implementation returns true.
+ virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ return true;
+ }
+
/// Return true if multiple condition registers are available.
bool hasMultipleConditionRegisters() const {
return HasMultipleConditionRegisters;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bce188e399a..db9a1048876 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18129,6 +18129,9 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+ return SDValue();
+
// If we are before legalize types, we want the other legalization to happen
// first (for example, to avoid messing with soft float).
auto *TV = dyn_cast<ConstantFPSDNode>(N2);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c2e13e95ad5..c11598f2eb4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4767,6 +4767,14 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ // Return false only when the select condition is a floating-point compare,
+ // the target is 64-bit LP64 (XMM registers are used in the ABI) and AVX is
+ // available (blendv or conditional move): selecting is then cheaper than a
+ // cross-register move plus a load that depends on the compare result.
+ return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
+}
+
bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
// TODO: It might be a win to ease or lift this restriction, but the generic
// folds in DAGCombiner conflict with vector folds for an AVX512 target.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d95241237ab..fe151d89dd2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1041,6 +1041,8 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
+
bool convertSelectOfConstantsToMath(EVT VT) const override;
bool decomposeMulByConstant(EVT VT, SDValue C) const override;
diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll
index 89d811f8681..ecdca99bea3 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -69,10 +69,10 @@ define float @test5(float %p) #0 {
; ALL-NEXT: ## %bb.2: ## %return
; ALL-NEXT: retq
; ALL-NEXT: LBB3_1: ## %if.end
-; ALL-NEXT: seta %al
-; ALL-NEXT: movzbl %al, %eax
-; ALL-NEXT: leaq {{.*}}(%rip), %rcx
+; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1
+; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ALL-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; ALL-NEXT: retq
entry:
%cmp = fcmp oeq float %p, 0.000000e+00
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 35a50acd3c8..3bfe088a426 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -1148,9 +1148,10 @@ define float @test5(float %p) #0 {
; GENERIC-NEXT: # %bb.2: # %return
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-NEXT: .LBB67_1: # %if.end
-; GENERIC-NEXT: seta %al # sched: [2:1.00]
-; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33]
+; GENERIC-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test5:
@@ -1162,9 +1163,10 @@ define float @test5(float %p) #0 {
; SKX-NEXT: # %bb.2: # %return
; SKX-NEXT: retq # sched: [7:1.00]
; SKX-NEXT: .LBB67_1: # %if.end
-; SKX-NEXT: seta %al # sched: [2:1.00]
-; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25]
+; SKX-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%cmp = fcmp oeq float %p, 0.000000e+00
diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
index 84b0bc13441..9ab12bc89b1 100644
--- a/llvm/test/CodeGen/X86/select-of-fp-constants.ll
+++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
@@ -76,18 +76,18 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone {
;
; X64_AVX2-LABEL: fcmp_select_fp_constants:
; X64_AVX2: # %bb.0:
+; X64_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64_AVX2-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %xmm0
-; X64_AVX2-NEXT: vmovd %xmm0, %eax
-; X64_AVX2-NEXT: andl $1, %eax
-; X64_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; X64_AVX2-NEXT: retq
;
; X64_AVX512F-LABEL: fcmp_select_fp_constants:
; X64_AVX512F: # %bb.0:
-; X64_AVX512F-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %k0
-; X64_AVX512F-NEXT: kmovw %k0, %eax
-; X64_AVX512F-NEXT: movzwl %ax, %eax
+; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX512F-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %k1
; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X64_AVX512F-NEXT: retq
%c = fcmp une float %x, -4.0
%r = select i1 %c, float 42.0, float 23.0
diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll
index 722136d663c..70998b92bbb 100644
--- a/llvm/test/CodeGen/X86/vselect-zero.ll
+++ b/llvm/test/CodeGen/X86/vselect-zero.ll
@@ -129,9 +129,9 @@ define double @fsel_nonzero_constants(double %x, double %y) {
; AVX-LABEL: fsel_nonzero_constants:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: andl $1, %eax
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
%cond = fcmp oeq double %x, %y
%r = select i1 %cond, double 12.0, double 42.0
OpenPOWER on IntegriCloud