summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2018-10-15 15:38:38 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-10-15 15:38:38 +0000
commit: 4e970ff022920a04ba3a898b30e882ab4e95d41e (patch)
tree: 895130a712a3268dfdeaaf1a83dd12077d9d9696 /llvm
parent: a01be0f217712a9b8a4df2d48fd420e4c786a569 (diff)
download: bcm5719-llvm-4e970ff022920a04ba3a898b30e882ab4e95d41e.tar.gz
download: bcm5719-llvm-4e970ff022920a04ba3a898b30e882ab4e95d41e.zip
[DAGCombiner] allow undef elts in vector fma matching
llvm-svn: 344525
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19
-rw-r--r-- llvm/test/CodeGen/X86/fma_patterns.ll | 90
2 files changed, 70 insertions, 39 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7ec5fac390b..f2779a3475e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10794,17 +10794,18 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
- // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
+ // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
- auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
- if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
- if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ if (C->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
}
return SDValue();
};
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index d0d0dfed352..5395ae46d47 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -637,23 +637,38 @@ define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
}
define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float> %y) {
-; FMA-LABEL: test_v4f32_mul_y_add_x_one_undefs:
-; FMA: # %bb.0:
-; FMA-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
-; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; FMA-NEXT: retq
+; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
+; FMA-INFS: # %bb.0:
+; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; FMA-INFS-NEXT: retq
;
-; FMA4-LABEL: test_v4f32_mul_y_add_x_one_undefs:
-; FMA4: # %bb.0:
-; FMA4-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
-; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; FMA4-NEXT: retq
+; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
+; FMA4-INFS: # %bb.0:
+; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; FMA4-INFS-NEXT: retq
;
-; AVX512-LABEL: test_v4f32_mul_y_add_x_one_undefs:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: retq
+; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
+; AVX512-INFS: # %bb.0:
+; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; AVX512-INFS-NEXT: retq
+;
+; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
+; FMA-NOINFS: # %bb.0:
+; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
+; FMA-NOINFS-NEXT: retq
+;
+; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
+; FMA4-NOINFS: # %bb.0:
+; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: retq
+;
+; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
+; AVX512-NOINFS: # %bb.0:
+; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
+; AVX512-NOINFS-NEXT: retq
%a = fadd <4 x float> %x, <float 1.0, float undef, float 1.0, float undef>
%m = fmul <4 x float> %y, %a
ret <4 x float> %m
@@ -736,23 +751,38 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y
}
define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
-; FMA-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
-; FMA: # %bb.0:
-; FMA-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
-; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; FMA-NEXT: retq
+; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
+; FMA-INFS: # %bb.0:
+; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; FMA-INFS-NEXT: retq
;
-; FMA4-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
-; FMA4: # %bb.0:
-; FMA4-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
-; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; FMA4-NEXT: retq
+; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
+; FMA4-INFS: # %bb.0:
+; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; FMA4-INFS-NEXT: retq
;
-; AVX512-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: retq
+; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
+; AVX512-INFS: # %bb.0:
+; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; AVX512-INFS-NEXT: retq
+;
+; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
+; FMA-NOINFS: # %bb.0:
+; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
+; FMA-NOINFS-NEXT: retq
+;
+; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
+; FMA4-NOINFS: # %bb.0:
+; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: retq
+;
+; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
+; AVX512-NOINFS: # %bb.0:
+; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
+; AVX512-NOINFS-NEXT: retq
%a = fadd <4 x float> %x, <float undef, float -1.0, float undef, float -1.0>
%m = fmul <4 x float> %y, %a
ret <4 x float> %m
OpenPOWER on IntegriCloud