summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp26
-rw-r--r--llvm/test/CodeGen/PowerPC/fma-combine.ll101
2 files changed, 116 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 24ab65171a1..368e2100031 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5490,9 +5490,20 @@ char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
const TargetOptions &Options = DAG.getTarget().Options;
- if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
- isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
+ if (!Op.hasOneUse()) {
+ bool IsFreeExtend = Op.getOpcode() == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType());
+
+ // If we already have the use of the negated floating constant, it is free
+ // to negate it even it has multiple uses.
+ bool IsFreeConstant =
+ Op.getOpcode() == ISD::ConstantFP &&
+ !getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize)
+ .use_empty();
+
+ if (!IsFreeExtend && !IsFreeConstant)
+ return 0;
+ }
// Don't recurse exponentially.
if (Depth > SelectionDAG::MaxRecursionDepth)
@@ -5687,14 +5698,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
ForCodeSize, Depth + 1);
char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
ForCodeSize, Depth + 1);
- // TODO: This is a hack. It is possible that costs have changed between now
- // and the initial calls to isNegatibleForFree(). That is because we
- // are rewriting the expression, and that may change the number of
- // uses (and therefore the cost) of values. If the negation costs are
- // equal, only negate this value if it is a constant. Otherwise, try
- // operand 1. A better fix would eliminate uses as a cost factor or
- // track the change in uses as we rewrite the expression.
- if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
+ if (V0 > V1) {
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
SDValue Neg0 = getNegatedExpression(
Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index 88da295201f..8973dc1334d 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -137,3 +137,104 @@ entry:
%add = fsub double %mul, %a
ret double %add
}
+
+define float @fma_combine_no_ice() {
+; CHECK-FAST-LABEL: fma_combine_no_ice:
+; CHECK-FAST: # %bb.0:
+; CHECK-FAST-NEXT: addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-FAST-NEXT: addis 4, 2, .LCPI4_1@toc@ha
+; CHECK-FAST-NEXT: lfs 0, .LCPI4_0@toc@l(3)
+; CHECK-FAST-NEXT: lfsx 2, 0, 3
+; CHECK-FAST-NEXT: addis 3, 2, .LCPI4_2@toc@ha
+; CHECK-FAST-NEXT: lfs 3, .LCPI4_1@toc@l(4)
+; CHECK-FAST-NEXT: lfs 1, .LCPI4_2@toc@l(3)
+; CHECK-FAST-NEXT: xsmaddasp 3, 2, 0
+; CHECK-FAST-NEXT: xsmaddasp 1, 2, 3
+; CHECK-FAST-NEXT: xsnmsubasp 1, 3, 2
+; CHECK-FAST-NEXT: blr
+;
+; CHECK-FAST-NOVSX-LABEL: fma_combine_no_ice:
+; CHECK-FAST-NOVSX: # %bb.0:
+; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI4_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_1@toc@ha
+; CHECK-FAST-NOVSX-NEXT: lfs 1, 0(3)
+; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI4_1@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_2@toc@ha
+; CHECK-FAST-NOVSX-NEXT: fmadds 0, 1, 2, 0
+; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI4_2@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT: fmadds 2, 1, 0, 2
+; CHECK-FAST-NOVSX-NEXT: fnmsubs 1, 0, 1, 2
+; CHECK-FAST-NOVSX-NEXT: blr
+;
+; CHECK-LABEL: fma_combine_no_ice:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT: addis 4, 2, .LCPI4_1@toc@ha
+; CHECK-NEXT: lfs 0, .LCPI4_0@toc@l(3)
+; CHECK-NEXT: lfsx 2, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI4_2@toc@ha
+; CHECK-NEXT: lfs 3, .LCPI4_1@toc@l(4)
+; CHECK-NEXT: lfs 1, .LCPI4_2@toc@l(3)
+; CHECK-NEXT: xsmaddasp 3, 2, 0
+; CHECK-NEXT: xsmaddasp 1, 2, 3
+; CHECK-NEXT: xsnmsubasp 1, 3, 2
+; CHECK-NEXT: blr
+ %tmp = load float, float* undef, align 4
+ %tmp2 = load float, float* undef, align 4
+ %tmp3 = fmul fast float %tmp, 0x3FE372D780000000
+ %tmp4 = fadd fast float %tmp3, 1.000000e+00
+ %tmp5 = fmul fast float %tmp2, %tmp4
+ %tmp6 = load float, float* undef, align 4
+ %tmp7 = load float, float* undef, align 4
+ %tmp8 = fmul fast float %tmp7, 0x3FE372D780000000
+ %tmp9 = fsub fast float -1.000000e+00, %tmp8
+ %tmp10 = fmul fast float %tmp9, %tmp6
+ %tmp11 = fadd fast float %tmp5, 5.000000e-01
+ %tmp12 = fadd fast float %tmp11, %tmp10
+ ret float %tmp12
+}
+
+; This would crash while trying getNegatedExpression().
+define double @getNegatedExpression_crash(double %x, double %y) {
+; CHECK-FAST-LABEL: getNegatedExpression_crash:
+; CHECK-FAST: # %bb.0:
+; CHECK-FAST-NEXT: addis 3, 2, .LCPI5_1@toc@ha
+; CHECK-FAST-NEXT: addis 4, 2, .LCPI5_0@toc@ha
+; CHECK-FAST-NEXT: lfs 3, .LCPI5_1@toc@l(3)
+; CHECK-FAST-NEXT: lfs 4, .LCPI5_0@toc@l(4)
+; CHECK-FAST-NEXT: xssubdp 0, 1, 3
+; CHECK-FAST-NEXT: xsmaddadp 3, 1, 4
+; CHECK-FAST-NEXT: xsmaddadp 0, 3, 2
+; CHECK-FAST-NEXT: fmr 1, 0
+; CHECK-FAST-NEXT: blr
+;
+; CHECK-FAST-NOVSX-LABEL: getNegatedExpression_crash:
+; CHECK-FAST-NOVSX: # %bb.0:
+; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT: addis 4, 2, .LCPI5_1@toc@ha
+; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI5_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT: lfs 3, .LCPI5_1@toc@l(4)
+; CHECK-FAST-NOVSX-NEXT: fmadd 3, 1, 3, 0
+; CHECK-FAST-NOVSX-NEXT: fsub 0, 1, 0
+; CHECK-FAST-NOVSX-NEXT: fmadd 1, 3, 2, 0
+; CHECK-FAST-NOVSX-NEXT: blr
+;
+; CHECK-LABEL: getNegatedExpression_crash:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis 3, 2, .LCPI5_1@toc@ha
+; CHECK-NEXT: addis 4, 2, .LCPI5_0@toc@ha
+; CHECK-NEXT: lfs 3, .LCPI5_1@toc@l(3)
+; CHECK-NEXT: lfs 4, .LCPI5_0@toc@l(4)
+; CHECK-NEXT: xssubdp 0, 1, 3
+; CHECK-NEXT: xsmaddadp 3, 1, 4
+; CHECK-NEXT: xsmaddadp 0, 3, 2
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: blr
+ %neg = fneg fast double %x
+ %fma = call fast double @llvm.fma.f64(double %neg, double 42.0, double -1.0)
+ %add = fadd fast double %x, 1.0
+ %fma1 = call fast double @llvm.fma.f64(double %fma, double %y, double %add)
+ ret double %fma1
+}
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
OpenPOWER on IntegriCloud