diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/fneg.ll | 7 |
3 files changed, 33 insertions, 20 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eea22e220f1..7187466b055 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -516,6 +516,7 @@ namespace {
     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                            SDValue &CC) const;
     bool isOneUseSetCC(SDValue N) const;
+    bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
 
     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                        unsigned HiOp);
@@ -12110,6 +12111,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   return SDValue();
 }
 
+/// Return true if both inputs are at least as cheap in negated form and at
+/// least one input is strictly cheaper in negated form.
+bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
+  const TargetOptions &Options = DAG.getTarget().Options;
+  if (char LHSNeg = isNegatibleForFree(X, LegalOperations, TLI, &Options,
+                                       ForCodeSize))
+    if (char RHSNeg = isNegatibleForFree(Y, LegalOperations, TLI, &Options,
+                                         ForCodeSize))
+      // Both negated operands are at least as cheap as their counterparts.
+      // Check to see if at least one is cheaper negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return true;
+
+  return false;
+}
+
 SDValue DAGCombiner::visitFMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -12180,21 +12197,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
       return DAG.getNode(ISD::FNEG, DL, VT, N0);
 
-  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
-                                       ForCodeSize)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
-                                         ForCodeSize)) {
-      // Both can be negated for free, check to see if at least one is cheaper
-      // negated.
-      if (LHSNeg == 2 || RHSNeg == 2)
-        return DAG.getNode(ISD::FMUL, DL, VT,
-                           GetNegatedExpression(N0, DAG, LegalOperations,
-                                                ForCodeSize),
-                           GetNegatedExpression(N1, DAG, LegalOperations,
-                                                ForCodeSize),
-                           Flags);
-    }
+  // -N0 * -N1 --> N0 * N1
+  if (isCheaperToUseNegatedFPOps(N0, N1)) {
+    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
   }
 
   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
@@ -12273,6 +12280,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
   }
 
+  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
+  if (isCheaperToUseNegatedFPOps(N0, N1)) {
+    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+  }
+
   if (UnsafeFPMath) {
     if (N0CFP && N0CFP->isZero())
       return N2;
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index 867ab278541..458018fbf4f 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1205,7 +1205,7 @@ define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, flo
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
 
-; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
+; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
 ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]]
 
 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
diff --git a/llvm/test/CodeGen/PowerPC/fneg.ll b/llvm/test/CodeGen/PowerPC/fneg.ll
index edcfe2d5c86..328ffecd176 100644
--- a/llvm/test/CodeGen/PowerPC/fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/fneg.ll
@@ -20,8 +20,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #4
 define float @fma_fneg_fneg(float %x, float %y, float %z) {
 ; CHECK-LABEL: fma_fneg_fneg:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: fneg f0, f2
-; CHECK-NEXT: fnmsubs f1, f1, f0, f3
+; CHECK-NEXT: fmadds f1, f1, f2, f3
 ; CHECK-NEXT: blr
   %negx = fneg float %x
   %negy = fneg float %y
@@ -32,8 +31,8 @@ define float @fma_fneg_fneg(float %x, float %y, float %z) {
 define float @fma_fneg_fsub(float %x, float %y0, float %y1, float %z) {
 ; CHECK-LABEL: fma_fneg_fsub:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: fsubs f0, f2, f3
-; CHECK-NEXT: fnmsubs f1, f1, f0, f4
+; CHECK-NEXT: fsubs f0, f3, f2
+; CHECK-NEXT: fmadds f1, f1, f0, f4
 ; CHECK-NEXT: blr
   %negx = fneg float %x
   %negy = fsub nsz float %y0, %y1

