diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-01-12 00:09:34 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-01-12 00:09:34 +0000 |
commit | 2529fba989e2c9a7e60cddee63a6c75670a4227d (patch) | |
tree | c3d95c1663b2bd690ba231c9e723e5315cf0110a /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | |
parent | f91e47374cc57a72c86e520312cd2231d03c0fee (diff) | |
download | bcm5719-llvm-2529fba989e2c9a7e60cddee63a6c75670a4227d.tar.gz bcm5719-llvm-2529fba989e2c9a7e60cddee63a6c75670a4227d.zip |
AMDGPU: Fold fneg into fadd
Patch mostly by Fiona Glaser
llvm-svn: 291731
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 894056ea4d0..43425a919d7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -477,6 +477,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FNEG); } //===----------------------------------------------------------------------===// @@ -2805,6 +2806,63 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, return performCtlzCombine(SDLoc(N), Cond, True, False, DCI); } +static bool fnegFoldsIntoOp(unsigned Opc) { + switch (Opc) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FMA: + case ISD::FMAD: + return true; + default: + return false; + } +} + +SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + unsigned Opc = N0.getOpcode(); + + // If the input has multiple uses and we can either fold the negate down, or + // the other uses cannot, give up. This both prevents unprofitable + // transformations and infinite loops: we won't repeatedly try to fold around + // a negate that has no 'good' form. + // + // TODO: Check users can fold + if (fnegFoldsIntoOp(Opc) && !N0.hasOneUse()) + return SDValue(); + + SDLoc SL(N); + switch (Opc) { + case ISD::FADD: { + // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) + SDValue LHS = N0.getOperand(0); + SDValue RHS = N0.getOperand(1); + + if (LHS.getOpcode() != ISD::FNEG) + LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); + else + LHS = LHS.getOperand(0); + + if (RHS.getOpcode() != ISD::FNEG) + RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); + else + RHS = RHS.getOperand(0); + + SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS); + if (!N0.hasOneUse()) + DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); + return Res; + } + default: + return SDValue(); + } +} + SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -2910,6 +2968,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performMulLoHi24Combine(N, DCI); case ISD::SELECT: return performSelectCombine(N, DCI); + case ISD::FNEG: + return performFNegCombine(N, DCI); case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && |