diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 21:43:36 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 21:43:36 +0000 |
commit | 80edab99ff840f7453080febc95a241e3745da41 (patch) | |
tree | 168181e7ec475817e51ba2d741f9494e6d336c6e /llvm/lib/Target | |
parent | f0caa3eaab93190db3904b3f59783e86d9236633 (diff) | |
download | bcm5719-llvm-80edab99ff840f7453080febc95a241e3745da41.tar.gz bcm5719-llvm-80edab99ff840f7453080febc95a241e3745da41.zip |
AMDGPU: Reduce 64-bit lshr by constant to 32-bit
64-bit shifts are very slow on some subtargets.
llvm-svn: 258090
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 |
2 files changed, 45 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3589a0daceb..f7812e0fc3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -377,6 +377,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
   setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
 
   setTargetDAGCombine(ISD::SHL);
+  setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::MUL);
   setTargetDAGCombine(ISD::SELECT);
   setTargetDAGCombine(ISD::SELECT_CC);
@@ -2562,9 +2563,46 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
   SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
 
   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+
   return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo);
 }
 
+SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
+  if (N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!RHS)
+    return SDValue();
+
+  unsigned ShiftAmt = RHS->getZExtValue();
+  if (ShiftAmt < 32)
+    return SDValue();
+
+  // srl i64:x, C for C >= 32
+  // =>
+  //   build_pair (srl hi_32(x), C - 32), 0
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+
+  SDValue One = DAG.getConstant(1, SL, MVT::i32);
+  SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+
+  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
+                           VecOp, One);
+
+  SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
+  SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
+
+  SDValue BuildPair = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+                                  NewShift, Zero);
+
+  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
+}
+
 SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   EVT VT = N->getValueType(0);
@@ -2701,6 +2739,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
 
     return performShlCombine(N, DCI);
   }
+  case ISD::SRL: {
+    if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+      break;
+
+    return performSrlCombine(N, DCI);
+  }
   case ISD::MUL:
     return performMulCombine(N, DCI);
   case AMDGPUISD::MUL_I24:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 37925416a9c..920df8f5a6a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -69,6 +69,7 @@ private:
 
   SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
                              DAGCombinerInfo &DCI) const;