diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 21:55:14 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 21:55:14 +0000 |
| commit | 3cbbc10488b8ff92691ed9ed98f9e982f8530644 (patch) | |
| tree | 6c3c2814474d6482d5d1eae976b2ec385442dcf6 /llvm/lib/Target | |
| parent | 3ca2f21f5080735661ffb629ea6d37b45ea36ceb (diff) | |
| download | bcm5719-llvm-3cbbc10488b8ff92691ed9ed98f9e982f8530644.tar.gz bcm5719-llvm-3cbbc10488b8ff92691ed9ed98f9e982f8530644.zip | |
AMDGPU: Generalize shl combine
Reduce 64-bit shl with constant >= 32. We already special-cased
the == 32 case, but this also works for any constant >= 32.
llvm-svn: 258092
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 22 |
1 files changed, 14 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index f7812e0fc3a..69f4c7cf359 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2544,14 +2544,17 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, if (N->getValueType(0) != MVT::i64) return SDValue(); - // i64 (shl x, 32) -> (build_pair 0, x) + // i64 (shl x, C) -> (build_pair 0, (shl x, C -32)) - // Doing this with moves theoretically helps MI optimizations that understand - // copies. 2 v_mov_b32_e32 will have the same code size / cycle count as - // v_lshl_b64. In the SALU case, I think this is slightly worse since it - // doubles the code size and I'm unsure about cycle count. + // On some subtargets, 64-bit shift is a quarter rate instruction. In the + // common case, splitting this into a move and a 32-bit shift is faster and + // the same code size. const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!RHS || RHS->getZExtValue() != 32) + if (!RHS) + return SDValue(); + + unsigned RHSVal = RHS->getZExtValue(); + if (RHSVal < 32) return SDValue(); SDValue LHS = N->getOperand(0); @@ -2559,12 +2562,15 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, SDLoc SL(N); SelectionDAG &DAG = DCI.DAG; - // Extract low 32-bits. + SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); + SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt); const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); - return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo); + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Zero, NewShift); + return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); } SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N, |

