summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-05-22 16:58:10 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-05-22 16:58:10 +0000
commit5fa289f0d8ff85b9e14d2f814a90761378ab54ae (patch)
treefed99a180eebde775b59f959727b7b5934508512 /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
parent80cb549c2fb973ffa84276b6144e0aa65ef690c9 (diff)
downloadbcm5719-llvm-5fa289f0d8ff85b9e14d2f814a90761378ab54ae.tar.gz
bcm5719-llvm-5fa289f0d8ff85b9e14d2f814a90761378ab54ae.zip
[AMDGPU] Narrow lshl from 64 to 32 bit if possible
Turn an expensive 64-bit shift into a 32-bit shift if the shift does not overflow int: shl (ext x) => zext (shl x).
Differential Revision: https://reviews.llvm.org/D33367
llvm-svn: 303569
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp44
1 files changed, 33 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b49263d61a3..5ec46a8294c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2595,27 +2595,49 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- if (N->getValueType(0) != MVT::i64)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
return SDValue();
- // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
-
- // On some subtargets, 64-bit shift is a quarter rate instruction. In the
- // common case, splitting this into a move and a 32-bit shift is faster and
- // the same code size.
- const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS)
return SDValue();
- unsigned RHSVal = RHS->getZExtValue();
- if (RHSVal < 32)
- return SDValue();
-
SDValue LHS = N->getOperand(0);
+ unsigned RHSVal = RHS->getZExtValue();
+ if (!RHSVal)
+ return LHS;
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
+ switch (LHS->getOpcode()) {
+ default:
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND: {
+ // shl (ext x) => zext (shl x), if shift does not overflow int
+ KnownBits Known;
+ SDValue X = LHS->getOperand(0);
+ DAG.computeKnownBits(X, Known);
+ unsigned LZ = Known.countMinLeadingZeros();
+ if (LZ < RHSVal)
+ break;
+ EVT XVT = X.getValueType();
+ SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
+ return DAG.getZExtOrTrunc(Shl, SL, VT);
+ }
+ }
+
+ // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
+
+ // On some subtargets, 64-bit shift is a quarter rate instruction. In the
+ // common case, splitting this into a move and a 32-bit shift is faster and
+ // the same code size.
+ if (RHSVal < 32)
+ return SDValue();
+
SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
OpenPOWER on IntegriCloud