diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-17 10:52:49 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-17 10:52:49 +0000 |
commit | 5113b48798f99b569697ce39406ca7d484ffd9a1 (patch) | |
tree | 922e1f4b0afc6219501fcca47039a44424bc7d15 | |
parent | 24d3492aee82542dcce50702494ecc9210b93c89 (diff) | |
download | bcm5719-llvm-5113b48798f99b569697ce39406ca7d484ffd9a1.tar.gz bcm5719-llvm-5113b48798f99b569697ce39406ca7d484ffd9a1.zip |
[DAGCombine] Improve (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) folding
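Add support for cases where only some of the per-element c1+c2 sums exceed the maximum valid shift amount: each out-of-range sum is clamped to (bitwidth - 1), while in-range sums are used unchanged.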
Differential Revision: https://reviews.llvm.org/D35722
llvm-svn: 340010
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-sra.ll | 13 |
2 files changed, 18 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c0af91c965a..4d9fd18d71e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6591,31 +6591,30 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) + // clamp (add c1, c2) to max shift. if (N0.getOpcode() == ISD::SRA) { SDLoc DL(N); EVT ShiftVT = N1.getValueType(); + EVT ShiftSVT = ShiftVT.getScalarType(); + SmallVector<SDValue, 16> ShiftValues; - auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, - ConstantSDNode *RHS) { + auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) { APInt c1 = LHS->getAPIntValue(); APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); - return (c1 + c2).uge(OpSizeInBits); - }; - if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) - return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), - DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); - - auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, - ConstantSDNode *RHS) { - APInt c1 = LHS->getAPIntValue(); - APInt c2 = RHS->getAPIntValue(); - zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); - return (c1 + c2).ult(OpSizeInBits); + APInt Sum = c1 + c2; + unsigned ShiftSum = + Sum.uge(OpSizeInBits) ? 
(OpSizeInBits - 1) : Sum.getZExtValue(); + ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT)); + return true; }; - if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { - SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); - return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum); + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) { + SDValue ShiftValue; + if (VT.isVector()) + ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues); + else + ShiftValue = ShiftValues[0]; + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue); } } diff --git a/llvm/test/CodeGen/X86/combine-sra.ll b/llvm/test/CodeGen/X86/combine-sra.ll index 7231da5134d..9bce1a7c3b6 100644 --- a/llvm/test/CodeGen/X86/combine-sra.ll +++ b/llvm/test/CodeGen/X86/combine-sra.ll @@ -120,24 +120,15 @@ define <4 x i32> @combine_vec_ashr_ashr3(<4 x i32> %x) { ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrad $27, %xmm1 ; SSE-NEXT: movdqa %xmm0, %xmm2 -; SSE-NEXT: psrad $5, %xmm2 +; SSE-NEXT: psrad $15, %xmm2 ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] -; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: psrad $1, %xmm0 -; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] -; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] -; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: psrad $10, %xmm1 -; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE-NEXT: psrad $31, %xmm0 -; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_ashr_ashr3: ; AVX: # %bb.0: ; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 = ashr <4 x i32> %x, <i32 1, i32 5, i32 50, i32 27> %2 = ashr <4 x i32> %1, <i32 33, i32 10, i32 33, i32 0> |