diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-08-10 20:21:15 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-08-10 20:21:15 +0000 |
commit | a3a72b41de527ac6b4e013bcda1dff70ad191a60 (patch) | |
tree | 70c028c6f1f64b3386c3fad21b63322727b341a8 /llvm/lib | |
parent | 8e7661ec05d4397a242205677225a06bad1182ea (diff) | |
download | bcm5719-llvm-a3a72b41de527ac6b4e013bcda1dff70ad191a60.tar.gz bcm5719-llvm-a3a72b41de527ac6b4e013bcda1dff70ad191a60.zip |
[InstCombine] Move SSE2/AVX2 arithmetic vector shift folding to instcombiner
As discussed in D11760, this patch moves the (V)PSRA(WD) arithmetic shift-by-constant folding to InstCombine to match the logical shift implementations.
Differential Revision: http://reviews.llvm.org/D11886
llvm-svn: 244495
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 38 |
2 files changed, 31 insertions, 51 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6dd4fb8d98e..aac4b1452c0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23464,50 +23464,6 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - - // Packed SSE2/AVX2 arithmetic shift immediate intrinsics. - case Intrinsic::x86_sse2_psrai_w: - case Intrinsic::x86_sse2_psrai_d: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - case Intrinsic::x86_sse2_psra_w: - case Intrinsic::x86_sse2_psra_d: - case Intrinsic::x86_avx2_psra_w: - case Intrinsic::x86_avx2_psra_d: { - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(2); - EVT VT = Op0.getValueType(); - assert(VT.isVector() && "Expected a vector type!"); - - if (isa<BuildVectorSDNode>(Op1)) - Op1 = Op1.getOperand(0); - - if (!isa<ConstantSDNode>(Op1)) - return SDValue(); - - EVT SVT = VT.getVectorElementType(); - unsigned SVTBits = SVT.getSizeInBits(); - - ConstantSDNode *CND = cast<ConstantSDNode>(Op1); - const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue()); - uint64_t ShAmt = C.getZExtValue(); - - // Don't try to convert this shift into a ISD::SRA if the shift - // count is bigger than or equal to the element size. - if (ShAmt >= SVTBits) - return SDValue(); - - // Trivial case: if the shift count is zero, then fold this - // into the first operand. - if (ShAmt == 0) - return Op0; - - // Replace this packed shift intrinsic with a target independent - // shift dag node. - SDLoc DL(N); - SDValue Splat = DAG.getConstant(C, DL, VT); - return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat); - } } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index de413c42348..600c8c36392 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -199,7 +199,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { static Value *SimplifyX86immshift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, - bool ShiftLeft) { + bool LogicalShift, bool ShiftLeft) { + assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left"); + // Simplify if count is constant. auto Arg1 = II.getArgOperand(1); auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1); @@ -238,9 +240,15 @@ static Value *SimplifyX86immshift(const IntrinsicInst &II, if (Count == 0) return Vec; - // Handle cases when Shift >= BitWidth - just return zero. - if (Count.uge(BitWidth)) - return ConstantAggregateZero::get(VT); + // Handle cases when Shift >= BitWidth. + if (Count.uge(BitWidth)) { + // If LogicalShift - just return zero. + if (LogicalShift) + return ConstantAggregateZero::get(VT); + + // If ArithmeticShift - clamp Shift to (BitWidth - 1). + Count = APInt(64, BitWidth - 1); + } // Get a constant vector of the same type as the first operand. auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth)); @@ -249,7 +257,10 @@ static Value *SimplifyX86immshift(const IntrinsicInst &II, if (ShiftLeft) return Builder.CreateShl(Vec, ShiftVec); - return Builder.CreateLShr(Vec, ShiftVec); + if (LogicalShift) + return Builder.CreateLShr(Vec, ShiftVec); + + return Builder.CreateAShr(Vec, ShiftVec); } static Value *SimplifyX86extend(const IntrinsicInst &II, @@ -776,6 +787,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + // Constant fold ashr( <A x Bi>, Ci ). + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_avx2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + if (Value *V = SimplifyX86immshift(*II, *Builder, false, false)) + return ReplaceInstUsesWith(*II, V); + break; + // Constant fold lshr( <A x Bi>, Ci ). case Intrinsic::x86_sse2_psrl_d: case Intrinsic::x86_sse2_psrl_q: @@ -789,7 +813,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_psrli_d: case Intrinsic::x86_avx2_psrli_q: case Intrinsic::x86_avx2_psrli_w: - if (Value *V = SimplifyX86immshift(*II, *Builder, false)) + if (Value *V = SimplifyX86immshift(*II, *Builder, true, false)) return ReplaceInstUsesWith(*II, V); break; @@ -806,7 +830,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_pslli_d: case Intrinsic::x86_avx2_pslli_q: case Intrinsic::x86_avx2_pslli_w: - if (Value *V = SimplifyX86immshift(*II, *Builder, true)) + if (Value *V = SimplifyX86immshift(*II, *Builder, true, true)) return ReplaceInstUsesWith(*II, V); break; |