diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-04-26 01:03:22 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-04-26 01:03:22 +0000 |
| commit | 8cc9059ce8f3bd25fd61b0cf6936295c80518bf3 (patch) | |
| tree | 8014fcc78e10c00ac649be3033b6f00c2d1a31cf /llvm/lib/Transforms/InstCombine | |
| parent | 8d039e442036436c004d7b43333efe29d4386293 (diff) | |
| download | bcm5719-llvm-8cc9059ce8f3bd25fd61b0cf6936295c80518bf3.tar.gz bcm5719-llvm-8cc9059ce8f3bd25fd61b0cf6936295c80518bf3.zip | |
[InstCombine][X86] Teach InstCombine how to fold calls to SSE2/AVX2 packed
logical shift right intrinsics.
A packed logical shift right whose shift count is greater than or equal to the
element size in bits always produces a zero vector. For any smaller constant
shift count, the call can be safely replaced by an 'lshr' instruction.
llvm-svn: 207299
Diffstat (limited to 'llvm/lib/Transforms/InstCombine')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 50 |
1 files changed, 41 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 17ada47d2be..df217f19acd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -570,8 +570,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_psll_w: case Intrinsic::x86_avx2_pslli_d: case Intrinsic::x86_avx2_pslli_q: - case Intrinsic::x86_avx2_pslli_w: { - // Simplify if count is constant. To 0 if > BitWidth, otherwise to shl. + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: { + // Simplify if count is constant. To 0 if >= BitWidth, + // otherwise to shl/lshr. auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1)); auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1)); if (!CDV && !CInt) @@ -588,14 +601,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { VT->getElementType()->getPrimitiveSizeInBits() - 1) return ReplaceInstUsesWith( CI, ConstantAggregateZero::get(Vec->getType())); - else { - unsigned VWidth = VT->getNumElements(); - // Get a constant vector of the same type as the first operand. 
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); - return BinaryOperator::CreateShl( - Vec, Builder->CreateVectorSplat(VWidth, VTCI)); + + bool isPackedShiftLeft = true; + switch (II->getIntrinsicID()) { + default : break; + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break; } - break; + + unsigned VWidth = VT->getNumElements(); + // Get a constant vector of the same type as the first operand. + auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); + if (isPackedShiftLeft) + return BinaryOperator::CreateShl(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + + return BinaryOperator::CreateLShr(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); } case Intrinsic::x86_sse41_pmovsxbw: |

