summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms/InstCombine
diff options
context:
space:
mode:
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-04-26 01:03:22 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-04-26 01:03:22 +0000
commit: 8cc9059ce8f3bd25fd61b0cf6936295c80518bf3 (patch)
tree: 8014fcc78e10c00ac649be3033b6f00c2d1a31cf /llvm/lib/Transforms/InstCombine
parent: 8d039e442036436c004d7b43333efe29d4386293 (diff)
download: bcm5719-llvm-8cc9059ce8f3bd25fd61b0cf6936295c80518bf3.tar.gz
download: bcm5719-llvm-8cc9059ce8f3bd25fd61b0cf6936295c80518bf3.zip
[InstCombine][X86] Teach how to fold calls to SSE2/AVX2 packed logical shift
right intrinsics.

A packed logical shift right with a shift count bigger than or equal to the
element size always produces a zero vector. In all other cases, it can be
safely replaced by a 'lshr' instruction.

llvm-svn: 207299
Diffstat (limited to 'llvm/lib/Transforms/InstCombine')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 50
1 files changed, 41 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 17ada47d2be..df217f19acd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -570,8 +570,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w: {
- // Simplify if count is constant. To 0 if > BitWidth, otherwise to shl.
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: {
+ // Simplify if count is constant. To 0 if >= BitWidth,
+ // otherwise to shl/lshr.
auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
if (!CDV && !CInt)
@@ -588,14 +601,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
VT->getElementType()->getPrimitiveSizeInBits() - 1)
return ReplaceInstUsesWith(
CI, ConstantAggregateZero::get(Vec->getType()));
- else {
- unsigned VWidth = VT->getNumElements();
- // Get a constant vector of the same type as the first operand.
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
- return BinaryOperator::CreateShl(
- Vec, Builder->CreateVectorSplat(VWidth, VTCI));
+
+ bool isPackedShiftLeft = true;
+ switch (II->getIntrinsicID()) {
+ default : break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
}
- break;
+
+ unsigned VWidth = VT->getNumElements();
+ // Get a constant vector of the same type as the first operand.
+ auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
+ if (isPackedShiftLeft)
+ return BinaryOperator::CreateShl(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
+
+ return BinaryOperator::CreateLShr(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
}
case Intrinsic::x86_sse41_pmovsxbw:
OpenPOWER on IntegriCloud