diff options
| author | Michael J. Spencer <bigcheesegs@gmail.com> | 2014-04-24 00:58:18 +0000 |
|---|---|---|
| committer | Michael J. Spencer <bigcheesegs@gmail.com> | 2014-04-24 00:58:18 +0000 |
| commit | dee4b2c379ab6913f03001c94d3f3753a1e09ad6 (patch) | |
| tree | 06ebe82bf826c9366213decadd8c1f2d117350ab /llvm/lib/Transforms/InstCombine | |
| parent | 0ee82b95cb56b3156906e28a8eccc24eb609daff (diff) | |
| download | bcm5719-llvm-dee4b2c379ab6913f03001c94d3f3753a1e09ad6.tar.gz bcm5719-llvm-dee4b2c379ab6913f03001c94d3f3753a1e09ad6.zip | |
[InstCombine][x86] Constant fold psll intrinsics.
This excludes avx512 as I don't have hardware to verify. It excludes _dq
variants because they are represented in the IR as <{2,4} x i64> when it's
actually a byte shift of the entire i{128,256}.
This also excludes _dq_bs as they aren't at all supported by the backend.
There are also no corresponding instructions in the ISA. I have no idea why
they exist...
llvm-svn: 207058
Diffstat (limited to 'llvm/lib/Transforms/InstCombine')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5d4b063d5e5..40732d2f0b9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -556,6 +556,47 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + // Constant fold <A x Bi> << Ci. + // FIXME: We don't handle _dq because it's a shift of an i128, but is + // represented in the IR as <2 x i64>. A per element shift is wrong. + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: { + // Simplify if count is constant. To 0 if > BitWidth, otherwise to shl. + auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1)); + auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1)); + if (!CDV && !CInt) + break; + ConstantInt *Count; + if (CDV) + Count = cast<ConstantInt>(CDV->getElementAsConstant(0)); + else + Count = CInt; + + auto Vec = II->getArgOperand(0); + auto VT = cast<VectorType>(Vec->getType()); + if (Count->getZExtValue() > + VT->getElementType()->getPrimitiveSizeInBits() - 1) + return ReplaceInstUsesWith( + CI, ConstantAggregateZero::get(Vec->getType())); + else { + unsigned VWidth = VT->getNumElements(); + // Get a constant vector of the same type as the first operand. + auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); + return BinaryOperator::CreateShl( + Vec, Builder->CreateVectorSplat(VWidth, VTCI)); + } + break; + } case Intrinsic::x86_sse41_pmovsxbw: case Intrinsic::x86_sse41_pmovsxwd: |

