diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-07 08:18:35 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-07 08:18:35 +0000 |
commit | 91e3ac82939175c7e7159078e47a60bc6cabdb48 (patch) | |
tree | b57de61c766649400a347f0b52f9235a253b1f5c /llvm/lib/Transforms | |
parent | e64cee81bfc7a617247c73886c81eb609258bb81 (diff) | |
download | bcm5719-llvm-91e3ac82939175c7e7159078e47a60bc6cabdb48.tar.gz bcm5719-llvm-91e3ac82939175c7e7159078e47a60bc6cabdb48.zip |
[InstCombine][SSE] Add MOVMSK constant folding (PR27982)
This patch adds support for folding undef/zero/constant inputs to MOVMSK instructions.
The SSE/AVX versions can be fully folded, but the MMX version can only handle undef inputs.
Differential Revision: http://reviews.llvm.org/D20998
llvm-svn: 271990
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 51 |
1 files changed, 51 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e0ec74f814b..5de8c7a4fdb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -325,6 +325,45 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
   return Builder.CreateAShr(Vec, ShiftVec);
 }
 
+static Value *simplifyX86movmsk(const IntrinsicInst &II,
+                                InstCombiner::BuilderTy &Builder) {
+  Value *Arg = II.getArgOperand(0);
+  Type *ResTy = II.getType();
+  Type *ArgTy = Arg->getType();
+
+  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
+  if (isa<UndefValue>(Arg))
+    return Constant::getNullValue(ResTy);
+
+  // We can't easily peek through x86_mmx types.
+  if (!ArgTy->isVectorTy())
+    return nullptr;
+
+  auto *C = dyn_cast<Constant>(Arg);
+  if (!C)
+    return nullptr;
+
+  // Extract signbits of the vector input and pack into integer result.
+  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
+  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
+    auto *COp = C->getAggregateElement(I);
+    if (!COp)
+      return nullptr;
+    if (isa<UndefValue>(COp))
+      continue;
+
+    auto *CInt = dyn_cast<ConstantInt>(COp);
+    auto *CFp = dyn_cast<ConstantFP>(COp);
+    if (!CInt && !CFp)
+      return nullptr;
+
+    if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
+      Result.setBit(I);
+  }
+
+  return Constant::getIntegerValue(ResTy, Result);
+}
+
 static Value *simplifyX86insertps(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
@@ -1460,6 +1499,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
+  case Intrinsic::x86_mmx_pmovmskb:
+  case Intrinsic::x86_sse_movmsk_ps:
+  case Intrinsic::x86_sse2_movmsk_pd:
+  case Intrinsic::x86_sse2_pmovmskb_128:
+  case Intrinsic::x86_avx_movmsk_pd_256:
+  case Intrinsic::x86_avx_movmsk_ps_256:
+  case Intrinsic::x86_avx2_pmovmskb: {
+    if (Value *V = simplifyX86movmsk(*II, *Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+  }
+
   case Intrinsic::x86_sse_comieq_ss:
   case Intrinsic::x86_sse_comige_ss:
   case Intrinsic::x86_sse_comigt_ss:
```