summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-06-07 08:18:35 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-06-07 08:18:35 +0000
commit: 91e3ac82939175c7e7159078e47a60bc6cabdb48 (patch)
tree: b57de61c766649400a347f0b52f9235a253b1f5c /llvm/lib/Transforms
parent: e64cee81bfc7a617247c73886c81eb609258bb81 (diff)
downloadbcm5719-llvm-91e3ac82939175c7e7159078e47a60bc6cabdb48.tar.gz
bcm5719-llvm-91e3ac82939175c7e7159078e47a60bc6cabdb48.zip
[InstCombine][SSE] Add MOVMSK constant folding (PR27982)
This patch adds support for folding undef/zero/constant inputs to MOVMSK instructions. The SSE/AVX versions can be fully folded, but the MMX version can only handle undef inputs.

Differential Revision: http://reviews.llvm.org/D20998

llvm-svn: 271990
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 51
1 files changed, 51 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e0ec74f814b..5de8c7a4fdb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -325,6 +325,45 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
return Builder.CreateAShr(Vec, ShiftVec);
}
+// Constant fold a MOVMSK-style intrinsic; returns nullptr if nothing folds.
+static Value *simplifyX86movmsk(const IntrinsicInst &II,
+                                InstCombiner::BuilderTy &Builder) {
+  Value *Src = II.getArgOperand(0);
+  Type *RetTy = II.getType();
+
+  // The upper result bits must be zero, so an undef input folds to zero.
+  if (isa<UndefValue>(Src))
+    return Constant::getNullValue(RetTy);
+
+  // Only constant vector inputs fold; x86_mmx is not a vector type.
+  Type *SrcTy = Src->getType();
+  auto *CV = SrcTy->isVectorTy() ? dyn_cast<Constant>(Src) : nullptr;
+  if (!CV)
+    return nullptr;
+
+  // Gather each element's sign bit into the matching bit of the result.
+  APInt Mask(RetTy->getPrimitiveSizeInBits(), 0);
+  unsigned NumElts = SrcTy->getVectorNumElements();
+  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
+    Constant *Elt = CV->getAggregateElement(Idx);
+    if (!Elt)
+      return nullptr;
+    if (isa<UndefValue>(Elt))
+      continue; // Undef elements leave their bit clear.
+    bool IsNeg;
+    if (auto *EltInt = dyn_cast<ConstantInt>(Elt))
+      IsNeg = EltInt->isNegative();
+    else if (auto *EltFP = dyn_cast<ConstantFP>(Elt))
+      IsNeg = EltFP->isNegative();
+    else
+      return nullptr;
+    if (IsNeg)
+      Mask.setBit(Idx);
+  }
+
+  return Constant::getIntegerValue(RetTy, Mask);
+}
+
static Value *simplifyX86insertps(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
@@ -1460,6 +1499,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_avx2_pmovmskb: {
+ if (Value *V = simplifyX86movmsk(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+ break;
+ }
+
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse_comige_ss:
case Intrinsic::x86_sse_comigt_ss:
OpenPOWER on IntegriCloud