From 1d1c56e2df4fd832aa9e22e68d844e2229649ee5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 11 Oct 2015 14:38:34 +0000
Subject: [InstCombine][X86][XOP] Combine XOP integer vector comparisons to
 native IR

We now have lowering support for XOP PCOM/PCOMU instructions.

llvm-svn: 249977
---
 .../Transforms/InstCombine/InstCombineCalls.cpp | 53 ++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b1a20ff3e6e..f741d1d485b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -446,6 +446,43 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
   return nullptr;
 }
 
+/// Decode XOP integer vector comparison intrinsics.
+static Value *SimplifyX86vpcom(const IntrinsicInst &II,
+                               InstCombiner::BuilderTy &Builder, bool IsSigned) {
+  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+    uint64_t Imm = CInt->getZExtValue() & 0x7;
+    VectorType *VecTy = cast<VectorType>(II.getType());
+    CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+
+    switch (Imm) {
+    case 0x0:
+      Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+      break;
+    case 0x1:
+      Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+      break;
+    case 0x2:
+      Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+      break;
+    case 0x3:
+      Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+      break;
+    case 0x4:
+      Pred = ICmpInst::ICMP_EQ; break;
+    case 0x5:
+      Pred = ICmpInst::ICMP_NE; break;
+    case 0x6:
+      return ConstantInt::getSigned(VecTy, 0); // FALSE
+    case 0x7:
+      return ConstantInt::getSigned(VecTy, -1); // TRUE
+    }
+
+    if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0), II.getArgOperand(1)))
+      return Builder.CreateSExtOrTrunc(Cmp, VecTy);
+  }
+  return nullptr;
+}
+
 /// visitCallInst - CallInst simplification. This mostly only handles folding
 /// of intrinsic instructions. For normal calls, it allows visitCallSite to do
 /// the heavy lifting.
@@ -1252,6 +1289,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return ReplaceInstUsesWith(*II, V);
     break;
 
+  case Intrinsic::x86_xop_vpcomb:
+  case Intrinsic::x86_xop_vpcomd:
+  case Intrinsic::x86_xop_vpcomq:
+  case Intrinsic::x86_xop_vpcomw:
+    if (Value *V = SimplifyX86vpcom(*II, *Builder, true))
+      return ReplaceInstUsesWith(*II, V);
+    break;
+
+  case Intrinsic::x86_xop_vpcomub:
+  case Intrinsic::x86_xop_vpcomud:
+  case Intrinsic::x86_xop_vpcomuq:
+  case Intrinsic::x86_xop_vpcomuw:
+    if (Value *V = SimplifyX86vpcom(*II, *Builder, false))
+      return ReplaceInstUsesWith(*II, V);
+    break;
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     // Note that ppc_altivec_vperm has a big-endian bias, so when creating
--
cgit v1.2.3
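
As a rough illustration of the fold this enables (hand-written IR, not taken
from the commit; the value names %a, %b, %cmp, %r are made up): a signed byte
comparison with immediate 0 selects ICMP_SLT, so a call such as

  %r = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a, <16 x i8> %b, i8 0)

should now be combined into native IR along the lines of

  %cmp = icmp slt <16 x i8> %a, %b
  %r = sext <16 x i1> %cmp to <16 x i8>

where the CreateSExtOrTrunc call supplies the sign-extension from the <16 x i1>
icmp result back to the intrinsic's vector return type, and immediates 0x6/0x7
fold directly to all-zeros/all-ones constants without any compare.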