summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-08-12 08:08:56 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-08-12 08:08:56 +0000
commit8c049d5c03befd19c68dd6b404277f604302bc98 (patch)
tree0af7c2ee214e92d36193f1d6ab9644db5d84a25b /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
parent9b2c45398fdd2d57aa8ac9ba5f7beaf2f5335602 (diff)
downloadbcm5719-llvm-8c049d5c03befd19c68dd6b404277f604302bc98.tar.gz
bcm5719-llvm-8c049d5c03befd19c68dd6b404277f604302bc98.zip
[InstCombine] Move SSE/AVX vector blend folding to instcombiner
As discussed in D11886, this patch moves the SSE/AVX vector blend folding from PerformINTRINSIC_WO_CHAINCombine to InstCombiner (which allows us to remove that function completely). InstCombiner already had partial support for this; I just had to add support for zero (ConstantAggregateZero) masks and for the case where both selection inputs are the same (allowing us to ignore the mask). I also moved all the relevant combine tests into InstCombine/blend_x86.ll. Differential Revision: http://reviews.llvm.org/D11934 llvm-svn: 244723
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp19
1 files changed, 15 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 600c8c36392..a7fb7467404 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -960,7 +960,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// This optimization is convoluted because the intrinsic is defined as
// getting a vector of floats or doubles for the ps and pd versions.
// FIXME: That should be changed.
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
+
+ // fold (blend A, A, Mask) -> A
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Zero Mask - select 1st argument.
+ if (auto C = dyn_cast<ConstantAggregateZero>(Mask))
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
auto Tyi1 = Builder->getInt1Ty();
auto SelectorType = cast<VectorType>(Mask->getType());
@@ -983,11 +996,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
}
auto NewSelector = ConstantVector::get(Selectors);
- return SelectInst::Create(NewSelector, II->getArgOperand(1),
- II->getArgOperand(0), "blendv");
- } else {
- break;
+ return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+ break;
}
case Intrinsic::x86_avx_vpermilvar_ps:
OpenPOWER on IntegriCloud