summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorRafael Espindola <rafael.espindola@gmail.com>2014-04-29 20:41:54 +0000
committerRafael Espindola <rafael.espindola@gmail.com>2014-04-29 20:41:54 +0000
commiteb7bdbd0ce8d71e95c4e9a09b1e5cda7234cbfcd (patch)
treeb81dad4d7bf74cf8c0e93701d1ad5a2daa30d019 /llvm/lib
parente653811af2e81ee8f719096b1fa3073b78407656 (diff)
downloadbcm5719-llvm-eb7bdbd0ce8d71e95c4e9a09b1e5cda7234cbfcd.tar.gz
bcm5719-llvm-eb7bdbd0ce8d71e95c4e9a09b1e5cda7234cbfcd.zip
Two fixes to the vpermilvar optimization.
The instcombine logic to handle vpermilvar's pd and 256 variants was incorrect. The _256 variants have indexes into the individual 128-bit lanes, and in all cases it also has to mask out unused bits. llvm-svn: 207577
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp25
1 files changed, 24 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index df217f19acd..785d19e45b5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -725,9 +725,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Convert vpermil* to shufflevector if the mask is constant.
Value *V = II->getArgOperand(1);
if (auto C = dyn_cast<ConstantDataVector>(V)) {
+ unsigned Size = C->getNumElements();
+ assert(Size == 8 || Size == 4 || Size == 2);
+ uint32_t Indexes[8];
+
+ // The intrinsics only read one or two bits, clear the rest.
+ for (unsigned I = 0; I < Size; ++I) {
+ uint32_t Index = C->getElementAsInteger(I) & 0x3;
+ if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
+ II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
+ Index >>= 1;
+ Indexes[I] = Index;
+ }
+
+ // The _256 variants are a bit trickier since the mask bits always index
+ // into the corresponding 128 half. In order to convert to a generic
+ // shuffle, we have to make that explicit.
+ if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
+ II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
+ for (unsigned I = Size / 2; I < Size; ++I)
+ Indexes[I] += Size / 2;
+ }
+ auto NewC =
+ ConstantDataVector::get(C->getContext(), makeArrayRef(Indexes, Size));
auto V1 = II->getArgOperand(0);
auto V2 = UndefValue::get(V1->getType());
- auto Shuffle = Builder->CreateShuffleVector(V1, V2, C);
+ auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
return ReplaceInstUsesWith(CI, Shuffle);
}
break;
OpenPOWER on IntegriCloud