diff options
| author | Rafael Espindola <rafael.espindola@gmail.com> | 2014-04-29 20:41:54 +0000 |
|---|---|---|
| committer | Rafael Espindola <rafael.espindola@gmail.com> | 2014-04-29 20:41:54 +0000 |
| commit | eb7bdbd0ce8d71e95c4e9a09b1e5cda7234cbfcd (patch) | |
| tree | b81dad4d7bf74cf8c0e93701d1ad5a2daa30d019 /llvm/lib | |
| parent | e653811af2e81ee8f719096b1fa3073b78407656 (diff) | |
| download | bcm5719-llvm-eb7bdbd0ce8d71e95c4e9a09b1e5cda7234cbfcd.tar.gz bcm5719-llvm-eb7bdbd0ce8d71e95c4e9a09b1e5cda7234cbfcd.zip | |
Two fixes to the vpermilvar optimization.
The instcombine logic to handle vpermilvar's pd and 256 variants was incorrect.
The _256 variants have indexes into the individual 128-bit lanes, and in all
cases the unused bits of each mask element also have to be masked out.
llvm-svn: 207577
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 25 |
1 files changed, 24 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index df217f19acd..785d19e45b5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -725,9 +725,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Convert vpermil* to shufflevector if the mask is constant. Value *V = II->getArgOperand(1); if (auto C = dyn_cast<ConstantDataVector>(V)) { + unsigned Size = C->getNumElements(); + assert(Size == 8 || Size == 4 || Size == 2); + uint32_t Indexes[8]; + + // The intrinsics only read one or two bits, clear the rest. + for (unsigned I = 0; I < Size; ++I) { + uint32_t Index = C->getElementAsInteger(I) & 0x3; + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) + Index >>= 1; + Indexes[I] = Index; + } + + // The _256 variants are a bit trickier since the mask bits always index + // into the corresponding 128 half. In order to convert to a generic + // shuffle, we have to make that explicit. + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) { + for (unsigned I = Size / 2; I < Size; ++I) + Indexes[I] += Size / 2; + } + auto NewC = + ConstantDataVector::get(C->getContext(), makeArrayRef(Indexes, Size)); auto V1 = II->getArgOperand(0); auto V2 = UndefValue::get(V1->getType()); - auto Shuffle = Builder->CreateShuffleVector(V1, V2, C); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC); return ReplaceInstUsesWith(CI, Shuffle); } break; |

