diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2015-09-30 16:44:39 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2015-09-30 16:44:39 +0000 |
commit | 0594e2a1e94800dd74c9f12ef3af88c7cb87a10b (patch) | |
tree | 2ff6a3f87555328cd5fa98ef7df889e7af9dcf80 /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | |
parent | 9a2ef281bcbb52f358d95d666153dcd1a93544b1 (diff) | |
download | bcm5719-llvm-0594e2a1e94800dd74c9f12ef3af88c7cb87a10b.tar.gz bcm5719-llvm-0594e2a1e94800dd74c9f12ef3af88c7cb87a10b.zip |
[InstCombine] Teach how to convert SSSE3/AVX2 byte shuffles to builtin shuffles if the shuffle mask is constant.
This patch teaches InstCombiner how to convert an SSSE3/AVX2 byte shuffle to a
builtin shuffle if the mask is constant.
Converting byte shuffle intrinsic calls to builtin shuffles can help find
more opportunities for combining shuffles later on in the SelectionDAG.
We may end up with byte shuffles with constant masks as the result of inlining.
Differential Revision: http://reviews.llvm.org/D13252
llvm-svn: 248913
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 33e26b12e01..8f3deacf248 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1163,6 +1163,47 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_avx2_pshuf_b: { + // Turn pshufb(V1,mask) -> shuffle(V1,Zero,mask) if mask is a constant. + auto *V = II->getArgOperand(1); + auto *VTy = cast<VectorType>(V->getType()); + unsigned NumElts = VTy->getNumElements(); + assert((NumElts == 16 || NumElts == 32) && + "Unexpected number of elements in shuffle mask!"); + // Initialize the resulting shuffle mask to all zeroes. + uint32_t Indexes[32] = {0}; + + if (auto *Mask = dyn_cast<ConstantDataVector>(V)) { + // Each byte in the shuffle control mask forms an index to permute the + // corresponding byte in the destination operand. + for (unsigned I = 0; I < NumElts; ++I) { + int8_t Index = Mask->getElementAsInteger(I); + // If the most significant bit (bit[7]) of each byte of the shuffle + // control mask is set, then zero is written in the result byte. + // The zero vector is in the right-hand side of the resulting + // shufflevector. + + // The value of each index is the least significant 4 bits of the + // shuffle control byte. + Indexes[I] = (Index < 0) ? NumElts : Index & 0xF; + } + } else if (!isa<ConstantAggregateZero>(V)) + break; + + // The value of each index for the high 128-bit lane is the least + // significant 4 bits of the respective shuffle control byte. 
+ for (unsigned I = 16; I < NumElts; ++I) + Indexes[I] += I & 0xF0; + + auto NewC = ConstantDataVector::get(V->getContext(), + makeArrayRef(Indexes, NumElts)); + auto V1 = II->getArgOperand(0); + auto V2 = Constant::getNullValue(II->getType()); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC); + return ReplaceInstUsesWith(CI, Shuffle); + } + case Intrinsic::x86_avx_vpermilvar_ps: case Intrinsic::x86_avx_vpermilvar_ps_256: case Intrinsic::x86_avx_vpermilvar_pd: |