diff options
-rw-r--r-- llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 52
-rw-r--r-- llvm/test/CodeGen/X86/pshufb-mask-comments.ll | 6
2 files changed, 39 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp index ef16c5bdbfd..989bce447fe 100644 --- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -40,24 +40,43 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) { assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512); #endif - // This is a straightforward byte vector. - if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) { - int NumElements = MaskTy->getVectorNumElements(); - ShuffleMask.reserve(NumElements); + if (!MaskTy->isVectorTy()) + return; + int NumElts = MaskTy->getVectorNumElements(); + + Type *EltTy = MaskTy->getVectorElementType(); + if (!EltTy->isIntegerTy()) + return; + + // The shuffle mask requires a byte vector - decode cases with + // wider elements as well. + unsigned BitWidth = cast<IntegerType>(EltTy)->getBitWidth(); + if ((BitWidth % 8) != 0) + return; + + int Scale = BitWidth / 8; + int NumBytes = NumElts * Scale; + ShuffleMask.reserve(NumBytes); + + for (int i = 0; i != NumElts; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa<UndefValue>(COp)) { + ShuffleMask.append(Scale, SM_SentinelUndef); + continue; + } - for (int i = 0; i < NumElements; ++i) { + APInt APElt = cast<ConstantInt>(COp)->getValue(); + for (int j = 0; j != Scale; ++j) { // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte // lane of the vector we're inside. 
- int Base = i & ~0xf; - Constant *COp = C->getAggregateElement(i); - if (!COp) { - ShuffleMask.clear(); - return; - } else if (isa<UndefValue>(COp)) { - ShuffleMask.push_back(SM_SentinelUndef); - continue; - } - uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); + int Base = ((i * Scale) + j) & ~0xf; + + uint64_t Element = APElt.getLoBits(8).getZExtValue(); + APElt = APElt.lshr(8); + // If the high bit (7) of the byte is set, the element is zeroed. if (Element & (1 << 7)) ShuffleMask.push_back(SM_SentinelZero); @@ -68,7 +87,8 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) { } } } - // TODO: Handle funny-looking vectors too. + + assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size"); } void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll index f6edf8e68cb..b6f228c7d48 100644 --- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll +++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll @@ -75,9 +75,9 @@ define <16 x i8> @test5(<16 x i8> %V) { define <16 x i8> @test6(<16 x i8> %V, <2 x i64>* %P) { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [217019414673948672,506380106026255364] -; CHECK-NEXT: movdqa %xmm1, (%rdi) -; CHECK-NEXT: pshufb %xmm1, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [217019414673948672,506380106026255364] +; CHECK-NEXT: movaps %xmm1, (%rdi) +; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; CHECK-NEXT: retq %1 = insertelement <2 x i64> undef, i64 217019414673948672, i32 0 %2 = insertelement <2 x i64> %1, i64 506380106026255364, i32 1 |