summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp 52
-rw-r--r-- llvm/test/CodeGen/X86/pshufb-mask-comments.ll 6
2 files changed, 39 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index ef16c5bdbfd..989bce447fe 100644
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -40,24 +40,43 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
#endif
- // This is a straightforward byte vector.
- if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
- int NumElements = MaskTy->getVectorNumElements();
- ShuffleMask.reserve(NumElements);
+ if (!MaskTy->isVectorTy())
+ return;
+ int NumElts = MaskTy->getVectorNumElements();
+
+ Type *EltTy = MaskTy->getVectorElementType();
+ if (!EltTy->isIntegerTy())
+ return;
+
+ // The shuffle mask requires a byte vector - decode cases with
+ // wider elements as well.
+ unsigned BitWidth = cast<IntegerType>(EltTy)->getBitWidth();
+ if ((BitWidth % 8) != 0)
+ return;
+
+ int Scale = BitWidth / 8;
+ int NumBytes = NumElts * Scale;
+ ShuffleMask.reserve(NumBytes);
+
+ for (int i = 0; i != NumElts; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.append(Scale, SM_SentinelUndef);
+ continue;
+ }
- for (int i = 0; i < NumElements; ++i) {
+ APInt APElt = cast<ConstantInt>(COp)->getValue();
+ for (int j = 0; j != Scale; ++j) {
// For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
// lane of the vector we're inside.
- int Base = i & ~0xf;
- Constant *COp = C->getAggregateElement(i);
- if (!COp) {
- ShuffleMask.clear();
- return;
- } else if (isa<UndefValue>(COp)) {
- ShuffleMask.push_back(SM_SentinelUndef);
- continue;
- }
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ int Base = ((i * Scale) + j) & ~0xf;
+
+ uint64_t Element = APElt.getLoBits(8).getZExtValue();
+ APElt = APElt.lshr(8);
+
// If the high bit (7) of the byte is set, the element is zeroed.
if (Element & (1 << 7))
ShuffleMask.push_back(SM_SentinelZero);
@@ -68,7 +87,8 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
}
- // TODO: Handle funny-looking vectors too.
+
+ assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
}
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
index f6edf8e68cb..b6f228c7d48 100644
--- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -75,9 +75,9 @@ define <16 x i8> @test5(<16 x i8> %V) {
define <16 x i8> @test6(<16 x i8> %V, <2 x i64>* %P) {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [217019414673948672,506380106026255364]
-; CHECK-NEXT: movdqa %xmm1, (%rdi)
-; CHECK-NEXT: pshufb %xmm1, %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [217019414673948672,506380106026255364]
+; CHECK-NEXT: movaps %xmm1, (%rdi)
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: retq
%1 = insertelement <2 x i64> undef, i64 217019414673948672, i32 0
%2 = insertelement <2 x i64> %1, i64 506380106026255364, i32 1
OpenPOWER on IntegriCloud