diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86MCInstLower.cpp | 83 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 68 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll | 8 |
4 files changed, 131 insertions, 31 deletions
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 45380e09ab3..d02786cf562 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1018,7 +1018,8 @@ static const Constant *getConstantFromPool(const MachineInstr &MI, } static std::string getShuffleComment(const MachineOperand &DstOp, - const MachineOperand &SrcOp, + const MachineOperand &SrcOp1, + const MachineOperand &SrcOp2, ArrayRef<int> Mask) { std::string Comment; @@ -1032,39 +1033,49 @@ static std::string getShuffleComment(const MachineOperand &DstOp, }; StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem"; - StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem"; + StringRef Src1Name = + SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem"; + StringRef Src2Name = + SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem"; + + // One source operand, fix the mask to print all elements in one span. + SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end()); + if (Src1Name == Src2Name) + for (int i = 0, e = ShuffleMask.size(); i != e; ++i) + if (ShuffleMask[i] >= e) + ShuffleMask[i] -= e; raw_string_ostream CS(Comment); CS << DstName << " = "; - bool NeedComma = false; - bool InSrc = false; - for (int M : Mask) { - // Wrap up any prior entry... - if (M == SM_SentinelZero && InSrc) { - InSrc = false; - CS << "]"; - } - if (NeedComma) + for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { + if (i != 0) CS << ","; - else - NeedComma = true; - - // Print this shuffle... - if (M == SM_SentinelZero) { + if (ShuffleMask[i] == SM_SentinelZero) { CS << "zero"; - } else { - if (!InSrc) { - InSrc = true; - CS << SrcName << "["; - } - if (M == SM_SentinelUndef) + continue; + } + + // Otherwise, it must come from src1 or src2. Print the span of elements + // that comes from this src. + bool isSrc1 = ShuffleMask[i] < (int)e; + CS << (isSrc1 ? Src1Name : Src2Name) << '['; + + bool IsFirst = true; + while (i != e && ShuffleMask[i] != SM_SentinelZero && + (ShuffleMask[i] < (int)e) == isSrc1) { + if (!IsFirst) + CS << ','; + else + IsFirst = false; + if (ShuffleMask[i] == SM_SentinelUndef) CS << "u"; else - CS << M; + CS << ShuffleMask[i] % (int)e; + ++i; } + CS << ']'; + --i; // For loop increments element #. } - if (InSrc) - CS << "]"; CS.flush(); return Comment; @@ -1313,7 +1324,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector<int, 16> Mask; DecodePSHUFBMask(C, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask)); + OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask)); } break; } @@ -1340,7 +1351,25 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector<int, 16> Mask; DecodeVPERMILPMask(C, ElSize, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask)); + OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask)); + } + break; + } + case X86::VPPERMrrm: { + if (!OutStreamer->isVerboseAsm()) + break; + assert(MI->getNumOperands() > 6 && + "We should always have at least 6 operands!"); + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &SrcOp1 = MI->getOperand(1); + const MachineOperand &SrcOp2 = MI->getOperand(2); + const MachineOperand &MaskOp = MI->getOperand(6); + + if (auto *C = getConstantFromPool(*MI, MaskOp)) { + SmallVector<int, 16> Mask; + DecodeVPPERMMask(C, Mask); + if (!Mask.empty()) + OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask)); } break; } diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp index 4e7714eeb2e..f24f38d4a21 100644 --- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -153,6 +153,74 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, // TODO: Handle funny-looking vectors too. } +void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) { + Type *MaskTy = C->getType(); + assert(MaskTy->getPrimitiveSizeInBits() == 128); + + // Only support vector types. + if (!MaskTy->isVectorTy()) + return; + + // Make sure its an integer type. + Type *VecEltTy = MaskTy->getVectorElementType(); + if (!VecEltTy->isIntegerTy()) + return; + + // The shuffle mask requires a byte vector - decode cases with + // wider elements as well. + unsigned BitWidth = cast<IntegerType>(VecEltTy)->getBitWidth(); + if ((BitWidth % 8) != 0) + return; + + int NumElts = MaskTy->getVectorNumElements(); + int Scale = BitWidth / 8; + int NumBytes = NumElts * Scale; + ShuffleMask.reserve(NumBytes); + + for (int i = 0; i != NumElts; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa<UndefValue>(COp)) { + ShuffleMask.append(Scale, SM_SentinelUndef); + continue; + } + + // VPPERM Operation + // Bits[4:0] - Byte Index (0 - 31) + // Bits[7:5] - Permute Operation + // + // Permute Operation: + // 0 - Source byte (no logical operation). + // 1 - Invert source byte. + // 2 - Bit reverse of source byte. + // 3 - Bit reverse of inverted source byte. + // 4 - 00h (zero - fill). + // 5 - FFh (ones - fill). + // 6 - Most significant bit of source byte replicated in all bit positions. + // 7 - Invert most significant bit of source byte and replicate in all bit positions. + APInt MaskElt = cast<ConstantInt>(COp)->getValue(); + for (int j = 0; j != Scale; ++j) { + APInt Index = MaskElt.getLoBits(5); + APInt PermuteOp = MaskElt.lshr(5).getLoBits(3); + MaskElt = MaskElt.lshr(8); + + if (PermuteOp == 4) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + if (PermuteOp != 0) { + ShuffleMask.clear(); + return; + } + ShuffleMask.push_back((int)Index.getZExtValue()); + } + } + + assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size"); +} + void DecodeVPERMVMask(const Constant *C, MVT VT, SmallVectorImpl<int> &ShuffleMask) { Type *MaskTy = C->getType(); diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h index bd61e4ba2e8..a07b26c04f7 100644 --- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h +++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h @@ -32,6 +32,9 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask); void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, SmallVectorImpl<int> &ShuffleMask); +/// Decode a VPPERM variable mask from an IR-level vector constant. +void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask); + /// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant. void DecodeVPERMVMask(const Constant *C, MVT VT, SmallVectorImpl<int> &ShuffleMask); diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 00294cf2ef6..91655da36fb 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -13,8 +13,8 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: combine_vpperm_identity: ; CHECK: # BB#0: -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm1[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>) %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) @@ -24,7 +24,7 @@ define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) { define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd: ; CHECK: # BB#0: -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>) ret <16 x i8> %res0 @@ -33,7 +33,7 @@ define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: combine_vpperm_as_unpckhwd: ; CHECK: # BB#0: -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>) ret <16 x i8> %res0 |