diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 11 |
4 files changed, 23 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index b1af31067ae..fd2b4e28003 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -1202,7 +1202,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXBW, m) - DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), ShuffleMask); + DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), false, + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -1210,7 +1211,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXBD, m) - DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask); + DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), false, + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -1218,7 +1220,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXBQ, m) - DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask); + DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), false, + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -1226,7 +1229,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXWD, m) - DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask); + DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), false, + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -1234,7 +1238,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXWQ, m) - DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask); + DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), false, + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -1242,7 +1247,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXDQ, m) - DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask); + DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), false, + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; } diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 524e7231891..48fd3e0b7ab 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -383,7 +383,8 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm, } void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, - unsigned NumDstElts, SmallVectorImpl<int> &Mask) { + unsigned NumDstElts, bool IsAnyExtend, + SmallVectorImpl<int> &Mask) { unsigned Scale = DstScalarBits / SrcScalarBits; assert(SrcScalarBits < DstScalarBits && "Expected zero extension mask to increase scalar size"); @@ -391,7 +392,7 @@ void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, for (unsigned i = 0; i != NumDstElts; i++) { Mask.push_back(i); for (unsigned j = 1; j != Scale; j++) - Mask.push_back(SM_SentinelZero); + Mask.push_back(IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero); } } diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h index d1678d57a8f..f5278506307 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -136,7 +136,7 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, /// Decode a zero extension instruction as a shuffle mask. void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, - unsigned NumDstElts, + unsigned NumDstElts, bool IsAnyExtend, SmallVectorImpl<int> &ShuffleMask); /// Decode a move lower and zero upper instruction as a shuffle mask. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8b6edaa50ba..47b8e9eac2d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6849,17 +6849,20 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, return true; } case ISD::ZERO_EXTEND: - case ISD::ZERO_EXTEND_VECTOR_INREG: { + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: { SDValue Src = N.getOperand(0); EVT SrcVT = Src.getValueType(); - // Zero-extended source must be a simple vector. + // Extended source must be a simple vector. if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 || (SrcVT.getScalarSizeInBits() % 8) != 0) return false; unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits(); - DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, Mask); + bool IsAnyExtend = (ISD::ANY_EXTEND_VECTOR_INREG == Opcode); + DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, IsAnyExtend, + Mask); if (NumSizeInBits != SrcVT.getSizeInBits()) { assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 && @@ -43259,7 +43262,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, // Attempt to combine as a shuffle. // TODO: SSE41 support - if (Subtarget.hasAVX() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { + if (Subtarget.hasAVX()) { SDValue Op(N, 0); if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType())) if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |