diff options
| -rw-r--r-- | llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp | 4 |
| -rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 56 |
| -rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 8 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 4 |
4 files changed, 41 insertions, 31 deletions
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 5e809c34325..f5f3a4cc83d 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -1038,7 +1038,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
   case X86::EXTRQI:
     if (MI->getOperand(2).isImm() &&
         MI->getOperand(3).isImm())
-      DecodeEXTRQIMask(MI->getOperand(2).getImm(),
+      DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(),
                        MI->getOperand(3).getImm(),
                        ShuffleMask);
 
@@ -1049,7 +1049,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
   case X86::INSERTQI:
     if (MI->getOperand(3).isImm() &&
         MI->getOperand(4).isImm())
-      DecodeINSERTQIMask(MI->getOperand(3).getImm(),
+      DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(),
                          MI->getOperand(4).getImm(),
                          ShuffleMask);
 
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1be5aec849f..de7914360fd 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
     Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
 }
 
-void DecodeEXTRQIMask(int Len, int Idx,
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
                       SmallVectorImpl<int> &ShuffleMask) {
+  assert(VT.is128BitVector() && "Expected 128-bit vector");
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned EltSize = VT.getScalarSizeInBits();
+  unsigned HalfElts = NumElts / 2;
+
   // Only the bottom 6 bits are valid for each immediate.
   Len &= 0x3F;
   Idx &= 0x3F;
 
   // We can only decode this bit extraction instruction as a shuffle if both the
-  // length and index work with whole bytes.
-  if (0 != (Len % 8) || 0 != (Idx % 8))
+  // length and index work with whole elements.
+  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
     return;
 
   // A length of zero is equivalent to a bit length of 64.
@@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx,
 
   // If the length + index exceeds the bottom 64 bits the result is undefined.
   if ((Len + Idx) > 64) {
-    ShuffleMask.append(16, SM_SentinelUndef);
+    ShuffleMask.append(NumElts, SM_SentinelUndef);
     return;
   }
 
-  // Convert index and index to work with bytes.
-  Len /= 8;
-  Idx /= 8;
+  // Convert index and index to work with elements.
+  Len /= EltSize;
+  Idx /= EltSize;
 
-  // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
-  // of the lower 64-bits. The upper 64-bits are undefined.
+  // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
+  // elements of the lower 64-bits. The upper 64-bits are undefined.
   for (int i = 0; i != Len; ++i)
     ShuffleMask.push_back(i + Idx);
-  for (int i = Len; i != 8; ++i)
+  for (int i = Len; i != HalfElts; ++i)
     ShuffleMask.push_back(SM_SentinelZero);
-  for (int i = 8; i != 16; ++i)
+  for (int i = HalfElts; i != NumElts; ++i)
     ShuffleMask.push_back(SM_SentinelUndef);
 }
 
-void DecodeINSERTQIMask(int Len, int Idx,
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
                         SmallVectorImpl<int> &ShuffleMask) {
+  assert(VT.is128BitVector() && "Expected 128-bit vector");
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned EltSize = VT.getScalarSizeInBits();
+  unsigned HalfElts = NumElts / 2;
+
   // Only the bottom 6 bits are valid for each immediate.
   Len &= 0x3F;
   Idx &= 0x3F;
 
   // We can only decode this bit insertion instruction as a shuffle if both the
-  // length and index work with whole bytes.
-  if (0 != (Len % 8) || 0 != (Idx % 8))
+  // length and index work with whole elements.
+  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
     return;
 
   // A length of zero is equivalent to a bit length of 64.
@@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx,
 
   // If the length + index exceeds the bottom 64 bits the result is undefined.
   if ((Len + Idx) > 64) {
-    ShuffleMask.append(16, SM_SentinelUndef);
+    ShuffleMask.append(NumElts, SM_SentinelUndef);
     return;
   }
 
-  // Convert index and index to work with bytes.
-  Len /= 8;
-  Idx /= 8;
+  // Convert index and index to work with elements.
+  Len /= EltSize;
+  Idx /= EltSize;
 
-  // INSERTQ: Extract lowest Len bytes from lower half of second source and
-  // insert over first source starting at Idx byte. The upper 64-bits are
+  // INSERTQ: Extract lowest Len elements from lower half of second source and
+  // insert over first source starting at Idx element. The upper 64-bits are
   // undefined.
   for (int i = 0; i != Idx; ++i)
     ShuffleMask.push_back(i);
   for (int i = 0; i != Len; ++i)
-    ShuffleMask.push_back(i + 16);
-  for (int i = Idx + Len; i != 8; ++i)
+    ShuffleMask.push_back(i + NumElts);
+  for (int i = Idx + Len; i != HalfElts; ++i)
     ShuffleMask.push_back(i);
-  for (int i = 8; i != 16; ++i)
+  for (int i = HalfElts; i != NumElts; ++i)
     ShuffleMask.push_back(SM_SentinelUndef);
 }
 
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 17619d09d05..251c9f7558e 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
 void DecodeScalarMoveMask(MVT VT, bool IsLoad,
                           SmallVectorImpl<int> &ShuffleMask);
 
-/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
-void DecodeEXTRQIMask(int Len, int Idx,
+/// Decode a SSE4A EXTRQ instruction as a shuffle mask.
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
                       SmallVectorImpl<int> &ShuffleMask);
 
-/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
-void DecodeINSERTQIMask(int Len, int Idx,
+/// Decode a SSE4A INSERTQ instruction as a shuffle mask.
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
                         SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1f4bc356943..5fefaf4b644 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5561,7 +5561,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
         isa<ConstantSDNode>(N->getOperand(2))) {
       int BitLen = N->getConstantOperandVal(1);
       int BitIdx = N->getConstantOperandVal(2);
-      DecodeEXTRQIMask(BitLen, BitIdx, Mask);
+      DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
       IsUnary = true;
     }
     break;
@@ -5570,7 +5570,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
       isa<ConstantSDNode>(N->getOperand(3))) {
       int BitLen = N->getConstantOperandVal(2);
       int BitIdx = N->getConstantOperandVal(3);
-      DecodeINSERTQIMask(BitLen, BitIdx, Mask);
+      DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
       IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
     }
     break;

