Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/IR/AutoUpgrade.cpp              41
-rw-r--r--   llvm/lib/Target/X86/X86IntrinsicsInfo.h  12
2 files changed, 30 insertions, 23 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 93d5d921801..7c24f848850 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -360,6 +360,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
          Name == "sse42.crc32.64.8" || // Added in 3.4
          Name.startswith("avx.vbroadcast.s") || // Added in 3.5
          Name.startswith("avx512.mask.palignr.") || // Added in 3.9
+         Name.startswith("avx512.mask.valign.") || // Added in 4.0
          Name.startswith("sse2.psll.dq") || // Added in 3.7
          Name.startswith("sse2.psrl.dq") || // Added in 3.7
          Name.startswith("avx2.psll.dq") || // Added in 3.7
@@ -572,13 +573,23 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
   return Builder.CreateSelect(Mask, Op0, Op1);
 }
 
-static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
-                                          Value *Op0, Value *Op1, Value *Shift,
-                                          Value *Passthru, Value *Mask) {
+// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
+// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
+// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
+static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
+                                        Value *Op1, Value *Shift,
+                                        Value *Passthru, Value *Mask,
+                                        bool IsVALIGN) {
   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
 
   unsigned NumElts = Op0->getType()->getVectorNumElements();
-  assert(NumElts % 16 == 0);
+  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
+  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
+  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
+
+  // Mask the immediate for VALIGN.
+  if (IsVALIGN)
+    ShiftVal &= (NumElts - 1);
 
   // If palignr is shifting the pair of vectors more than the size of two
   // lanes, emit zero.
@@ -595,10 +606,10 @@ static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
 
   uint32_t Indices[64];
   // 256-bit palignr operates on 128-bit lanes so we need to handle that
-  for (unsigned l = 0; l != NumElts; l += 16) {
+  for (unsigned l = 0; l < NumElts; l += 16) {
     for (unsigned i = 0; i != 16; ++i) {
       unsigned Idx = ShiftVal + i;
-      if (Idx >= 16)
+      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
         Idx += NumElts - 16; // End of lane, switch operand.
       Indices[l + i] = Idx + l;
     }
@@ -1071,11 +1082,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                             CI->getArgOperand(1));
     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
-      Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
-                                        CI->getArgOperand(1),
-                                        CI->getArgOperand(2),
-                                        CI->getArgOperand(3),
-                                        CI->getArgOperand(4));
+      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+                                      CI->getArgOperand(1),
+                                      CI->getArgOperand(2),
+                                      CI->getArgOperand(3),
+                                      CI->getArgOperand(4),
+                                      false);
+    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
+      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+                                      CI->getArgOperand(1),
+                                      CI->getArgOperand(2),
+                                      CI->getArgOperand(3),
+                                      CI->getArgOperand(4),
+                                      true);
     } else if (IsX86 && (Name == "sse2.psll.dq" ||
                          Name == "avx2.psll.dq")) {
       // 128/256-bit shift left specified in bits.
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 3a0e7f101e4..515145a045a 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1171,18 +1171,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128,    CMP_MASK_CC,  X86ISD::CMPMU, 0),
   X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256,    CMP_MASK_CC,  X86ISD::CMPMU, 0),
   X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512,    CMP_MASK_CC,  X86ISD::CMPMU, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_q_128, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_q_256, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::CVTPH2PS, 0),
   X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
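
For readers who want the new index logic in isolation, below is a minimal standalone sketch of the loop added in UpgradeX86ALIGNIntrinsics. The function name alignShuffleIndices and the std::vector return type are ours, not part of the patch, and the sketch assumes the PALIGNR case has already passed the patch's earlier early-outs for immediates of 16 or more (which the real code turns into an all-zero result or a single-operand shift before building indices).

// Standalone sketch of the shuffle-index computation in the patch above
// (hypothetical helper, not the LLVM API).  Indices select from the two
// sources as if concatenated: PALIGNR wraps inside each 128-bit lane, while
// VALIGN masks the immediate and lets indices run into the second source.
#include <cstdint>
#include <vector>

std::vector<uint32_t> alignShuffleIndices(unsigned NumElts, unsigned ShiftVal,
                                          bool IsVALIGN) {
  // VALIGND/Q only honors the low log2(NumElts) bits of the immediate.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  uint32_t Indices[64];
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      // PALIGNR only: past the end of a 128-bit lane, switch to the other
      // source.  The wrap is disabled for VALIGN.
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16;
      Indices[l + i] = Idx + l;
    }
  }
  // Only the first NumElts entries feed the emitted shufflevector.
  return std::vector<uint32_t>(Indices, Indices + NumElts);
}

For example, with IsVALIGN = true, NumElts = 4 and an immediate of 5, the immediate is masked down to 1 and the indices come out as {1, 2, 3, 4}, i.e. a one-element shift across the concatenation of the two sources, which is why the table-driven VALIGN entries in X86IntrinsicsInfo.h can be dropped in favor of the generic shuffle-plus-select upgrade path.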

