Diffstat (limited to 'llvm/lib/IR/AutoUpgrade.cpp')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp  41
1 file changed, 30 insertions, 11 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 93d5d921801..7c24f848850 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -360,6 +360,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         Name == "sse42.crc32.64.8" || // Added in 3.4
         Name.startswith("avx.vbroadcast.s") || // Added in 3.5
         Name.startswith("avx512.mask.palignr.") || // Added in 3.9
+        Name.startswith("avx512.mask.valign.") || // Added in 4.0
         Name.startswith("sse2.psll.dq") || // Added in 3.7
         Name.startswith("sse2.psrl.dq") || // Added in 3.7
         Name.startswith("avx2.psll.dq") || // Added in 3.7
@@ -572,13 +573,23 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
   return Builder.CreateSelect(Mask, Op0, Op1);
 }
 
-static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
-                                          Value *Op0, Value *Op1, Value *Shift,
-                                          Value *Passthru, Value *Mask) {
+// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
+// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
+// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
+static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
+                                        Value *Op1, Value *Shift,
+                                        Value *Passthru, Value *Mask,
+                                        bool IsVALIGN) {
   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
 
   unsigned NumElts = Op0->getType()->getVectorNumElements();
-  assert(NumElts % 16 == 0);
+  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
+  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
+  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
+
+  // Mask the immediate for VALIGN.
+  if (IsVALIGN)
+    ShiftVal &= (NumElts - 1);
 
   // If palignr is shifting the pair of vectors more than the size of two
   // lanes, emit zero.
@@ -595,10 +606,10 @@ static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
   uint32_t Indices[64];
   // 256-bit palignr operates on 128-bit lanes so we need to handle that
-  for (unsigned l = 0; l != NumElts; l += 16) {
+  for (unsigned l = 0; l < NumElts; l += 16) {
     for (unsigned i = 0; i != 16; ++i) {
       unsigned Idx = ShiftVal + i;
-      if (Idx >= 16)
+      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
         Idx += NumElts - 16; // End of lane, switch operand.
       Indices[l + i] = Idx + l;
     }
   }
@@ -1071,11 +1082,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                           CI->getArgOperand(1));
     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
-      Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
-                                        CI->getArgOperand(1),
-                                        CI->getArgOperand(2),
-                                        CI->getArgOperand(3),
-                                        CI->getArgOperand(4));
+      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+                                      CI->getArgOperand(1),
+                                      CI->getArgOperand(2),
+                                      CI->getArgOperand(3),
+                                      CI->getArgOperand(4),
+                                      false);
+    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
+      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+                                      CI->getArgOperand(1),
+                                      CI->getArgOperand(2),
+                                      CI->getArgOperand(3),
+                                      CI->getArgOperand(4),
+                                      true);
     } else if (IsX86 && (Name == "sse2.psll.dq" ||
                          Name == "avx2.psll.dq")) {
       // 128/256-bit shift left specified in bits.
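
Below is a minimal standalone sketch of the shuffle-index computation that the renamed UpgradeX86ALIGNIntrinsics helper performs, illustrating the two behaviors the new comment describes: PALIGNR works in 16-byte lanes and lets oversized immediates shift elements out (the all-zero case for shifts of 32 bytes or more is omitted here), while VALIGN has no lanes and simply masks the immediate. The function name computeAlignIndices and the use of std::vector are purely for this illustration and are not part of the LLVM code; the real helper builds the indices inline and passes them to IRBuilder's shuffle-vector creation over the two concatenated source vectors.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// NumElts is the element count of one source vector, ShiftVal the immediate.
// Indices >= NumElts select elements from the second concatenated source.
static std::vector<uint32_t> computeAlignIndices(unsigned NumElts,
                                                 unsigned ShiftVal,
                                                 bool IsVALIGN) {
  assert((NumElts & (NumElts - 1)) == 0 && "NumElts must be a power of 2");
  // VALIGN masks the immediate to the element count instead of shifting
  // elements out for large immediates.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  std::vector<uint32_t> Indices(NumElts);
  // PALIGNR operates on 16-byte lanes; VALIGN has no lanes, so its "lane"
  // spans the whole vector and the outer loop runs exactly once.
  unsigned LaneSize = IsVALIGN ? NumElts : 16;
  for (unsigned l = 0; l < NumElts; l += LaneSize) {
    for (unsigned i = 0; i != LaneSize; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16; // End of lane: switch to the other operand.
      Indices[l + i] = Idx + l;
    }
  }
  return Indices;
}

int main() {
  // valign.d with 4 elements and immediate 5: 5 & 3 == 1, so the result is
  // the two concatenated sources rotated by one element -> indices 1 2 3 4.
  for (uint32_t Idx : computeAlignIndices(4, 5, /*IsVALIGN=*/true))
    printf("%u ", Idx);
  printf("\n");
  return 0;
}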