Diffstat (limited to 'llvm/lib/IR/AutoUpgrade.cpp')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 41
1 file changed, 30 insertions, 11 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 93d5d921801..7c24f848850 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -360,6 +360,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       Name == "sse42.crc32.64.8" || // Added in 3.4
       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
+      Name.startswith("avx512.mask.valign.") || // Added in 4.0
       Name.startswith("sse2.psll.dq") || // Added in 3.7
       Name.startswith("sse2.psrl.dq") || // Added in 3.7
       Name.startswith("avx2.psll.dq") || // Added in 3.7
@@ -572,13 +573,23 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
   return Builder.CreateSelect(Mask, Op0, Op1);
 }
 
-static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
-                                          Value *Op0, Value *Op1, Value *Shift,
-                                          Value *Passthru, Value *Mask) {
+// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
+// PALIGNR handles large immediates by shifting while VALIGN masks the
+// immediate, so we need to handle both cases. VALIGN also doesn't have
+// 128-bit lanes.
+static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
+                                        Value *Op1, Value *Shift,
+                                        Value *Passthru, Value *Mask,
+                                        bool IsVALIGN) {
   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
   unsigned NumElts = Op0->getType()->getVectorNumElements();
-  assert(NumElts % 16 == 0);
+  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
+  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
+  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
+
+  // Mask the immediate for VALIGN.
+  if (IsVALIGN)
+    ShiftVal &= (NumElts - 1);
 
   // If palignr is shifting the pair of vectors more than the size of two
   // lanes, emit zero.
@@ -595,10 +606,10 @@ static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
   uint32_t Indices[64];
   // 256-bit palignr operates on 128-bit lanes so we need to handle that
-  for (unsigned l = 0; l != NumElts; l += 16) {
+  for (unsigned l = 0; l < NumElts; l += 16) {
     for (unsigned i = 0; i != 16; ++i) {
       unsigned Idx = ShiftVal + i;
-      if (Idx >= 16)
+      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
         Idx += NumElts - 16; // End of lane, switch operand.
       Indices[l + i] = Idx + l;
     }
@@ -1071,11 +1082,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                           CI->getArgOperand(1));
     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
-      Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
-                                        CI->getArgOperand(1),
-                                        CI->getArgOperand(2),
-                                        CI->getArgOperand(3),
-                                        CI->getArgOperand(4));
+      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+                                      CI->getArgOperand(1),
+                                      CI->getArgOperand(2),
+                                      CI->getArgOperand(3),
+                                      CI->getArgOperand(4),
+                                      false);
+    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
+      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+                                      CI->getArgOperand(1),
+                                      CI->getArgOperand(2),
+                                      CI->getArgOperand(3),
+                                      CI->getArgOperand(4),
+                                      true);
     } else if (IsX86 && (Name == "sse2.psll.dq" ||
                          Name == "avx2.psll.dq")) {
       // 128/256-bit shift left specified in bits.
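
To make the new behaviour concrete, the following is a small, self-contained C++ sketch (not part of the patch; computeAlignIndices and the driver in main are hypothetical names) that mirrors the index computation above: VALIGN masks the immediate and ignores lane boundaries, while PALIGNR keeps the 128-bit-lane wrap. The real function writes into a fixed uint32_t Indices[64] buffer and then uses the first NumElts entries in a shufflevector over the two source operands (that part is outside the hunks shown above); the sketch simply returns the indices.

// Standalone illustration of the index computation in UpgradeX86ALIGNIntrinsics.
// Names and the driver below are illustrative only.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Returns the indices into the concatenation <Op1, Op0> selected by the
// upgraded intrinsic for a vector of NumElts elements and immediate ShiftVal.
static std::vector<uint32_t> computeAlignIndices(unsigned NumElts,
                                                 unsigned ShiftVal,
                                                 bool IsVALIGN) {
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");

  // VALIGN masks the immediate; PALIGNR instead shifts in zeros for large
  // immediates (that zero-fill case is omitted here for brevity).
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  std::vector<uint32_t> Indices(NumElts);
  // PALIGNR works per 128-bit (16-byte) lane; VALIGN has no lane structure,
  // but since VALIGN is limited to NumElts <= 16 the outer loop runs once.
  // (The real code fills a fixed 64-entry buffer and takes the first NumElts;
  // here the inner loop is bounded by NumElts instead.)
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16 && l + i < NumElts; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }
  return Indices;
}

int main() {
  // VALIGND with 8 elements and immediate 3: indices 3..10 of <Op1, Op0>,
  // i.e. a rotate across the full register with no lane boundary.
  for (uint32_t Idx : computeAlignIndices(8, 3, /*IsVALIGN=*/true))
    std::printf("%u ", Idx);
  std::printf("\n");

  // 256-bit PALIGNR (32 byte elements) with immediate 3: each 16-byte lane
  // is aligned independently, switching operands at the lane boundary.
  for (uint32_t Idx : computeAlignIndices(32, 3, /*IsVALIGN=*/false))
    std::printf("%u ", Idx);
  std::printf("\n");
  return 0;
}

With these inputs the first line prints 3 4 5 6 7 8 9 10, while the second shows the operand switch at indices 13..15 and 29..31 of each lane, which is exactly the per-lane wrap that the !IsVALIGN check in the patch disables for VALIGN.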