summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp26
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h12
2 files changed, 26 insertions, 12 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 79a8d18126d..96c34c0ad8c 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -256,6 +256,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("avx512.mask.pshuf.d.") ||
Name.startswith("avx512.mask.pshufl.w.") ||
Name.startswith("avx512.mask.pshufh.w.") ||
+ Name.startswith("avx512.mask.shuf.p") ||
Name.startswith("avx512.mask.vpermil.p") ||
Name.startswith("avx512.mask.perm.df.") ||
Name.startswith("avx512.mask.perm.di.") ||
@@ -1149,6 +1150,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+
+ unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
+ unsigned HalfLaneElts = NumLaneElts / 2;
+
+ SmallVector<uint32_t, 16> Idxs(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Base index is the starting element of the lane.
+ Idxs[i] = i - (i % NumLaneElts);
+ // If we are half way through the lane switch to the other source.
+ if ((i % NumLaneElts) >= HalfLaneElts)
+ Idxs[i] += NumElts;
+ // Now select the specific element. By adding HalfLaneElts bits from
+ // the immediate. Wrapping around the immediate every 8-bits.
+ Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
+ }
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
+
+ Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
+ CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index a883bc4c5d6..189fb1aec17 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1352,18 +1352,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::SHUF128, 0),
X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2_256, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_pd_128, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_pd_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_pd_512, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_ps_128, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_ps_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_ps_512, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
OpenPOWER on IntegriCloud