summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorJina Nahias <jina.nahias@intel.com>2017-11-13 09:16:39 +0000
committerJina Nahias <jina.nahias@intel.com>2017-11-13 09:16:39 +0000
commit9a7f9f123c3beb682be9533a483f68ee73f1e2d1 (patch)
tree1479b3a0899ef700eb29bfbccaf30c0d6cd6b7be /llvm/lib
parentdca979194d047360657726c709b3caf40d0b3c2f (diff)
downloadbcm5719-llvm-9a7f9f123c3beb682be9533a483f68ee73f1e2d1.tar.gz
bcm5719-llvm-9a7f9f123c3beb682be9533a483f68ee73f1e2d1.zip
[x86][AVX512] Lowering shuffle i/f intrinsics to LLVM IR
This patch, together with a matching clang patch (https://reviews.llvm.org/D38672), implements the lowering of X86 shuffle i/f intrinsics to IR. Differential Revision: https://reviews.llvm.org/D38671 Change-Id: I1e7d359a74743e995ec356237a85214ce55d3661 llvm-svn: 318026
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp26
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h16
2 files changed, 25 insertions, 17 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2c9e9be3da5..244dabe60e3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -76,6 +76,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
if (Name=="ssse3.pabs.b.128" || // Added in 6.0
Name=="ssse3.pabs.w.128" || // Added in 6.0
Name=="ssse3.pabs.d.128" || // Added in 6.0
+ Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
+ Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
@@ -1270,7 +1272,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
else
Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
{ 0, 1, 2, 3, 0, 1, 2, 3 });
- } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
+ } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
+ Name.startswith("avx512.mask.shuf.f"))) {
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Type *VT = CI->getType();
+ unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
+ unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
+ unsigned ControlBitsMask = NumLanes - 1;
+ unsigned NumControlBits = NumLanes / 2;
+ SmallVector<uint32_t, 8> ShuffleMask(0);
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+ // We actually need the other source.
+ if (l >= NumLanes / 2)
+ LaneMask += NumLanes;
+ for (unsigned i = 0; i != NumElementsInLane; ++i)
+ ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+ }
+ Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
+ CI->getArgOperand(1), ShuffleMask);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
+ CI->getArgOperand(3));
+ }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
Name.startswith("avx512.mask.broadcasti"))) {
unsigned NumSrcElts =
CI->getArgOperand(0)->getType()->getVectorNumElements();
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 80f74696a16..1c795126092 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1038,22 +1038,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::SCALEFS, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEFS, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUF128, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK, ISD::FSQRT,
OpenPOWER on IntegriCloud