diff options
| author | Jina Nahias <jina.nahias@intel.com> | 2017-11-13 09:16:39 +0000 |
|---|---|---|
| committer | Jina Nahias <jina.nahias@intel.com> | 2017-11-13 09:16:39 +0000 |
| commit | 9a7f9f123c3beb682be9533a483f68ee73f1e2d1 (patch) | |
| tree | 1479b3a0899ef700eb29bfbccaf30c0d6cd6b7be /llvm/lib | |
| parent | dca979194d047360657726c709b3caf40d0b3c2f (diff) | |
| download | bcm5719-llvm-9a7f9f123c3beb682be9533a483f68ee73f1e2d1.tar.gz bcm5719-llvm-9a7f9f123c3beb682be9533a483f68ee73f1e2d1.zip | |
[x86][AVX512] Lowering shuffle i/f intrinsics to LLVM IR
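This patch, together with a matching clang patch (https://reviews.llvm.org/D38672), lowers the X86 AVX-512 masked shuffle intrinsics (the avx512.mask.shuf.i* and avx512.mask.shuf.f* families) to generic LLVM IR: AutoUpgrade now rewrites each call into a shufflevector followed by a mask select, and the corresponding entries are removed from X86IntrinsicsInfo.h.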
Differential Revision: https://reviews.llvm.org/D38671
Change-Id: I1e7d359a74743e995ec356237a85214ce55d3661
llvm-svn: 318026
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 26 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 16 |
2 files changed, 25 insertions, 17 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 2c9e9be3da5..244dabe60e3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -76,6 +76,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { if (Name=="ssse3.pabs.b.128" || // Added in 6.0 Name=="ssse3.pabs.w.128" || // Added in 6.0 Name=="ssse3.pabs.d.128" || // Added in 6.0 + Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 + Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 Name.startswith("avx2.pabs.") || // Added in 6.0 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 Name.startswith("avx512.broadcastm") || // Added in 6.0 @@ -1270,7 +1272,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { else Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), { 0, 1, 2, 3, 0, 1, 2, 3 }); - } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") || + } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") || + Name.startswith("avx512.mask.shuf.f"))) { + unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + Type *VT = CI->getType(); + unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; + unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); + unsigned ControlBitsMask = NumLanes - 1; + unsigned NumControlBits = NumLanes / 2; + SmallVector<uint32_t, 8> ShuffleMask(0); + + for (unsigned l = 0; l != NumLanes; ++l) { + unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; + // We actually need the other source. 
+ if (l >= NumLanes / 2) + LaneMask += NumLanes; + for (unsigned i = 0; i != NumElementsInLane; ++i) + ShuffleMask.push_back(LaneMask * NumElementsInLane + i); + } + Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), + CI->getArgOperand(1), ShuffleMask); + Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, + CI->getArgOperand(3)); + }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") || Name.startswith("avx512.mask.broadcasti"))) { unsigned NumSrcElts = CI->getArgOperand(0)->getType()->getVectorNumElements(); diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 80f74696a16..1c795126092 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1038,22 +1038,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::SCALEFS, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::SCALEFS, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2_256, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4_256, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2_256, INTR_TYPE_3OP_IMM8_MASK, - X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK, ISD::FSQRT, |

