diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-06-23 07:37:33 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-06-23 07:37:33 +0000 |
| commit | 597aa42fec46332e1267e4234cd9d258ac4e5b53 (patch) | |
| tree | f7250419418034adf40a3b075013f5ebb8cf6a7f /llvm/lib | |
| parent | 8f8bd37dd34335965a7cff88cae7ee0da44a1b47 (diff) | |
| download | bcm5719-llvm-597aa42fec46332e1267e4234cd9d258ac4e5b53.tar.gz bcm5719-llvm-597aa42fec46332e1267e4234cd9d258ac4e5b53.zip | |
[AVX512] Remove masked unpack intrinsics and autoupgrade to vectorshuffle and selects.
llvm-svn: 273543
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 36 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 72 |
3 files changed, 46 insertions, 72 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index a4fc03cf029..8ba7ef4d26e 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -197,6 +197,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("x86.avx512.mask.pshuf.d.") || Name.startswith("x86.avx512.mask.pshufl.w.") || Name.startswith("x86.avx512.mask.pshufh.w.") || + Name.startswith("x86.avx512.mask.punpckl") || + Name.startswith("x86.avx512.mask.punpckh") || + Name.startswith("x86.avx512.mask.unpckl.") || + Name.startswith("x86.avx512.mask.unpckh.") || Name.startswith("x86.sse41.pmovsx") || Name.startswith("x86.sse41.pmovzx") || Name.startswith("x86.avx2.pmovsx") || @@ -1034,6 +1038,38 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { if (CI->getNumArgOperands() == 4) Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); + } else if (Name.startswith("llvm.x86.avx512.mask.punpckl") || + Name.startswith("llvm.x86.avx512.mask.unpckl.")) { + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + int NumElts = CI->getType()->getVectorNumElements(); + int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); + + SmallVector<uint32_t, 64> Idxs(NumElts); + for (int l = 0; l != NumElts; l += NumLaneElts) + for (int i = 0; i != NumLaneElts; ++i) + Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); + + Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); + + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); + } else if (Name.startswith("llvm.x86.avx512.mask.punpckh") || + Name.startswith("llvm.x86.avx512.mask.unpckh.")) { + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + int NumElts = CI->getType()->getVectorNumElements(); + int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); + + SmallVector<uint32_t, 64> Idxs(NumElts); + for (int l = 0; l != NumElts; l += NumLaneElts) + for (int i = 0; i != NumLaneElts; ++i) + Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); + + Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); + + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 54e5536c36e..f0b1901d77a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -11822,6 +11822,11 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); + // Use dedicated unpack instructions for masks that match their pattern. + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG)) + return V; + // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG)) @@ -11856,6 +11861,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!"); assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); + // Use dedicated unpack instructions for masks that match their pattern. + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG)) + return V; + // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 071709a1a73..341775cf4eb 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1415,54 +1415,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK, X86ISD::VPTERNLOG, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckld_q_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckld_q_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpckld_q_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), @@ -1575,30 +1527,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKH, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_128, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_256, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), - X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK, - X86ISD::UNPCKL, 0), X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VALIGN, 0), X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK, |

