summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2016-06-23 07:37:33 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-06-23 07:37:33 +0000
commit: 597aa42fec46332e1267e4234cd9d258ac4e5b53 (patch)
tree: f7250419418034adf40a3b075013f5ebb8cf6a7f /llvm/lib
parent: 8f8bd37dd34335965a7cff88cae7ee0da44a1b47 (diff)
download: bcm5719-llvm-597aa42fec46332e1267e4234cd9d258ac4e5b53.tar.gz
          bcm5719-llvm-597aa42fec46332e1267e4234cd9d258ac4e5b53.zip
[AVX512] Remove masked unpack intrinsics and autoupgrade to vectorshuffle and selects.
llvm-svn: 273543
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp              36
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  10
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h  72
3 files changed, 46 insertions, 72 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index a4fc03cf029..8ba7ef4d26e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -197,6 +197,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("x86.avx512.mask.pshuf.d.") ||
Name.startswith("x86.avx512.mask.pshufl.w.") ||
Name.startswith("x86.avx512.mask.pshufh.w.") ||
+ Name.startswith("x86.avx512.mask.punpckl") ||
+ Name.startswith("x86.avx512.mask.punpckh") ||
+ Name.startswith("x86.avx512.mask.unpckl.") ||
+ Name.startswith("x86.avx512.mask.unpckh.") ||
Name.startswith("x86.sse41.pmovsx") ||
Name.startswith("x86.sse41.pmovzx") ||
Name.startswith("x86.avx2.pmovsx") ||
@@ -1034,6 +1038,38 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (Name.startswith("llvm.x86.avx512.mask.punpckl") ||
+ Name.startswith("llvm.x86.avx512.mask.unpckl.")) {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ int NumElts = CI->getType()->getVectorNumElements();
+ int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
+
+ SmallVector<uint32_t, 64> Idxs(NumElts);
+ for (int l = 0; l != NumElts; l += NumLaneElts)
+ for (int i = 0; i != NumLaneElts; ++i)
+ Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
+
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (Name.startswith("llvm.x86.avx512.mask.punpckh") ||
+ Name.startswith("llvm.x86.avx512.mask.unpckh.")) {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ int NumElts = CI->getType()->getVectorNumElements();
+ int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
+
+ SmallVector<uint32_t, 64> Idxs(NumElts);
+ for (int l = 0; l != NumElts; l += NumLaneElts)
+ for (int i = 0; i != NumLaneElts; ++i)
+ Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
+
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 54e5536c36e..f0b1901d77a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11822,6 +11822,11 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
Subtarget, DAG))
@@ -11856,6 +11861,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
Subtarget, DAG))
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 071709a1a73..341775cf4eb 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1415,54 +1415,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckld_q_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckld_q_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpckld_q_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
@@ -1575,30 +1527,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKH, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_128, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_256, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
- X86ISD::UNPCKL, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK,
OpenPOWER on IntegriCloud