summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Igor Breger <igor.breger@intel.com> 2016-01-21 14:18:11 +0000
committer: Igor Breger <igor.breger@intel.com> 2016-01-21 14:18:11 +0000
commit: 7a000f5bb2be477f1b053c315523f274bf0f7ac8 (patch)
tree: 5e430b80258ccbca2975f3451a515a18e775aa24 /llvm/lib/Target
parent: 21a30a42a98186b7bc96caf6ffb40e597457373d (diff)
downloadbcm5719-llvm-7a000f5bb2be477f1b053c315523f274bf0f7ac8.tar.gz
bcm5719-llvm-7a000f5bb2be477f1b053c315523f274bf0f7ac8.zip
AVX512: Masked move intrinsic implementation.
Implemented intrinsics for the following instructions (register moves): VMOVDQU8/16, VMOVDQA32/64, VMOVAPS/PD. Differential Revision: http://reviews.llvm.org/D16316 llvm-svn: 258398
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 17
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 18
2 files changed, 24 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index ed4df801973..0b57d581fb4 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2563,7 +2563,10 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"), [], _.ExeDomain>,
+ "${dst} {${mask}} {z}, $src}"),
+ [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ (_.VT _.RC:$src),
+ _.ImmAllZerosV)))], _.ExeDomain>,
EVEX, EVEX_KZ;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
@@ -2758,22 +2761,14 @@ def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
(VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
let AddedComplexity = 20 in {
-def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
- (bc_v8i64 (v16i32 immAllZerosV)))),
- (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
-
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
- (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
+ (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
-def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
- (v16i32 immAllZerosV))),
- (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
-
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
- (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
+ (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
// Move Int Doubleword to Packed Double Int
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 7e8ce4adf1b..17e1fee4c7c 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -920,6 +920,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMIN, X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_mova_d_128, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_d_256, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_d_512, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_pd_128, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_pd_256, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_pd_512, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_ps_128, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_ps_256, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_ps_512, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_q_128, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_q_256, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mova_q_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK,
X86ISD::MOVDDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK,
@@ -942,6 +954,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::MOVSLDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_movsldup_512, INTR_TYPE_1OP_MASK,
X86ISD::MOVSLDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movu_b_128, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movu_b_256, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movu_b_512, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movu_w_128, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movu_w_256, BLEND, ISD::VSELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movu_w_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
OpenPOWER on IntegriCloud