diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 18 |
2 files changed, 24 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ed4df801973..0b57d581fb4 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2563,7 +2563,10 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src), !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", - "${dst} {${mask}} {z}, $src}"), [], _.ExeDomain>, + "${dst} {${mask}} {z}, $src}"), + [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, + (_.VT _.RC:$src), + _.ImmAllZerosV)))], _.ExeDomain>, EVEX, EVEX_KZ; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable, @@ -2758,22 +2761,14 @@ def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr, (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>; let AddedComplexity = 20 in { -def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src), - (bc_v8i64 (v16i32 immAllZerosV)))), - (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>; - def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)), (v8i64 VR512:$src))), - (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), + (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8), VR512:$src)>; -def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src), - (v16i32 immAllZerosV))), - (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>; - def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (v16i32 VR512:$src))), - (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; + (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } // Move Int Doubleword to Packed Double Int diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 7e8ce4adf1b..17e1fee4c7c 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -920,6 +920,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMIN, X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN, X86ISD::FMIN_RND), + X86_INTRINSIC_DATA(avx512_mask_mova_d_128, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_d_256, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_d_512, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_pd_128, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_pd_256, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_pd_512, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_ps_128, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_ps_256, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_ps_512, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_q_128, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_q_256, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_mova_q_512, BLEND, ISD::VSELECT, 0), X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK, X86ISD::MOVDDUP, 0), X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK, @@ -942,6 +954,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::MOVSLDUP, 0), X86_INTRINSIC_DATA(avx512_mask_movsldup_512, INTR_TYPE_1OP_MASK, X86ISD::MOVSLDUP, 0), + X86_INTRINSIC_DATA(avx512_mask_movu_b_128, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_movu_b_256, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_movu_b_512, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_movu_w_128, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_movu_w_256, BLEND, ISD::VSELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_movu_w_512, BLEND, ISD::VSELECT, 0), X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0), X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0), X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL, |