diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 79 |
1 files changed, 61 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c2d8f8a8fa6..33054e3fde5 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2183,28 +2183,26 @@ let Predicates = [HasBWI] in { // GR from/to mask register def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), - (COPY_TO_REGCLASS GR16:$src, VK16)>; + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), - (COPY_TO_REGCLASS VK16:$src, GR16)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), - (COPY_TO_REGCLASS GR8:$src, VK8)>; + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), - (COPY_TO_REGCLASS VK8:$src, GR8)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), (KMOVWrk VK16:$src)>; def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), - (i32 (INSERT_SUBREG (IMPLICIT_DEF), - (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>; + (COPY_TO_REGCLASS VK16:$src, GR32)>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), - (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>; + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), - (i32 (INSERT_SUBREG (IMPLICIT_DEF), - (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>; + (COPY_TO_REGCLASS VK8:$src, GR32)>; def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (COPY_TO_REGCLASS GR32:$src, VK32)>; @@ -3288,6 +3286,23 @@ def : Pat<(masked_store addr:$dst, Mask, } +multiclass avx512_store_scalar_lowering_subreg<string InstrStr, + AVX512VLVectorVTInfo _, + dag Mask, RegisterClass MaskRC, + SubRegIndex subreg> { + +def : Pat<(masked_store addr:$dst, Mask, + (_.info512.VT (insert_subvector undef, + (_.info256.VT (insert_subvector undef, + (_.info128.VT _.info128.RC:$src), + (iPTR 0))), + (iPTR 0)))), + (!cast<Instruction>(InstrStr#mrk) addr:$dst, + (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; + +} + multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, dag Mask, RegisterClass MaskRC> { @@ -3314,22 +3329,50 @@ def : Pat<(_.info128.VT (extract_subvector } +multiclass avx512_load_scalar_lowering_subreg<string InstrStr, + AVX512VLVectorVTInfo _, + dag Mask, RegisterClass MaskRC, + SubRegIndex subreg> { + +def : Pat<(_.info128.VT (extract_subvector + (_.info512.VT (masked_load addr:$srcAddr, Mask, + (_.info512.VT (bitconvert + (v16i32 immAllZerosV))))), + (iPTR 0))), + (!cast<Instruction>(InstrStr#rmkz) + (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + addr:$srcAddr)>; + +def : Pat<(_.info128.VT (extract_subvector + (_.info512.VT (masked_load addr:$srcAddr, Mask, + (_.info512.VT (insert_subvector undef, + (_.info256.VT (insert_subvector undef, + (_.info128.VT (X86vzmovl _.info128.RC:$src)), + (iPTR 0))), + (iPTR 0))))), + (iPTR 0))), + (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, + (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + addr:$srcAddr)>; + +} + defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; -defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, - (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>; -defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info, - (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>; +defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, + (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; +defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, + (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; -defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, - (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>; -defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info, - (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>; +defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, + (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; +defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, + (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), @@ -3340,7 +3383,7 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), - (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), + (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; let hasSideEffects = 0 in |

