summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td117
1 files changed, 117 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index cf2e33c0303..d078852c7e8 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3876,6 +3876,31 @@ def : Pat<(masked_store addr:$dst, Mask,
}
+// This matches the more recent codegen from clang that avoids emitting a 512
+// bit masked store directly. Codegen will widen 128-bit masked store to 512
+// bits on AVX512F only targets.
+multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask512, dag Mask128,
+ RegisterClass MaskRC,
+ SubRegIndex subreg> {
+
+// AVX512F pattern.
+def : Pat<(masked_store addr:$dst, Mask512,
+ (_.info512.VT (insert_subvector undef,
+ (_.info128.VT _.info128.RC:$src),
+ (iPTR 0)))),
+ (!cast<Instruction>(InstrStr#mrk) addr:$dst,
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
+ (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+
+// AVX512VL pattern.
+def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
+ (!cast<Instruction>(InstrStr#mrk) addr:$dst,
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
+ (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+}
+
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
@@ -3926,6 +3951,48 @@ def : Pat<(_.info128.VT (extract_subvector
}
+// This matches the more recent codegen from clang that avoids emitting a 512
+// bit masked load directly. Codegen will widen 128-bit masked load to 512
+// bits on AVX512F only targets.
+multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask512, dag Mask128,
+ RegisterClass MaskRC,
+ SubRegIndex subreg> {
+// AVX512F patterns.
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask512,
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmkz)
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
+ addr:$srcAddr)>;
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask512,
+ (_.info512.VT (insert_subvector undef,
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)),
+ (iPTR 0))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
+ addr:$srcAddr)>;
+
+// AVX512Vl patterns.
+def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
+ (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
+ (!cast<Instruction>(InstrStr#rmkz)
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
+ addr:$srcAddr)>;
+
+def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
+ (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
+ addr:$srcAddr)>;
+}
+
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
@@ -3936,6 +4003,31 @@ defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (insert_subvector
+ (v16i1 immAllZerosV),
+ (v4i1 (extract_subvector
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))),
+ (iPTR 0))),
+ (v4i1 (extract_subvector
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))), GR8, sub_8bit>;
+defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1
+ (extract_subvector
+ (v16i1
+ (insert_subvector
+ (v16i1 immAllZerosV),
+ (v2i1 (extract_subvector
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
+ (iPTR 0))),
+ (iPTR 0))),
+ (iPTR 0))),
+ (v2i1 (extract_subvector
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
+ (iPTR 0))), GR8, sub_8bit>;
+
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
@@ -3943,6 +4035,31 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (insert_subvector
+ (v16i1 immAllZerosV),
+ (v4i1 (extract_subvector
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))),
+ (iPTR 0))),
+ (v4i1 (extract_subvector
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))), GR8, sub_8bit>;
+defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1
+ (extract_subvector
+ (v16i1
+ (insert_subvector
+ (v16i1 immAllZerosV),
+ (v2i1 (extract_subvector
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
+ (iPTR 0))),
+ (iPTR 0))),
+ (iPTR 0))),
+ (v2i1 (extract_subvector
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
+ (iPTR 0))), GR8, sub_8bit>;
+
def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask),
(f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS
OpenPOWER on IntegriCloud