diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-10-02 04:45:02 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-10-02 04:45:02 +0000 |
| commit | 8d6a863b02f199960590973f06b4c4dd29df578f (patch) | |
| tree | 283d96188ef8f25d2438d73b1e0d42b9d47d6c46 /llvm/lib/Target | |
| parent | c3aab6eaaa0fedf009ced78e56dacc378a8cde8a (diff) | |
| download | bcm5719-llvm-8d6a863b02f199960590973f06b4c4dd29df578f.tar.gz bcm5719-llvm-8d6a863b02f199960590973f06b4c4dd29df578f.zip | |
[X86] Add broadcast load folding patterns to the NoVLX compare patterns.
These patterns use zmm registers for 128/256-bit compares when
the VLX instructions aren't available. Previously we only
supported registers; as PR36191 notes, we can fold broadcast
loads, though not regular loads.
llvm-svn: 373423
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 154 |
1 file changed, 138 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 4064d020cc4..2d3b8a55681 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2392,7 +2392,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, (_.VT _.RC:$src1), cond))), (!cast<Instruction>(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - (CommFrag.OperandTransform $cc))>; + (CommFrag_su.OperandTransform $cc))>; } multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, @@ -3172,6 +3172,30 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, Narrow.KRC)>; } +multiclass axv512_icmp_packed_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, + string InstStr, + X86VectorVTInfo Narrow, + X86VectorVTInfo Wide> { + // Broadcast load. + def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)))), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmb") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2), + Narrow.KRC)>; + + def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Frag_su (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2)))), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmbk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2), + Narrow.KRC)>; +} + // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, string InstStr, @@ -3180,7 +3204,7 @@ multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)), (COPY_TO_REGCLASS - (!cast<Instruction>(InstStr##Zrri) + (!cast<Instruction>(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), (Frag.OperandTransform $cc)), Narrow.KRC)>; @@ -3189,34 +3213,108 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)))), - (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - (Frag.OperandTransform $cc)), Narrow.KRC)>; + (Frag_su.OperandTransform $cc)), Narrow.KRC)>; +} + +multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, + PatFrag CommFrag, PatFrag CommFrag_su, + string InstStr, + X86VectorVTInfo Narrow, + X86VectorVTInfo Wide> { +// Broadcast load. 
+def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), cond)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), + cond)))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; + +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; } // Same as above, but for fp types which don't use PatFrags. 
-multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su, - string InstStr, +multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr, X86VectorVTInfo Narrow, X86VectorVTInfo Wide> { -def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS - (!cast<Instruction>(InstStr##Zrri) + (!cast<Instruction>(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), timm:$cc), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (OpNode_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc))), - (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), timm:$cc), Narrow.KRC)>; + +// Broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +// Commuted with broadcast load. 
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { @@ -3234,6 +3332,18 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>; defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>; + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>; + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>; + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>; + defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>; } defm : 
axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; @@ -3248,10 +3358,22 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, 
v8i64_info>; + + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; } let Predicates = [HasBWI, NoVLX] in { |

