diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 40 |
2 files changed, 32 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9f483b72a89..2f2ae32a1d1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17830,6 +17830,14 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { "Cannot set masked compare for this operation"); ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); + + // If this is a seteq make sure any build vectors of all zeros are on the RHS. + // This helps with vptestm matching. + // TODO: Should we just canonicalize the setcc during DAG combine? + if ((SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE) && + ISD::isBuildVectorAllZeros(Op0.getNode())) + std::swap(Op0, Op1); + bool Swap = false; unsigned SSECC; switch (SetCCOpcode) { diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 395ab8a3c8a..23432072e98 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2189,27 +2189,27 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, } } -def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2), - (X86cmpm_c node:$src1, node:$src2, (i8 0))>; -def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), - (X86cmpm_c node:$src1, node:$src2, (i8 4))>; +// This fragment treats X86cmpm as commutable to help match loads in both +// operands for PCMPEQ. +def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2), + (X86cmpm_c node:$src1, node:$src2, (i8 0))>; def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 6))>; // FIXME: Is there a better scheduler itinerary for VPCMP? -defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm, +defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>, EVEX_CD8<8, CD8VF>, VEX_WIG; -defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm, +defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>, EVEX_CD8<16, CD8VF>, VEX_WIG; -defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm, +defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>, EVEX_CD8<32, CD8VF>; -defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm, +defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; @@ -3111,16 +3111,16 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD", v8i32x_info, v16i32_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>; defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD", v4i32x_info, v16i32_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>; defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ", v4i64x_info, v8i64_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>; defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ", v2i64x_info, v8i64_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", v8i32x_info, v16i32_info>; @@ -3141,16 +3141,16 @@ let Predicates = [HasAVX512, NoVLX] in { let Predicates = [HasBWI, NoVLX] in { defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB", v32i8x_info, v64i8_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>; defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB", v16i8x_info, v64i8_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>; defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW", v16i16x_info, v32i16_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>; defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW", v8i16x_info, v32i16_info>; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v32i8x_info, v64i8_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v32i8x_info, v64i8_info>; @@ -5465,6 +5465,14 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, } } +// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm +// as commutable here because we already canonicalized all zeros vectors to the +// RHS during lowering. +def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2), + (X86cmpm node:$src1, node:$src2, (i8 0))>; +def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), + (X86cmpm node:$src1, node:$src2, (i8 4))>; + multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, PatFrag OpNode, OpndItins itins> : avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>, |

