diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/viabs.ll | 10 |
3 files changed, 21 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 397257d3660..0e2eef7cfb9 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7235,8 +7235,14 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; // Helper fragments to match sext vXi1 to vXiY. -def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; -def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; +def v64i1sextv64i8 : PatLeaf<(v64i8 + (X86vsext + (v64i1 (X86pcmpgtm + (bc_v64i8 (v16i32 immAllZerosV)), + VR512:$src))))>; +def v32i1sextv32i16 : PatLeaf<(v32i16 (X86vsrai VR512:$src, (i8 15)))>; +def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; +def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > { def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), @@ -7863,6 +7869,16 @@ def : Pat<(xor (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), (VPABSQZrr VR512:$src)>; } +let Predicates = [HasBWI] in { +def : Pat<(xor + (bc_v8i64 (v64i1sextv64i8)), + (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))), + (VPABSBZrr VR512:$src)>; +def : Pat<(xor + (bc_v8i64 (v32i1sextv32i16)), + (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))), + (VPABSWZrr VR512:$src)>; +} multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{ diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index d38940dc5f8..d7bb95e8e14 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -856,6 +856,7 @@ def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>; // 512-bit bitconvert pattern fragments +def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>; def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>; def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>; def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>; diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll index b14658e0f84..ea3d5cb3869 100644 --- a/llvm/test/CodeGen/X86/viabs.ll +++ b/llvm/test/CodeGen/X86/viabs.ll @@ -637,11 +637,7 @@ define <64 x i8> @test14(<64 x i8> %a) nounwind { ; ; AVX512BW-LABEL: test14: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1 -; AVX512BW-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpabsb %zmm0, %zmm0 ; AVX512BW-NEXT: retq %tmp1neg = sub <64 x i8> zeroinitializer, %a %b = icmp slt <64 x i8> %a, zeroinitializer @@ -712,9 +708,7 @@ define <32 x i16> @test15(<32 x i16> %a) nounwind { ; ; AVX512BW-LABEL: test15: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vpsraw $15, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpabsw %zmm0, %zmm0 ; AVX512BW-NEXT: retq %tmp1neg = sub <32 x i16> zeroinitializer, %a %b = icmp sgt <32 x i16> %a, zeroinitializer |

