summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td | 20
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 1
-rw-r--r-- llvm/test/CodeGen/X86/viabs.ll | 10
3 files changed, 21 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 397257d3660..0e2eef7cfb9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7235,8 +7235,14 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Helper fragments to match sext vXi1 to vXiY.
-def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
-def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
+def v64i1sextv64i8 : PatLeaf<(v64i8
+ (X86vsext
+ (v64i1 (X86pcmpgtm
+ (bc_v64i8 (v16i32 immAllZerosV)),
+ VR512:$src))))>;
+def v32i1sextv32i16 : PatLeaf<(v32i16 (X86vsrai VR512:$src, (i8 15)))>;
+def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
+def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
@@ -7863,6 +7869,16 @@ def : Pat<(xor
(bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
(VPABSQZrr VR512:$src)>;
}
+let Predicates = [HasBWI] in {
+def : Pat<(xor
+ (bc_v8i64 (v64i1sextv64i8)),
+ (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))),
+ (VPABSBZrr VR512:$src)>;
+def : Pat<(xor
+ (bc_v8i64 (v32i1sextv32i16)),
+ (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))),
+ (VPABSWZrr VR512:$src)>;
+}
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index d38940dc5f8..d7bb95e8e14 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -856,6 +856,7 @@ def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
+def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;
def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index b14658e0f84..ea3d5cb3869 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -637,11 +637,7 @@ define <64 x i8> @test14(<64 x i8> %a) nounwind {
;
; AVX512BW-LABEL: test14:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpcmpgtb %zmm0, %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpabsb %zmm0, %zmm0
; AVX512BW-NEXT: retq
%tmp1neg = sub <64 x i8> zeroinitializer, %a
%b = icmp slt <64 x i8> %a, zeroinitializer
@@ -712,9 +708,7 @@ define <32 x i16> @test15(<32 x i16> %a) nounwind {
;
; AVX512BW-LABEL: test15:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpsraw $15, %zmm0, %zmm1
-; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpabsw %zmm0, %zmm0
; AVX512BW-NEXT: retq
%tmp1neg = sub <32 x i16> zeroinitializer, %a
%b = icmp sgt <32 x i16> %a, zeroinitializer
OpenPOWER on IntegriCloud