summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/vector-rotate-512.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-rotate-512.ll')
-rw-r--r--llvm/test/CodeGen/X86/vector-rotate-512.ll92
1 files changed, 41 insertions, 51 deletions
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index d92d73a220d..0b858e7dc9e 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -179,12 +179,11 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm4
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
-; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm8
-; AVX512VL-NEXT: vpor %ymm4, %ymm8, %ymm4
+; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm5
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT: vpternlogq $248, %ymm8, %ymm4, %ymm5
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm4
@@ -196,11 +195,10 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
-; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpternlogq $248, %ymm8, %ymm3, %ymm4
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
@@ -425,13 +423,11 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm5
; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
-; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
-; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
+; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm0, %ymm3, %ymm0
+; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm3, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
@@ -446,12 +442,11 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-NEXT: vpsllw %xmm2, %xmm4, %xmm2
; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
-; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v64i8:
@@ -465,12 +460,11 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm4, %xmm2
; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm3, %zmm2
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
-; AVX512VLBW-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
-; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
+; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
%splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
%splat8 = sub <64 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %splat
@@ -889,31 +883,27 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55]
; AVX512VL-NEXT: vpsrlw $11, %ymm1, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT: vpor %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vpternlogq $200, %ymm3, %ymm2, %ymm1
; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $200, %ymm3, %ymm2, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $5, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm2
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $5, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
%shl = shl <32 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
%lshr = lshr <32 x i16> %a, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -948,34 +938,34 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39]
-; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
+; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39]
+; AVX512VL-NEXT: vpternlogq $200, %ymm2, %ymm4, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm3, %ymm0
-; AVX512VL-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $200, %ymm2, %ymm4, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
OpenPOWER on IntegriCloud