author     Simon Pilgrim <llvm-dev@redking.me.uk>    2018-11-18 15:52:08 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2018-11-18 15:52:08 +0000
commit     fec9f8657b5cea6ad99cf95ef3524d98ae975b97 (patch)
tree       0938e5343033ad3b5f74faadd5501e54a29e539b /llvm/test/CodeGen
parent     40509997ebce38b00c3abf441ec1f620ada4012e (diff)
[X86][SSE] Relax IsSplatValue - remove the 'variable shift' limit on subtracts.
This means we no longer fall back to the per-lane variable shifts as often when the older splat (uniform) variable shifts can be used cheaply.
llvm-svn: 347162
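
For reference, the splatvar rotate tests exercise a pattern along the lines of the sketch below (an illustrative reconstruction for the v2i64 case; the actual test body is not part of this diff): the rotate amount is broadcast to every lane, and the right-shift amount is a subtract of that splat from the element width. With IsSplatValue now looking through the subtract, AVX2 can select the uniform shifts (vpsllq/vpsrlq) instead of the per-lane variable shift vpsrlvq, which is what the updated CHECK lines below verify.

; Illustrative sketch (hypothetical reconstruction) of the splat-rotate pattern
; exercised by splatvar_rotate_v2i64; not copied from the test file.
define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
  ; Broadcast lane 0 of the rotate amount to all lanes.
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  ; rot(a, n) = (a << n) | (a >> (64 - n)); the sub below is itself a splat value.
  %shl = shl <2 x i64> %a, %splat
  %amt = sub <2 x i64> <i64 64, i64 64>, %splat
  %lshr = lshr <2 x i64> %a, %amt
  %rot = or <2 x i64> %shl, %lshr
  ret <2 x i64> %rot
}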
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-128.ll | 63
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-256.ll | 70
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-512.ll | 14
3 files changed, 40 insertions, 107 deletions
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 9d0e65143d0..22adf6e8a53 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -689,7 +689,7 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
 ; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
 ; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -804,7 +804,8 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
 ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -925,53 +926,17 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT: retq
 ;
-; AVX512F-LABEL: splatvar_rotate_v8i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: splatvar_rotate_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_rotate_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX512VLBW-NEXT: retq
+; AVX512-LABEL: splatvar_rotate_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: retq
 ;
 ; XOPAVX1-LABEL: splatvar_rotate_v8i16:
 ; XOPAVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 75fe0b322ca..cdfcb59d513 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -522,10 +522,10 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX2-LABEL: splatvar_rotate_v4i64:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpbroadcastq %xmm1, %ymm2
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [64,64,64,64]
-; AVX2-NEXT: vpsubq %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vpsrlvq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
+; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
@@ -606,9 +606,10 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm2
 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32]
-; AVX2-NEXT: vpsubd %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
+; AVX2-NEXT: vpsubd %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
+; AVX2-NEXT: vpsrld %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
@@ -697,52 +698,17 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
-; AVX512F-LABEL: splatvar_rotate_v16i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastw %xmm1, %ymm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm2, %xmm3, %xmm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: splatvar_rotate_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm2, %xmm3, %xmm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm0, %ymm1, %ymm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_rotate_v16i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %ymm2, %ymm3, %ymm2
-; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm1
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %ymm2, %ymm3, %ymm2
-; AVX512VLBW-NEXT: vpsrlvw %ymm2, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0
-; AVX512VLBW-NEXT: retq
+; AVX512-LABEL: splatvar_rotate_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastw %xmm1, %ymm2
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
+; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
 ;
 ; XOPAVX1-LABEL: splatvar_rotate_v16i16:
 ; XOPAVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index f838f1b54db..54adeb28a26 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -344,22 +344,24 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
 ; AVX512BW-LABEL: splatvar_rotate_v32i16:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm2
-; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %zmm2, %zmm3, %zmm2
 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm2
-; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %zmm2, %zmm3, %zmm2
 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
 ; AVX512VLBW-NEXT: retq
 %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer