From 2bc8e079f2202ce6f223d034ed2dde141f7eec60 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 2 Jul 2018 15:14:07 +0000
Subject: [X86][SSE] Blend any v8i16/v4i32 shift with 2 shift unique values

We were only doing this for basic blends, despite shuffle lowering now being
good enough to handle more complex blends. This means that the two v8i16
splat shifts are performed in parallel instead of serially as the general
shift case.

llvm-svn: 336113
---
 llvm/test/CodeGen/X86/lower-vec-shift.ll | 34 +++++++++++---------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/llvm/test/CodeGen/X86/lower-vec-shift.ll b/llvm/test/CodeGen/X86/lower-vec-shift.ll
index 1dfb8ca65bc..cca165e29ff 100644
--- a/llvm/test/CodeGen/X86/lower-vec-shift.ll
+++ b/llvm/test/CodeGen/X86/lower-vec-shift.ll
@@ -211,31 +211,21 @@ define <4 x i32> @test8(<4 x i32> %a) {
 define <8 x i16> @test9(<8 x i16> %a) {
 ; SSE-LABEL: test9:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,0,0,0]
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    pand %xmm2, %xmm1
-; SSE-NEXT:    psraw $2, %xmm0
-; SSE-NEXT:    pandn %xmm0, %xmm2
-; SSE-NEXT:    por %xmm2, %xmm1
-; SSE-NEXT:    psraw $1, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psraw $3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,0,0,0]
+; SSE-NEXT:    psraw $1, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: test9:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm1
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4],xmm1[5,6,7]
-; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: test9:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
-; AVX2-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; AVX-LABEL: test9:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsraw $3, %xmm0, %xmm1
+; AVX-NEXT:    vpsraw $1, %xmm0, %xmm0
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4],xmm1[5,6,7]
+; AVX-NEXT:    retq
   %lshr = ashr <8 x i16> %a, <i16 1, i16 3, i16 1, i16 1, i16 1, i16 3, i16 3, i16 3>
   ret <8 x i16> %lshr
 }
--
cgit v1.2.3
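
Note (illustration only): the subject line also covers the v4i32 case, which is not shown in the test diff above. Below is a minimal hedged LLVM IR sketch of that case; the function name and shift amounts are hypothetical, not taken from lower-vec-shift.ll. With only two unique shift amounts, the backend can now emit two uniform shifts and blend the lanes instead of falling back to the general per-lane shift lowering.

  define <4 x i32> @ashr_v4i32_two_amounts(<4 x i32> %a) {
    ; Only two distinct shift amounts (1 and 3), so after this change the
    ; lowering can perform two uniform arithmetic shifts (e.g. psrad $1 and
    ; psrad $3) in parallel and blend the results per lane, rather than use
    ; the slower general variable-shift expansion.
    %r = ashr <4 x i32> %a, <i32 1, i32 3, i32 1, i32 3>
    ret <4 x i32> %r
  }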