diff options
| author | Chandler Carruth <chandlerc@gmail.com> | 2014-10-03 22:43:17 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-10-03 22:43:17 +0000 |
| commit | f3e880697a617eea447e75185e6fb4845b28595d (patch) | |
| tree | 930026bc543d658421a330a8bb0cb6d8d0a9ebb7 /llvm/test | |
| parent | 1a3576a45051655f2f652fd94541c3de192c8991 (diff) | |
| download | bcm5719-llvm-f3e880697a617eea447e75185e6fb4845b28595d.tar.gz bcm5719-llvm-f3e880697a617eea447e75185e6fb4845b28595d.zip | |
[x86] Add a really preposterous number of patterns for matching all of
the various ways in which blends can be used to do vector element
insertion for lowering with the scalar math instruction forms that
effectively re-blend with the high elements after performing the
operation.
This then allows me to bail on the element insertion lowering path when
we have SSE4.1 and are going to be doing a normal blend, which in turn
restores the last of the blends lost from the new vector shuffle
lowering when I got it to prioritize insertion in other cases (for
example when we don't *have* a blend instruction).
Without the patterns, using blends here would have regressed
sse-scalar-fp-arith.ll *completely* with the new vector shuffle
lowering. For completeness, I've added RUN-lines with the new lowering
here. This is somewhat superfluous as I'm about to flip the default, but
hey, it shows that this actually significantly changed behavior.
The patterns I've added are just ridiculously repetitive. Suggestions on
making them better are very much welcome. In particular, handling the
commuted form of the v2f64 patterns is somewhat obnoxious.
llvm-svn: 219033
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll | 88 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll | 26 |
4 files changed, 90 insertions, 35 deletions
diff --git a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll index b122ef67544..415a4f12b2c 100644 --- a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll +++ b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -1,6 +1,9 @@ ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s +; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s +; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s +; RUN: llc -mcpu=x86-64 -mattr=+avx < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=AVX %s target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll index aa837f15e57..59041367bba 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -211,28 +211,61 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: shuffle_v2f64_03: -; SSE: # BB#0: -; SSE-NEXT: movsd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: shuffle_v2f64_03: +; SSE2: # BB#0: +; SSE2-NEXT: movsd %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE3-LABEL: shuffle_v2f64_03: +; SSE3: # BB#0: +; SSE3-NEXT: movsd %xmm0, %xmm1 +; SSE3-NEXT: movaps %xmm1, %xmm0 +; SSE3-NEXT: retq +; +; SSSE3-LABEL: shuffle_v2f64_03: +; SSSE3: # BB#0: +; SSSE3-NEXT: movsd %xmm0, %xmm1 +; SSSE3-NEXT: movaps %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: 
shuffle_v2f64_03: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_03: ; AVX: # BB#0: -; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: shuffle_v2f64_21: -; SSE: # BB#0: -; SSE-NEXT: movsd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: shuffle_v2f64_21: +; SSE2: # BB#0: +; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE3-LABEL: shuffle_v2f64_21: +; SSE3: # BB#0: +; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: retq +; +; SSSE3-LABEL: shuffle_v2f64_21: +; SSSE3: # BB#0: +; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v2f64_21: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_21: ; AVX: # BB#0: -; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1> ret <2 x double> %shuffle @@ -753,16 +786,35 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) { } define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) { -; SSE-LABEL: shuffle_v2f64_z1: -; SSE: # BB#0: -; SSE-NEXT: xorps %xmm1, %xmm1 -; SSE-NEXT: movsd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: shuffle_v2f64_z1: +; SSE2: # BB#0: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: movsd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE3-LABEL: shuffle_v2f64_z1: +; SSE3: # BB#0: +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: movsd %xmm1, %xmm0 +; SSE3-NEXT: retq +; +; SSSE3-LABEL: shuffle_v2f64_z1: +; SSSE3: # BB#0: +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: movsd %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; 
SSE41-LABEL: shuffle_v2f64_z1: +; SSE41: # BB#0: +; SSE41-NEXT: xorpd %xmm1, %xmm1 +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_z1: ; AVX: # BB#0: -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1> ret <2 x double> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index 32ee62fa985..7899a52a741 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -55,7 +55,7 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) { ; AVX1: # BB#0: ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2] -; AVX1-NEXT: vmovsd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v4f64_0300: @@ -382,7 +382,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) { ; AVX1: # BB#0: ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2] -; AVX1-NEXT: vmovsd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v4i64_0300: @@ -518,7 +518,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) { ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0] ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v4i64_4012: @@ -654,7 +654,7 @@ define <4 x i64> @stress_test1(<4 x i64> %a, 
<4 x i64> %b) { ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2] -; AVX1-NEXT: vmovsd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: stress_test1: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index 662b9832611..2f02f2fc08f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -91,7 +91,7 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) { ; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3] ; ALL-NEXT: vbroadcastsd %xmm0, %ymm0 -; ALL-NEXT: vmovsd %xmm1, %xmm0, %xmm1 +; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3] ; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -275,12 +275,12 @@ define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) { ; ALL-LABEL: shuffle_v8f64_08991abb: ; ALL: # BB#0: -; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1] -; ALL-NEXT: vmovsd %xmm0, %xmm2, %xmm2 -; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2] -; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,3,3] -; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0 -; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 +; ALL-NEXT: vpermilpd {{.*#+}} ymm2 = ymm0[1,0,2,2] +; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm1[0,2,3,3] +; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3] +; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,1] +; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] +; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> 
%b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> ret <8 x double> %shuffle @@ -303,11 +303,11 @@ define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) { ; ALL-LABEL: shuffle_v8f64_09ab1def: ; ALL: # BB#0: -; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm2 -; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1 -; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2] -; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0 -; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 +; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2 +; ALL-NEXT: vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2] +; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3] +; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] +; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> ret <8 x double> %shuffle @@ -721,7 +721,7 @@ define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) { ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3] ; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1 ; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3] -; ALL-NEXT: vmovsd %xmm1, %xmm0, %xmm0 +; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3] ; ALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm0, %zmm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10> |

