| author | Chandler Carruth <chandlerc@gmail.com> | 2014-08-15 03:54:49 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-08-15 03:54:49 +0000 |
| commit | 17fd848bfafd5ee4dd36887d9c3c8556d0f013fe (patch) | |
| tree | bbed3f938517be41c7be559686e1a083db6914cb | |
| parent | fe963b17644c8b02784cb1863155ad236d6f96af (diff) | |
| download | bcm5719-llvm-17fd848bfafd5ee4dd36887d9c3c8556d0f013fe.tar.gz bcm5719-llvm-17fd848bfafd5ee4dd36887d9c3c8556d0f013fe.zip | |
[x86] Fix the very broken formation of vpunpck instructions in the
target-specific shuffle DAG combines.
We were recognizing the paired shuffles backwards. This code needs to be
replaced anyway, as we have the same functionality elsewhere, but I'll
do the refactoring in a follow-up; this is the minimal fix to the
behavior.
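
For reference, a minimal standalone sketch of the pair-widening check this one-line fix corrects (the helper and driver below are illustrative, not the LLVM code itself): a shuffle mask can only be widened to double-width elements when each (Mask[2*i], Mask[2*i+1]) pair is a consecutive even/odd pair, and the old condition tested that pair in reverse order, so it matched swapped pairs and formed the unpack nodes backwards.

```cpp
#include <cstdio>
#include <vector>

// Illustrative sketch (hypothetical helper, not the LLVM function): try to
// widen a shuffle mask to elements twice as wide. This only works when each
// pair (Mask[2*i], Mask[2*i+1]) is a consecutive even/odd pair. The fixed
// condition requires Mask[2*i] + 1 == Mask[2*i + 1]; the old code compared
// the pair backwards.
static bool widenShuffleMask(const std::vector<int> &Mask,
                             std::vector<int> &WideMask) {
  WideMask.clear();
  for (size_t i = 0, e = Mask.size() / 2; i != e; ++i) {
    if (Mask[2 * i] % 2 != 0 || Mask[2 * i] + 1 != Mask[2 * i + 1])
      return false; // not a contiguous even/odd pair; cannot widen
    WideMask.push_back(Mask[2 * i] / 2);
  }
  return true;
}

int main() {
  std::vector<int> Wide;
  // <2,3,0,1> on v4i32 widens to <1,0> on v2i64 (a qword swap): prints 1.
  std::printf("%d\n", widenShuffleMask({2, 3, 0, 1}, Wide));
  // <3,2,1,0> consists of reversed pairs, so it cannot be widened: prints 0.
  std::printf("%d\n", widenShuffleMask({3, 2, 1, 0}, Wide));
}
```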
In addition to fixing miscompiles with the new vector shuffle lowering,
it also causes the canonicalization to kick in much better, selecting
the smaller encoding variants in lots of places in the new AVX path.
This still isn't quite ideal as we don't need both the shufpd and the
punpck instructions, but that'll get fixed in a follow-up patch.
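
To illustrate the encoding point with a small sketch (SSE intrinsics chosen for the example; this is not code from the patch): a vpshufd with mask [0,1,0,1] and a vpunpcklqdq of a register with itself both duplicate the low 64 bits, and the unpack form takes no imm8 byte, which is the smaller encoding the combine now prefers.

```cpp
#include <cstdio>
#include <cstring>
#include <immintrin.h>

// Both operations below duplicate the low qword of x: pshufd with immediate
// 0x44 selects dwords [0,1,0,1], while punpcklqdq of x with itself yields
// qwords [0,0]. The unpack form carries no immediate byte, so it encodes
// one byte shorter, which is why canonicalizing toward it shrinks the code
// in the tests updated by this commit.
int main() {
  __m128i x = _mm_set_epi32(4, 3, 2, 1);
  __m128i a = _mm_shuffle_epi32(x, 0x44); // vpshufd  $0x44: x[0,1,0,1]
  __m128i b = _mm_unpacklo_epi64(x, x);   // vpunpcklqdq:    x[0,0]
  std::printf("%s\n",
              std::memcmp(&a, &b, sizeof(a)) == 0 ? "equal" : "different");
}
```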
llvm-svn: 215690
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 2 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-sext.ll | 2 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll | 9 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll | 26 |
4 files changed, 24 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c2f284496d5..5143f19c763 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19496,7 +19496,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
   while (Mask.size() > 1) {
     SmallVector<int, 16> NewMask;
     for (int i = 0, e = Mask.size()/2; i < e; ++i) {
-      if (Mask[2*i] % 2 != 0 || Mask[2*i] != Mask[2*i + 1] + 1) {
+      if (Mask[2*i] % 2 != 0 || Mask[2*i] + 1 != Mask[2*i + 1]) {
         NewMask.clear();
         break;
       }
diff --git a/llvm/test/CodeGen/X86/avx-sext.ll b/llvm/test/CodeGen/X86/avx-sext.ll
index fb2287f5289..9bcf06f7b32 100644
--- a/llvm/test/CodeGen/X86/avx-sext.ll
+++ b/llvm/test/CodeGen/X86/avx-sext.ll
@@ -156,7 +156,7 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
 
 ; AVX-LABEL: sext_16i8_to_16i16
 ; AVX: vpmovsxbw
-; AVX: vmovhlps
+; AVX: vpunpckhqdq
 ; AVX: vpmovsxbw
 ; AVX: ret
 define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 210d672b5c0..3b7c146dd36 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-AVX1
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
@@ -60,6 +61,14 @@ define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %shuffle
 }
 
+define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-AVX1-LABEL: @shuffle_v4i32_2121
+; CHECK-AVX1: vpshufd {{.*}} # xmm0 = xmm0[2,1,2,1]
+; CHECK-AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
+  ret <4 x i32> %shuffle
+}
+
 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SSE2-LABEL: @shuffle_v4f32_0001
 ; CHECK-SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index d26e1fbb0cb..ac441e9b77f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown"
 define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0001
 ; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
@@ -18,7 +18,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
@@ -41,7 +41,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
@@ -52,7 +52,7 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_1000
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
@@ -63,8 +63,8 @@ define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_2200
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
@@ -76,7 +76,7 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; AVX1-NEXT: vpunpckhqdq {{.*}} # xmm1 = xmm1[1,1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
@@ -174,7 +174,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0124
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -185,7 +185,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0142
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm2[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm2 = xmm2[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -197,7 +197,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -209,7 +209,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -229,7 +229,7 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -249,7 +249,7 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq

