author     Chandler Carruth <chandlerc@gmail.com>   2014-08-15 03:54:49 +0000
committer  Chandler Carruth <chandlerc@gmail.com>   2014-08-15 03:54:49 +0000
commit     17fd848bfafd5ee4dd36887d9c3c8556d0f013fe (patch)
tree       bbed3f938517be41c7be559686e1a083db6914cb
parent     fe963b17644c8b02784cb1863155ad236d6f96af (diff)
[x86] Fix the very broken formation of vpunpck instructions in the
target-specific shuffle DAG combines.

We were recognizing the paired shuffles backwards. This code needs to be
replaced anyway, as we have the same functionality elsewhere, but I'll do
the refactoring in a follow-up; this is the minimal fix to the behavior.

In addition to fixing miscompiles with the new vector shuffle lowering, it
also causes the canonicalization to kick in much better, selecting the
smaller encoding variants in lots of places in the new AVX path. This still
isn't quite ideal, as we don't need both the shufpd and the punpck
instructions, but that'll get fixed in a follow-up patch.

llvm-svn: 215690
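To illustrate the corrected pairing check, below is a minimal standalone C++
sketch of the mask-widening step that the one-line fix touches. The helper
name canWidenMask and the driver are illustrative only, not LLVM's actual
combineX86ShufflesRecursively code; only the corrected condition mirrors the
patch.

    // Standalone sketch (illustrative names, assumed driver) of widening a
    // shuffle mask by collapsing adjacent element pairs into wider elements.
    #include <cassert>
    #include <vector>

    // Returns true if every adjacent pair of the narrow mask selects one whole
    // wider element, i.e. the pair is (2k, 2k+1); on success Wide receives the
    // widened mask.
    static bool canWidenMask(const std::vector<int> &Mask,
                             std::vector<int> &Wide) {
      assert(Mask.size() % 2 == 0 && "mask must have an even number of lanes");
      Wide.clear();
      for (size_t i = 0, e = Mask.size() / 2; i != e; ++i) {
        // Corrected check: the first lane of the pair must be even and the
        // second lane exactly one greater (the pre-patch code had the pair
        // order reversed).
        if (Mask[2 * i] % 2 != 0 || Mask[2 * i] + 1 != Mask[2 * i + 1])
          return false;
        Wide.push_back(Mask[2 * i] / 2);
      }
      return true;
    }

    int main() {
      std::vector<int> Wide;
      // <0,1,4,5> widens to <0,2>: low halves of both inputs, a
      // punpcklqdq-style unpack.
      assert(canWidenMask({0, 1, 4, 5}, Wide) &&
             Wide == std::vector<int>({0, 2}));
      // <1,0,3,2> cannot be widened: each pair is descending.
      assert(!canWidenMask({1, 0, 3, 2}, Wide));
      return 0;
    }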
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp          2
-rw-r--r--  llvm/test/CodeGen/X86/avx-sext.ll                2
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll   9
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll  26
4 files changed, 24 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c2f284496d5..5143f19c763 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19496,7 +19496,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
while (Mask.size() > 1) {
SmallVector<int, 16> NewMask;
for (int i = 0, e = Mask.size()/2; i < e; ++i) {
- if (Mask[2*i] % 2 != 0 || Mask[2*i] != Mask[2*i + 1] + 1) {
+ if (Mask[2*i] % 2 != 0 || Mask[2*i] + 1 != Mask[2*i + 1]) {
NewMask.clear();
break;
}
diff --git a/llvm/test/CodeGen/X86/avx-sext.ll b/llvm/test/CodeGen/X86/avx-sext.ll
index fb2287f5289..9bcf06f7b32 100644
--- a/llvm/test/CodeGen/X86/avx-sext.ll
+++ b/llvm/test/CodeGen/X86/avx-sext.ll
@@ -156,7 +156,7 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
; AVX-LABEL: sext_16i8_to_16i16
; AVX: vpmovsxbw
-; AVX: vmovhlps
+; AVX: vpunpckhqdq
; AVX: vpmovsxbw
; AVX: ret
define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 210d672b5c0..3b7c146dd36 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-AVX1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -60,6 +61,14 @@ define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %shuffle
}
+define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-AVX1-LABEL: @shuffle_v4i32_2121
+; CHECK-AVX1: vpshufd {{.*}} # xmm0 = xmm0[2,1,2,1]
+; CHECK-AVX1-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
+ ret <4 x i32> %shuffle
+}
+
define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
; CHECK-SSE2-LABEL: @shuffle_v4f32_0001
; CHECK-SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index d26e1fbb0cb..ac441e9b77f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown"
define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0001
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
@@ -18,7 +18,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
@@ -41,7 +41,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
@@ -52,7 +52,7 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_1000
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
@@ -63,8 +63,8 @@ define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_2200
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
@@ -76,7 +76,7 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; AVX1-NEXT: vpunpckhqdq {{.*}} # xmm1 = xmm1[1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
@@ -174,7 +174,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0124
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -185,7 +185,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0142
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm2[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm2 = xmm2[0,0]
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -197,7 +197,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -209,7 +209,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -229,7 +229,7 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1]
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -249,7 +249,7 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq