summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-10-05 12:07:34 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-10-05 12:07:34 +0000
commitacecdc02119459d8ca13e6ca8fa2f16d655ae2f7 (patch)
treed7d1f0654fa99218721118c0edc03952289598c3
parentae81abfa3807885d0ecc221f747100607cd4be3e (diff)
downloadbcm5719-llvm-acecdc02119459d8ca13e6ca8fa2f16d655ae2f7.tar.gz
bcm5719-llvm-acecdc02119459d8ca13e6ca8fa2f16d655ae2f7.zip
[x86] Fix PR21139, one of the last remaining regressions found in the
new vector shuffle lowering. This is loosely based on a patch by Marius Wachtler to the PR (thanks!). I refactored it a bi to use std::count_if and a mutable array ref but the core idea was exactly right. I also added some direct testing of this case. I believe PR21137 is now the only remaining regression. llvm-svn: 219081
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp26
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll22
-rw-r--r--llvm/test/CodeGen/X86/vector-zext.ll5
3 files changed, 37 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1bfacfe65a2..64e0db1e70f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9278,21 +9278,29 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
//
// FIXME: We need to handle other interleaving widths (i16, i32, ...).
if (shouldLowerAsInterleaving(Mask)) {
- // FIXME: Figure out whether we should pack these into the low or high
- // halves.
-
- int EMask[16], OMask[16];
+ int NumLoHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 0 && M < 8) || (M >= 16 && M < 24);
+ });
+ int NumHiHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 8 && M < 16) || M >= 24;
+ });
+ int EMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ int OMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ bool UnpackLo = NumLoHalf >= NumHiHalf;
+ MutableArrayRef<int> TargetEMask(UnpackLo ? EMask : EMask + 8, 8);
+ MutableArrayRef<int> TargetOMask(UnpackLo ? OMask : OMask + 8, 8);
for (int i = 0; i < 8; ++i) {
- EMask[i] = Mask[2*i];
- OMask[i] = Mask[2*i + 1];
- EMask[i + 8] = -1;
- OMask[i + 8] = -1;
+ TargetEMask[i] = Mask[2 * i];
+ TargetOMask[i] = Mask[2 * i + 1];
}
SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
+ return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ MVT::v16i8, Evens, Odds);
}
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index e8613be0272..acb6d503127 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -232,6 +232,20 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(
ret <16 x i8> %shuffle
}
+define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; SSE: # BB#0:
+; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %shuffle
+}
+
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSE: # BB#0:
@@ -373,7 +387,7 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(
define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
; SSE2-LABEL: trunc_v4i32_shuffle:
; SSE2: # BB#0:
-; SSE2-NEXT: pand .LCPI13_0(%rip), %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
@@ -444,7 +458,7 @@ entry:
define <16 x i8> @PR20540(<8 x i8> %a) {
; SSE2-LABEL: PR20540:
; SSE2: # BB#0:
-; SSE2-NEXT: pand .LCPI16_0(%rip), %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
@@ -767,7 +781,7 @@ define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,7]
-; SSE2-NEXT: pand .LCPI23_0(%rip), %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,4]
; SSE2-NEXT: movdqa %xmm1, %xmm3
@@ -821,7 +835,7 @@ define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
-; SSE2-NEXT: pand .LCPI24_0(%rip), %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,4]
; SSE2-NEXT: movdqa %xmm0, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 0ce3ecf5d61..53d7235a3d1 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -191,9 +191,8 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
OpenPOWER on IntegriCloud