summary refs log tree commit diff stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 35
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll 100
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll 14
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll 22
4 files changed, 134 insertions, 37 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f3774321a07..8c9d8711d1f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7233,6 +7233,31 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
return DAG.getConstant(Imm, MVT::i8);
}
+/// \brief Try to emit a blend instruction for a shuffle.
+///
+/// This doesn't do any checks for the availability of instructions for blending
+/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
+/// be matched in the backend with the type given. What it does check for is
+/// that the shuffle mask is in fact a blend (else it returns an empty SDValue).
+static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+
+ unsigned BlendMask = 0; // Bit i set => result element i is taken from V2.
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] >= Size) { // Indices >= Size select an element of V2.
+ if (Mask[i] != i + Size) // A blend must keep the element in lane i.
+ return SDValue(); // Shuffled V2 input!
+ BlendMask |= 1u << i;
+ continue;
+ }
+ if (Mask[i] >= 0 && Mask[i] != i) // Negative (presumably undef) entries pass.
+ return SDValue(); // Shuffled V1 input!
+ }
+ return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8)); // NOTE(review): i8 immediate; assumes Mask has at most 8 elements - confirm.
+}
+
/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// This is the basis function for the 2-lane 64-bit shuffles as we have full
@@ -7267,6 +7292,11 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isShuffleEquivalent(Mask, 1, 3))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend =
+ lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, DAG))
+ return Blend;
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, MVT::i8));
@@ -7353,6 +7383,11 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend =
+ lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, DAG))
+ return Blend;
+
if (NumV2Elements == 1) {
int V2Index =
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 619105f5026..f6382a98559 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -111,17 +111,35 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
-; ALL-LABEL: @shuffle_v2f64_03
-; ALL: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2f64_03
+; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2f64_03
+; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2f64_03
+; SSE41: blendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
-; ALL-LABEL: @shuffle_v2f64_21
-; ALL: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
-; ALL-NEXT: movapd %xmm1, %xmm0
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2f64_21
+; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2f64_21
+; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2f64_21
+; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
ret <2 x double> %shuffle
}
@@ -143,17 +161,35 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_03
-; ALL: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_03
+; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_03
+; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2i64_03
+; SSE41: blendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_03_copy
-; ALL: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
-; ALL-NEXT: movapd %xmm1, %xmm0
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_03_copy
+; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_03_copy
+; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2i64_03_copy
+; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
@@ -204,18 +240,38 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_21
-; ALL: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
-; ALL-NEXT: movapd %xmm1, %xmm0
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_21
+; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_21
+; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2i64_21
+; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_21_copy
-; ALL: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; ALL-NEXT: movapd %xmm2, %xmm0
-; ALL-NEXT: retq
+; SSE2-LABEL: @shuffle_v2i64_21_copy
+; SSE2: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v2i64_21_copy
+; SSE3: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; SSE3-NEXT: movapd %xmm2, %xmm0
+; SSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v2i64_21_copy
+; SSE41: blendpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 9105197f67c..d5bb55a2caa 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -216,11 +216,14 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
; SSE2-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4f32_4zzz
-; SSE41: insertps {{.*}} # xmm0 = xmm0[0],zero,zero,zero
+; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE41-NEXT: blendps {{.*}} # [[X]] = xmm0[0],[[X]][1,2,3]
+; SSE41-NEXT: movaps %[[X]], %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4f32_4zzz
-; AVX1: vinsertps {{.*}} # xmm0 = xmm0[0],zero,zero,zero
+; AVX1: vxorps %[[X:xmm[0-9]+]], %[[X]]
+; AVX1-NEXT: vblendps {{.*}} # xmm0 = xmm0[0],[[X]][1,2,3]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
ret <4 x float> %shuffle
@@ -290,11 +293,14 @@ define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
; SSE2-NEXT: retq
;
; SSE41-LABEL: @shuffle_v4f32_zzz7
-; SSE41: insertps {{.*}} # xmm0 = zero,zero,zero,xmm0[3]
+; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE41-NEXT: blendps {{.*}} # [[X]] = [[X]][0,1,2],xmm0[3]
+; SSE41-NEXT: movaps %[[X]], %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: @shuffle_v4f32_zzz7
-; AVX1: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[3]
+; AVX1: vxorps %[[X:xmm[0-9]+]], %[[X]]
+; AVX1-NEXT: vblendps {{.*}} # xmm0 = [[X]][0,1,2],xmm0[3]
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x float> %shuffle
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index a21b78985d7..cd79a38ca4a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -40,7 +40,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0300
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -119,7 +119,7 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: @shuffle_v4f64_0300
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -282,7 +282,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
-; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
@@ -293,7 +293,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm2 = xmm2[0,0]
-; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
@@ -305,7 +305,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
-; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
@@ -317,7 +317,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
-; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
@@ -335,9 +335,9 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0451
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1]
-; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
-; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
@@ -355,9 +355,9 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_4015
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
-; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
-; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
@@ -370,7 +370,7 @@ define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpunpckhqdq {{.*}} # xmm0 = xmm0[1,1]
; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
OpenPOWER on IntegriCloud