diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 35 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll | 100 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll | 22 |
4 files changed, 134 insertions, 37 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f3774321a07..8c9d8711d1f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7233,6 +7233,31 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, return DAG.getConstant(Imm, MVT::i8); } +/// \brief Try to emit a blend instruction for a shuffle. +/// +/// This doesn't do any checks for the availability of instructions for blending +/// these values. It relies on the availability of the X86ISD::BLENDI pattern to +/// be matched in the backend with the type given. What it does check for is +/// that the shuffle mask is in fact a blend. +static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef<int> Mask, + SelectionDAG &DAG) { + + unsigned BlendMask = 0; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + if (Mask[i] >= Size) { + if (Mask[i] != i + Size) + return SDValue(); // Shuffled V2 input! + BlendMask |= 1u << i; + continue; + } + if (Mask[i] >= 0 && Mask[i] != i) + return SDValue(); // Shuffled V1 input! + } + return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, + DAG.getConstant(BlendMask, MVT::i8)); +} + /// \brief Handle lowering of 2-lane 64-bit floating point shuffles. /// /// This is the basis function for the 2-lane 64-bit shuffles as we have full @@ -7267,6 +7292,11 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isShuffleEquivalent(Mask, 1, 3)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2); + if (Subtarget->hasSSE41()) + if (SDValue Blend = + lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, DAG)) + return Blend; + unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2, DAG.getConstant(SHUFPDMask, MVT::i8)); @@ -7353,6 +7383,11 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isShuffleEquivalent(Mask, 2, 6, 3, 7)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2); + if (Subtarget->hasSSE41()) + if (SDValue Blend = + lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, DAG)) + return Blend; + if (NumV2Elements == 1) { int V2Index = std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) - diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll index 619105f5026..f6382a98559 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -111,17 +111,35 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) { -; ALL-LABEL: @shuffle_v2f64_03 -; ALL: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] -; ALL-NEXT: retq +; SSE2-LABEL: @shuffle_v2f64_03 +; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: retq +; +; SSE3-LABEL: @shuffle_v2f64_03 +; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE3-NEXT: retq +; +; SSE41-LABEL: @shuffle_v2f64_03 +; SSE41: blendpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) { -; ALL-LABEL: @shuffle_v2f64_21 -; ALL: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] -; ALL-NEXT: movapd %xmm1, %xmm0 -; ALL-NEXT: retq +; SSE2-LABEL: @shuffle_v2f64_21 +; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE3-LABEL: @shuffle_v2f64_21 +; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: retq +; +; SSE41-LABEL: @shuffle_v2f64_21 +; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1> ret <2 x double> %shuffle } @@ -143,17 +161,35 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_03 -; ALL: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] -; ALL-NEXT: retq +; SSE2-LABEL: @shuffle_v2i64_03 +; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: retq +; +; SSE3-LABEL: @shuffle_v2i64_03 +; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE3-NEXT: retq +; +; SSE41-LABEL: @shuffle_v2i64_03 +; SSE41: blendpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_03_copy -; ALL: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1] -; ALL-NEXT: movapd %xmm1, %xmm0 -; ALL-NEXT: retq +; SSE2-LABEL: @shuffle_v2i64_03_copy +; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE3-LABEL: @shuffle_v2i64_03_copy +; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1] +; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: retq +; +; SSE41-LABEL: @shuffle_v2i64_03_copy +; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm2[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3> ret <2 x i64> %shuffle } @@ -204,18 +240,38 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_21 -; ALL: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] -; ALL-NEXT: movapd %xmm1, %xmm0 -; ALL-NEXT: retq +; SSE2-LABEL: @shuffle_v2i64_21 +; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE3-LABEL: @shuffle_v2i64_21 +; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: retq +; +; SSE41-LABEL: @shuffle_v2i64_21 +; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_21_copy -; ALL: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1] -; ALL-NEXT: movapd %xmm2, %xmm0 -; ALL-NEXT: retq +; SSE2-LABEL: @shuffle_v2i64_21_copy +; SSE2: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1] +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE3-LABEL: @shuffle_v2i64_21_copy +; SSE3: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1] +; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: retq +; +; SSE41-LABEL: @shuffle_v2i64_21_copy +; SSE41: blendpd {{.*}} # xmm2 = xmm2[0],xmm1[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1> ret <2 x i64> %shuffle } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll index 9105197f67c..d5bb55a2caa 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -216,11 +216,14 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) { ; SSE2-NEXT: retq ; ; SSE41-LABEL: @shuffle_v4f32_4zzz -; SSE41: insertps {{.*}} # xmm0 = xmm0[0],zero,zero,zero +; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]] +; SSE41-NEXT: blendps {{.*}} # [[X]] = xmm0[0],[[X]][1,2,3] +; SSE41-NEXT: movaps %[[X]], %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: @shuffle_v4f32_4zzz -; AVX1: vinsertps {{.*}} # xmm0 = xmm0[0],zero,zero,zero +; AVX1: vxorps %[[X:xmm[0-9]+]], %[[X]] +; AVX1-NEXT: vblendps {{.*}} # xmm0 = xmm0[0],[[X]][1,2,3] ; AVX1-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ret <4 x float> %shuffle @@ -290,11 +293,14 @@ define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) { ; SSE2-NEXT: retq ; ; SSE41-LABEL: @shuffle_v4f32_zzz7 -; SSE41: insertps {{.*}} # xmm0 = zero,zero,zero,xmm0[3] +; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]] +; SSE41-NEXT: blendps {{.*}} # [[X]] = [[X]][0,1,2],xmm0[3] +; SSE41-NEXT: movaps %[[X]], %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: @shuffle_v4f32_zzz7 -; AVX1: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[3] +; AVX1: vxorps %[[X:xmm[0-9]+]], %[[X]] +; AVX1-NEXT: vblendps {{.*}} # xmm0 = [[X]][0,1,2],xmm0[3] ; AVX1-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ret <4 x float> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index a21b78985d7..cd79a38ca4a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -40,7 +40,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: @shuffle_v4i64_0300 ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm0[0],xmm1[1] ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq @@ -119,7 +119,7 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) { ; AVX1-LABEL: @shuffle_v4f64_0300 ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm0[0],xmm1[1] ; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq @@ -282,7 +282,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) { ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0] -; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm2[0],xmm1[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm2[0],xmm1[1] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4> @@ -293,7 +293,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) { ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm2 = xmm2[0,0] -; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm2[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm1[0],xmm2[1] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2> @@ -305,7 +305,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0] ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0] -; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2> @@ -317,7 +317,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0] ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0] -; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2> @@ -335,9 +335,9 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: @shuffle_v4i64_0451 ; AVX1: # BB#0: ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1] -; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm0[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm0[1] ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0] -; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1> @@ -355,9 +355,9 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: @shuffle_v4i64_4015 ; AVX1: # BB#0: ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1] -; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm1[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm1[1] ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0] -; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5> @@ -370,7 +370,7 @@ define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) { ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 ; AVX1-NEXT: vpunpckhqdq {{.*}} # xmm0 = xmm0[1,1] ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[2,3,0,1] -; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1] ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq |

