diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining.ll | 8 |
2 files changed, 11 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2b496b04f01..1de40c12562 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26072,17 +26072,17 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, ShuffleVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(Mask, {0, 3}) && FloatDomain) { - if (Subtarget.hasSSE2()) { - std::swap(V1, V2); - Shuffle = X86ISD::MOVSD; - ShuffleVT = MVT::v2f64; - return true; - } + if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() && + (FloatDomain || !Subtarget.hasSSE41())) { + std::swap(V1, V2); + Shuffle = X86ISD::MOVSD; + ShuffleVT = MaskVT; + return true; } - if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && FloatDomain) { + if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && + (FloatDomain || !Subtarget.hasSSE41())) { Shuffle = X86ISD::MOVSS; - ShuffleVT = MVT::v4f32; + ShuffleVT = MaskVT; return true; } } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 88f1202aeb5..a65d830351e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1804,9 +1804,7 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) { define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) { ; SSE2-LABEL: combine_test1c: ; SSE2: # BB#0: -; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] @@ -1815,9 +1813,7 @@ define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) { ; ; SSSE3-LABEL: combine_test1c: ; SSSE3: # BB#0: -; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] |