diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll | 54 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll | 6 |
3 files changed, 35 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c7440136255..6f876b5aafe 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16880,6 +16880,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, return (SVT.getVectorNumElements() == 2 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isMOVLMask(M, SVT) || + isMOVHLPSMask(M, SVT) || isSHUFPMask(M, SVT) || isPSHUFDMask(M, SVT) || isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) || diff --git a/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll b/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll index d966d7efdd1..c2cff517d4e 100644 --- a/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll +++ b/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll @@ -35,14 +35,10 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b) { %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ret <4 x float> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'shufps, palignr'. ; CHECK-LABEL: test4 ; Mask: [6,7,2,3] -; CHECK: shufps $94 -; CHECK: palignr $8 -; CHECK: ret +; CHECK: movhlps +; CHECK-NEXT: ret define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> @@ -90,13 +86,10 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) { %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ret <4 x i32> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend thinks that -; shuffle mask [6,7,2,3] is not legal. ; CHECK-LABEL: test9 ; Mask: [6,7,2,3] -; CHECK: shufps $94 -; CHECK: palignr $8 -; CHECK: ret +; CHECK: movhlps +; CHECK-NEXT: ret define <4 x i32> @test10(<4 x i32> %a, <4 x i32> %b) { %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> @@ -144,13 +137,9 @@ define <4 x float> @test14(<4 x float> %a, <4 x float> %b) { %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ret <4 x float> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'pshufd, palignr'. ; CHECK-LABEL: test14 ; Mask: [6,7,2,3] -; CHECK: pshufd $94 -; CHECK: palignr $8 +; CHECK: movhlps ; CHECK: ret define <4 x float> @test15(<4 x float> %a, <4 x float> %b) { @@ -199,13 +188,9 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) { %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ret <4 x i32> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'shufps, palignr'. ; CHECK-LABEL: test19 ; Mask: [6,7,2,3] -; CHECK: pshufd $94 -; CHECK: palignr $8 +; CHECK: movhlps ; CHECK: ret define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) { @@ -371,3 +356,30 @@ define <4 x float> @blend_123(<4 x float> %a, <4 x float> %b) { ; CHECK: movss ; CHECK: ret +define <4 x i32> @test_movhl_1(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3> + %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3> + ret <4 x i32> %2 +} +; CHECK-LABEL: test_movhl_1 +; CHECK: movhlps +; CHECK-NEXT: ret + +define <4 x i32> @test_movhl_2(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6> + %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2> + ret <4 x i32> %2 +} +; CHECK-LABEL: test_movhl_2 +; CHECK: movhlps +; CHECK-NEXT: ret + +define <4 x i32> @test_movhl_3(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2> + %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2> + ret <4 x i32> %2 +} +; CHECK-LABEL: test_movhl_3 +; CHECK: movhlps +; CHECK-NEXT: ret + diff --git a/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll b/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll index 45c624a61d4..6ca45920c80 100644 --- a/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll +++ b/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll @@ -38,14 +38,10 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b) { %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ret <4 x float> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'movhlps, palignr'. ; CHECK-LABEL: test4 ; Mask: [6,7,2,3] ; CHECK: movhlps -; CHECK: palignr $8 -; CHECK: ret +; CHECK-NEXT: ret define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3> |