[ARM] - Fix lowering of shufflevectors in AArch32

Some shufflevectors are currently being incorrectly lowered in the AArch32 backend because the existing checks for detecting NEON operations from a shufflevector instruction expect the shuffle mask and the vector operands to be of the same length. This is not always the case, as the mask may be twice as long as the operand; in that situation only the lower half of the shuffle mask gets checked, so provided the lower half looks like a vector transpose (or is even just all -1 for undef), the shufflevector may be incorrectly lowered into a vector transpose (VTRN) instruction. This patch fixes this by accommodating both cases and adds regression tests. Differential Revision: http://reviews.llvm.org/D11407 llvm-svn: 243103
author: Luke Cheeseman <luke.cheeseman@arm.com> 2015-07-24 09:57:05 +0000
committer: Luke Cheeseman <luke.cheeseman@arm.com> 2015-07-24 09:57:05 +0000
commit: 4d45ff2b87010a78671cd9eb4903ad945ebdb7ea (patch)
tree: 94cd41f5a7740c6aa22f7be123e686de0d1e1765 /llvm/test/CodeGen/ARM/vuzp.ll
parent: f8b5874b6383bfb06f4258bc906713c2d507c3d5 (diff)
download: bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.tar.gz
bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.zip
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 5510634b066..8e71ad81264 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -264,3 +264,24 @@ define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
 	ret <16 x i16> %tmp3
 }
+
+define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) { ; regression: mask is twice as long as each operand and its lower half is entirely undef
+entry:
+  ; CHECK-LABEL: vuzp_lower_shufflemask_undef
+  ; CHECK: vuzp
+	%tmp1 = load <4 x i16>, <4 x i16>* %A ; first 4-element operand
+	%tmp2 = load <4 x i16>, <4 x i16>* %B ; second 4-element operand
+  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7> ; lanes 0-3 undef; lanes 4-7 pick indices 1,3,5,7 of the concatenated operands
+  ret <8 x i16> %0 ; the test expects this shuffle to be emitted as vuzp
+}
+
+define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) { ; regression: mask is twice as long as each operand and its lower half is <0, 0>
+entry:
+  ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed
+  ; CHECK-NOT: vtrn
+  ; CHECK: vuzp
+  %tmp1 = load <2 x i32>, <2 x i32>* %A ; first 2-element operand
+	%tmp2 = load <2 x i32>, <2 x i32>* %B ; second 2-element operand
+  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3> ; lanes 0-1 both pick index 0; lanes 2-3 pick indices 1 and 3 of the concatenated operands
+  ret <4 x i32> %0 ; the test expects vuzp here, with no vtrn emitted before it
+}
author	Luke Cheeseman <luke.cheeseman@arm.com>	2015-07-24 09:57:05 +0000
committer	Luke Cheeseman <luke.cheeseman@arm.com>	2015-07-24 09:57:05 +0000
commit	4d45ff2b87010a78671cd9eb4903ad945ebdb7ea (patch)
tree	94cd41f5a7740c6aa22f7be123e686de0d1e1765 /llvm/test/CodeGen/ARM/vuzp.ll
parent	f8b5874b6383bfb06f4258bc906713c2d507c3d5 (diff)
download	bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.tar.gz bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.zip