summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/ARM/vuzp.ll
diff options
context:
space:
mode:
author: Luke Cheeseman <luke.cheeseman@arm.com> 2015-07-24 09:57:05 +0000
committer: Luke Cheeseman <luke.cheeseman@arm.com> 2015-07-24 09:57:05 +0000
commit: 4d45ff2b87010a78671cd9eb4903ad945ebdb7ea (patch)
tree: 94cd41f5a7740c6aa22f7be123e686de0d1e1765 /llvm/test/CodeGen/ARM/vuzp.ll
parent: f8b5874b6383bfb06f4258bc906713c2d507c3d5 (diff)
download: bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.tar.gz
bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.zip
[ARM] - Fix lowering of shufflevectors in AArch32
Some shufflevectors are currently being incorrectly lowered in the AArch32 backend because the existing checks for detecting the NEON operations from the shufflevector instruction expect the shuffle mask and the vector operands to be of the same length. This is not always the case, as the mask may be twice as long as the operand; here only the lower half of the shuffle mask gets checked, so provided the lower half of the shuffle mask looks like a vector transpose (or is even just all -1 for undef), the intrinsics may get incorrectly lowered into a vector transpose (VTRN) instruction. This patch fixes this by accommodating both cases and adds regression tests. Differential Revision: http://reviews.llvm.org/D11407 llvm-svn: 243103
Diffstat (limited to 'llvm/test/CodeGen/ARM/vuzp.ll')
-rw-r--r-- llvm/test/CodeGen/ARM/vuzp.ll 21
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 5510634b066..8e71ad81264 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -264,3 +264,24 @@ define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
ret <16 x i16> %tmp3
}
+
+define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) { ; regression test: the 8-element mask is twice as long as the 4-element operands and its lower half is all undef
+entry: ; the patterns below anchor on "name:" and on "vuzp.<size>"; a bare "vuzp" would already be satisfied by this function's own name
+ ; CHECK-LABEL: vuzp_lower_shufflemask_undef:
+ ; CHECK: vuzp.{{[0-9]+}}
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7> ; upper half selects the odd-indexed elements (1, 3, 5, 7) of the concatenated operands
+ ret <8 x i16> %0
+}
+
+define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) { ; regression test: the 4-element mask is twice as long as the 2-element operands and its lower half is <0, 0>
+entry: ; the patterns below anchor on "name:" and on "vuzp.<size>"; a bare "vuzp" would match this function's own name and could end the vtrn exclusion window before any instruction is scanned
+ ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed:
+ ; CHECK-NOT: vtrn
+ ; CHECK: vuzp.{{[0-9]+}}
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3> ; result = <A[0], A[0], A[1], B[1]>; indices 2 and 3 address the second operand
+ ret <4 x i32> %0
+}
OpenPOWER on IntegriCloud