summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/ARM
diff options
context:
space:
mode:
author: James Molloy <james.molloy@arm.com> 2015-09-10 08:42:28 +0000
committer: James Molloy <james.molloy@arm.com> 2015-09-10 08:42:28 +0000
commit: 8c995a93ce8502e44e7b3ef75357ee62e09ba090 (patch)
tree: 0c40276f05925b8b92f48d51db19906509f6eabb /llvm/test/CodeGen/ARM
parent: 6f77949d8b1f8a2abc0186a025d1c49d6dfd9e86 (diff)
download: bcm5719-llvm-8c995a93ce8502e44e7b3ef75357ee62e09ba090.tar.gz
bcm5719-llvm-8c995a93ce8502e44e7b3ef75357ee62e09ba090.zip
[ARM] Do not use vtrn for vectorshuffle if the order is reversed
The checks in isVTRNMask and isVTRN_v_undef_Mask should also verify that the elements of the upper and lower halves of the vectorshuffle mask occur in the correct order when both halves are used. Without this check, the code assumes that it is correct to use vector transpose (vtrn) for the masks <1, 1, 0, 0> and <1, 3, 0, 2>, among others, but in these cases the transpose actually generates the shuffles <0, 0, 1, 1> and <0, 2, 1, 3>, which is incorrect. Patch by Jeroen Ketema! llvm-svn: 247254
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r-- llvm/test/CodeGen/ARM/vtrn.ll | 28
-rw-r--r-- llvm/test/CodeGen/ARM/vuzp.ll | 12
-rw-r--r-- llvm/test/CodeGen/ARM/vzip.ll | 10
3 files changed, 50 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vtrn.ll b/llvm/test/CodeGen/ARM/vtrn.ll
index 5c22d1a44f6..89af2318bfb 100644
--- a/llvm/test/CodeGen/ARM/vtrn.ll
+++ b/llvm/test/CodeGen/ARM/vtrn.ll
@@ -371,3 +371,31 @@ define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
%rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
ret <8 x i8> %rv
}
+
+; Negative test: both halves of the mask pick the same vtrn result (<1, 5, 3, 7>) instead of one result each, so a vtrn must not be generated.
+define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
+entry:
+ ; CHECK-LABEL: lower_twice_no_vtrn
+ ; CHECK: @ BB#0:
+ ; CHECK-NOT: vtrn
+ ; CHECK: mov pc, lr
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
+ store <8 x i16> %0, <8 x i16>* %C
+ ret void
+}
+
+; Negative test: both halves of the mask pick the same vtrn result (<0, 4, 2, 6>) instead of one result each, so a vtrn must not be generated.
+define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
+entry:
+ ; CHECK-LABEL: upper_twice_no_vtrn
+ ; CHECK: @ BB#0:
+ ; CHECK-NOT: vtrn
+ ; CHECK: mov pc, lr
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
+ store <8 x i16> %0, <8 x i16>* %C
+ ret void
+}
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 570623e4f64..6c24348b8cf 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -286,6 +286,18 @@ entry:
ret <4 x i32> %0
}
+define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) {
+entry:
+ ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn
+ ; CHECK-NOT: vtrn
+ ; CHECK: vuzp
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+ store <4 x i32> %0, <4 x i32>* %C
+ ret void
+}
+
define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
diff --git a/llvm/test/CodeGen/ARM/vzip.ll b/llvm/test/CodeGen/ARM/vzip.ll
index e7b7da333f4..24386a236f4 100644
--- a/llvm/test/CodeGen/ARM/vzip.ll
+++ b/llvm/test/CodeGen/ARM/vzip.ll
@@ -295,3 +295,13 @@ entry:
ret <4 x i32> %0
}
+define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) {
+entry:
+ ; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn
+ ; CHECK-NOT: vtrn
+ ; CHECK: vzip
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+ store <4 x i32> %0, <4 x i32>* %B
+ ret void
+}
OpenPOWER on IntegriCloud