[ARM] Do not use vtrn for vectorshuffle if the order is reversed

The tests in isVTRNMask and isVTRN_v_undef_Mask should also check that the elements of the upper and lower half of the vectorshuffle occur in the correct order when both halves are used. Without this check, the code assumes that it is correct to use vector transpose (vtrn) for the masks <1, 1, 0, 0> and <1, 3, 0, 2>, among others, but in these cases the transpose actually produces the shuffles <0, 0, 1, 1> and <0, 2, 1, 3>, which is incorrect.

Patch by Jeroen Ketema!

llvm-svn: 247254
author: James Molloy <james.molloy@arm.com> 2015-09-10 08:42:28 +0000
committer: James Molloy <james.molloy@arm.com> 2015-09-10 08:42:28 +0000
commit: 8c995a93ce8502e44e7b3ef75357ee62e09ba090 (patch)
tree: 0c40276f05925b8b92f48d51db19906509f6eabb /llvm/test/CodeGen/ARM
parent: 6f77949d8b1f8a2abc0186a025d1c49d6dfd9e86 (diff)
download: bcm5719-llvm-8c995a93ce8502e44e7b3ef75357ee62e09ba090.tar.gz
bcm5719-llvm-8c995a93ce8502e44e7b3ef75357ee62e09ba090.zip
3 files changed, 50 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vtrn.ll b/llvm/test/CodeGen/ARM/vtrn.ll
index 5c22d1a44f6..89af2318bfb 100644
--- a/llvm/test/CodeGen/ARM/vtrn.ll
+++ b/llvm/test/CodeGen/ARM/vtrn.ll
@@ -371,3 +371,31 @@ define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
   %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
   ret <8 x i8> %rv
 }
+
+; Negative test: both 4-lane halves of the 8-lane mask select lanes <1, 5, 3, 7> (lane 0 is undef in the first half); the CHECK-NOT below asserts that no vtrn appears between the entry block label and the return.
+define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
+entry:
+  ; CHECK-LABEL: lower_twice_no_vtrn
+  ; CHECK: @ BB#0:
+  ; CHECK-NOT: vtrn
+  ; CHECK: mov pc, lr
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
+  store <8 x i16> %0, <8 x i16>* %C
+  ret void
+}
+
+; Negative test: both 4-lane halves of the 8-lane mask select lanes <0, 4, 2, 6> (lane 1 is undef in the first half); the CHECK-NOT below asserts that no vtrn appears between the entry block label and the return.
+define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
+entry:
+  ; CHECK-LABEL: upper_twice_no_vtrn
+  ; CHECK: @ BB#0:
+  ; CHECK-NOT: vtrn
+  ; CHECK: mov pc, lr
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
+  store <8 x i16> %0, <8 x i16>* %C
+  ret void
+}
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 570623e4f64..6c24348b8cf 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -286,6 +286,18 @@ entry:
   ret <4 x i32> %0
 }
 
+define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) {
+entry:
+  ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn
+  ; CHECK-NOT: vtrn
+  ; CHECK: vuzp
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 0, i32 2> ; mask takes the odd lanes (1, 3) first, then the even lanes (0, 2); expected to lower to vuzp with no vtrn before it
+  store <4 x i32> %0, <4 x i32>* %C
+  ret void
+}
+
 define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
 ; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
 ; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
diff --git a/llvm/test/CodeGen/ARM/vzip.ll b/llvm/test/CodeGen/ARM/vzip.ll
index e7b7da333f4..24386a236f4 100644
--- a/llvm/test/CodeGen/ARM/vzip.ll
+++ b/llvm/test/CodeGen/ARM/vzip.ll
@@ -295,3 +295,13 @@ entry:
   ret <4 x i32> %0
 }
 
+define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) {
+entry:
+  ; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn
+  ; CHECK-NOT: vtrn
+  ; CHECK: vzip
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0> ; single-input mask: lane 1 duplicated, then lane 0 duplicated; expected to lower to vzip with no vtrn before it
+  store <4 x i32> %0, <4 x i32>* %B
+  ret void
+}
author	James Molloy <james.molloy@arm.com>	2015-09-10 08:42:28 +0000
committer	James Molloy <james.molloy@arm.com>	2015-09-10 08:42:28 +0000
commit	8c995a93ce8502e44e7b3ef75357ee62e09ba090 (patch)
tree	0c40276f05925b8b92f48d51db19906509f6eabb /llvm/test/CodeGen/ARM
parent	6f77949d8b1f8a2abc0186a025d1c49d6dfd9e86 (diff)
download	bcm5719-llvm-8c995a93ce8502e44e7b3ef75357ee62e09ba090.tar.gz bcm5719-llvm-8c995a93ce8502e44e7b3ef75357ee62e09ba090.zip