ARM NEON: Handle v16i8 and v8i16 reverse shuffles

Lower reverse shuffles to a vrev64 and a vext instruction instead of the default legalization of storing and loading to the stack. This is important because we generate reverse shuffles in the loop vectorizer when we reverse store to an array.

  uint8_t Arr[N];
  for (i = 0; i < N; ++i)
    Arr[N - i - 1] = ...

radar://13171760

llvm-svn: 174929
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-02-12 01:58:32 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-02-12 01:58:32 +0000
commit: 1f3d3ca7698c09f2362930ff0732957e74c9b115 (patch)
tree: c429f15848b567a1bbf4c08e4de4b1f5c750ea33 /llvm/test/CodeGen/ARM/vector-DAGCombine.ll
parent: 6ced97aaae91bf7a05d3b79814c91cd943df7e4a (diff)
download: bcm5719-llvm-1f3d3ca7698c09f2362930ff0732957e74c9b115.tar.gz
bcm5719-llvm-1f3d3ca7698c09f2362930ff0732957e74c9b115.zip
1 files changed, 27 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
index a38a0feae04..42964deb0b5 100644
--- a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -133,3 +133,30 @@ define i16 @foldBuildVectors() {
   %3 = extractelement <8 x i16> %2, i32 0
   ret i16 %3
 }
+
+; Test that reversing all lanes of a v8i16 vector (shuffle mask 7, 6, ..., 0)
+; generates a vrev64.16 followed by a vext.16.
+; CHECK: reverse_v8i16
+define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
+  %v0 = load <8 x i16>* %loadaddr ; vector whose lanes get reversed
+  ; CHECK: vrev64.16
+  ; CHECK: vext.16
+  %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
+              <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <8 x i16> %v1, <8 x i16>* %storeaddr ; write the reversed vector out
+  ret void
+}
+
+; Test that reversing all lanes of a v16i8 vector (shuffle mask 15, 14, ..., 0)
+; generates a vrev64.8 followed by a vext.8.
+; CHECK: reverse_v16i8
+define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
+  %v0 = load <16 x i8>* %loadaddr ; vector whose lanes get reversed
+  ; CHECK: vrev64.8
+  ; CHECK: vext.8
+  %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
+       <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
+                   i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <16 x i8> %v1, <16 x i8>* %storeaddr ; write the reversed vector out
+  ret void
+}
author	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-02-12 01:58:32 +0000
committer	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-02-12 01:58:32 +0000
commit	1f3d3ca7698c09f2362930ff0732957e74c9b115 (patch)
tree	c429f15848b567a1bbf4c08e4de4b1f5c750ea33 /llvm/test/CodeGen/ARM/vector-DAGCombine.ll
parent	6ced97aaae91bf7a05d3b79814c91cd943df7e4a (diff)
download	bcm5719-llvm-1f3d3ca7698c09f2362930ff0732957e74c9b115.tar.gz bcm5719-llvm-1f3d3ca7698c09f2362930ff0732957e74c9b115.zip