diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-02-12 01:58:32 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-02-12 01:58:32 +0000 |
commit | 1f3d3ca7698c09f2362930ff0732957e74c9b115 (patch) | |
tree | c429f15848b567a1bbf4c08e4de4b1f5c750ea33 /llvm/test/CodeGen/ARM/vector-DAGCombine.ll | |
parent | 6ced97aaae91bf7a05d3b79814c91cd943df7e4a (diff) | |
download | bcm5719-llvm-1f3d3ca7698c09f2362930ff0732957e74c9b115.tar.gz bcm5719-llvm-1f3d3ca7698c09f2362930ff0732957e74c9b115.zip |
ARM NEON: Handle v16i8 and v8i16 reverse shuffles
Lower reverse shuffles to a vrev64 and a vext instruction instead of the default
legalization of storing and loading to the stack. This is important because we
generate reverse shuffles in the loop vectorizer when we reverse store to an
array.
  uint8_t Arr[N];
  for (i = 0; i < N; ++i)
    Arr[N - i - 1] = ...
radar://13171760
llvm-svn: 174929
Diffstat (limited to 'llvm/test/CodeGen/ARM/vector-DAGCombine.ll')
-rw-r--r-- | llvm/test/CodeGen/ARM/vector-DAGCombine.ll | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
index a38a0feae04..42964deb0b5 100644
--- a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -133,3 +133,30 @@ define i16 @foldBuildVectors() {
   %3 = extractelement <8 x i16> %2, i32 0
   ret i16 %3
 }
+
+; Test that we are generating vrev and vext for reverse shuffles of v8i16
+; shuffles.
+; CHECK: reverse_v8i16
+define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
+  %v0 = load <8 x i16>* %loadaddr
+  ; CHECK: vrev64.16
+  ; CHECK: vext.16
+  %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
+              <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <8 x i16> %v1, <8 x i16>* %storeaddr
+  ret void
+}
+
+; Test that we are generating vrev and vext for reverse shuffles of v16i8
+; shuffles.
+; CHECK: reverse_v16i8
+define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
+  %v0 = load <16 x i8>* %loadaddr
+  ; CHECK: vrev64.8
+  ; CHECK: vext.8
+  %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
+       <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
+                   i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <16 x i8> %v1, <16 x i8>* %storeaddr
+  ret void
+}
 |