diff options
author | Luke Cheeseman <luke.cheeseman@arm.com> | 2015-07-24 09:57:05 +0000 |
---|---|---|
committer | Luke Cheeseman <luke.cheeseman@arm.com> | 2015-07-24 09:57:05 +0000 |
commit | 4d45ff2b87010a78671cd9eb4903ad945ebdb7ea (patch) | |
tree | 94cd41f5a7740c6aa22f7be123e686de0d1e1765 /llvm/test/CodeGen/ARM/vuzp.ll | |
parent | f8b5874b6383bfb06f4258bc906713c2d507c3d5 (diff) | |
download | bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.tar.gz bcm5719-llvm-4d45ff2b87010a78671cd9eb4903ad945ebdb7ea.zip |
[ARM] - Fix lowering of shufflevectors in AArch32
Some shufflevectors are currently being incorrectly lowered in the AArch32
backend as the existing checks for detecting the NEON operations from the
shufflevector instruction expect the shuffle mask and the vector operands to be
of the same length.
This is not always the case as the mask may be twice as long as the operand;
here only the lower half of the shufflemask gets checked, so provided the lower
half of the shufflemask looks like a vector transpose (or is even just all -1,
i.e. undef), the shuffle may get incorrectly lowered into a vector
transpose (VTRN) instruction.
This patch fixes this by accommodating both cases and adds regression tests.
Differential Revision: http://reviews.llvm.org/D11407
llvm-svn: 243103
Diffstat (limited to 'llvm/test/CodeGen/ARM/vuzp.ll')
-rw-r--r-- | llvm/test/CodeGen/ARM/vuzp.ll | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll index 5510634b066..8e71ad81264 100644 --- a/llvm/test/CodeGen/ARM/vuzp.ll +++ b/llvm/test/CodeGen/ARM/vuzp.ll @@ -264,3 +264,24 @@ define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> ret <16 x i16> %tmp3 } + +define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) { +entry: + ; CHECK-LABEL: vuzp_lower_shufflemask_undef + ; CHECK: vuzp + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7> + ret <8 x i16> %0 +} + +define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) { +entry: + ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed + ; CHECK-NOT: vtrn + ; CHECK: vuzp + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3> + ret <4 x i32> %0 +} |