[ARM] Split 128-bit vectors in BUILD_VECTOR lowering

Given that INSERT_VECTOR_ELT operates on D registers anyway, combining 64-bit vectors into a 128-bit vector is basically free. Therefore, try to split BUILD_VECTOR nodes before giving up and lowering them to a series of INSERT_VECTOR_ELT instructions. Sometimes this allows dramatically better lowerings; see testcases for examples.

Inspired by similar code in the x86 backend for AVX.

Differential Revision: https://reviews.llvm.org/D27624

llvm-svn: 289706
author: Eli Friedman <efriedma@codeaurora.org> 2016-12-14 20:44:38 +0000
committer: Eli Friedman <efriedma@codeaurora.org> 2016-12-14 20:44:38 +0000
commit: cbed30c5012b9b52a495dadc94f14ea54f1b5b10 (patch)
tree: 8df8e1f2f28f11f7865edfb9cd0513a023651362 /llvm/test/CodeGen/ARM/vcombine.ll
parent: 53816d074d928b3a39b6c07aabe43b419e9cc980 (diff)
download: bcm5719-llvm-cbed30c5012b9b52a495dadc94f14ea54f1b5b10.tar.gz
bcm5719-llvm-cbed30c5012b9b52a495dadc94f14ea54f1b5b10.zip
1 file changed, 18 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll
index fc171889f5f..81b22ee12cd 100644
--- a/llvm/test/CodeGen/ARM/vcombine.ll
+++ b/llvm/test/CodeGen/ARM/vcombine.ll
@@ -105,3 +105,21 @@ define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <8 x i8> %tmp2
 }
+
+; vcombine(vld1_dup(p), vld1_dup(p2)): each 64-bit half is a splat of one loaded i16, so each half is expected to lower to a single vld1.16 {dN[]} load.
+define <8 x i16> @vcombine_vdup(<8 x i16> %src, i16* nocapture readonly %p) { ; %src is never referenced in the body
+; CHECK-LABEL: vcombine_vdup:
+; CHECK: vld1.16 {d16[]},
+; CHECK: vld1.16 {d17[]},
+; CHECK-LE: vmov    r0, r1, d16
+; CHECK-LE: vmov    r2, r3, d17
+  %a1 = load i16, i16* %p, align 2
+  %a2 = insertelement <4 x i16> undef, i16 %a1, i32 0
+  %a3 = shufflevector <4 x i16> %a2, <4 x i16> undef, <4 x i32> zeroinitializer ; all-zero mask: broadcast lane 0 (p[0]) to all 4 lanes
+  %p2 = getelementptr inbounds i16, i16* %p, i32 1 ; address of the next i16, p[1]
+  %b1 = load i16, i16* %p2, align 2
+  %b2 = insertelement <4 x i16> undef, i16 %b1, i32 0
+  %b3 = shufflevector <4 x i16> %b2, <4 x i16> undef, <4 x i32> zeroinitializer ; all-zero mask: broadcast lane 0 (p[1]) to all 4 lanes
+  %shuffle = shufflevector <4 x i16> %a3, <4 x i16> %b3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask over both inputs: %a3 is the low half, %b3 the high half
+  ret <8 x i16> %shuffle
+}
author	Eli Friedman <efriedma@codeaurora.org>	2016-12-14 20:44:38 +0000
committer	Eli Friedman <efriedma@codeaurora.org>	2016-12-14 20:44:38 +0000
commit	cbed30c5012b9b52a495dadc94f14ea54f1b5b10 (patch)
tree	8df8e1f2f28f11f7865edfb9cd0513a023651362 /llvm/test/CodeGen/ARM/vcombine.ll
parent	53816d074d928b3a39b6c07aabe43b419e9cc980 (diff)
download	bcm5719-llvm-cbed30c5012b9b52a495dadc94f14ea54f1b5b10.tar.gz bcm5719-llvm-cbed30c5012b9b52a495dadc94f14ea54f1b5b10.zip