[DAGCombiner] use narrow load to avoid vector extract

If we have (extract_subvector(load wide vector)) with no other users, that can just be (load narrow vector). This is intentionally conservative. Follow-ups may loosen the one-use constraint to account for the extract cost or just remove the one-use check.

The memop chain updating is based on code that already exists multiple times in x86 lowering, so that should be pulled into a helper function as a follow-up.

Background: this is a potential improvement noticed via regressions caused by making x86's peekThroughBitcasts() not loop on consecutive bitcasts (see comments in D33137).

Differential Revision: https://reviews.llvm.org/D33578

llvm-svn: 304072
author: Sanjay Patel <spatel@rotateright.com> 2017-05-27 14:07:03 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2017-05-27 14:07:03 +0000
commit: 33f4a9728741b1305496ec89f5b472a57e21a32b (patch)
tree: 647340b9f5eb29c43204f874206ff6a7cae58647 /llvm/test/CodeGen/ARM
parent: 7aa22859b66223c59a5dacc45e8bd7535659057b (diff)
download: bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.tar.gz bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.zip
2 files changed, 7 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll
index 81b22ee12cd..c08ed81d042 100644
--- a/llvm/test/CodeGen/ARM/vcombine.ll
+++ b/llvm/test/CodeGen/ARM/vcombine.ll
@@ -99,7 +99,9 @@ define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
 define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
 ; CHECK: vget_high8
 ; CHECK-NOT: vst
-; CHECK-LE: vmov r0, r1, d17
+; CHECK-LE-NOT: vld1.64 {d16, d17}, [r0]
+; CHECK-LE: vldr  d16, [r0, #8]
+; CHECK-LE: vmov  r0, r1, d16
 ; CHECK-BE: vmov r1, r0, d16
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/ARM/vext.ll b/llvm/test/CodeGen/ARM/vext.ll
index e44e757a316..5742dc31497 100644
--- a/llvm/test/CodeGen/ARM/vext.ll
+++ b/llvm/test/CodeGen/ARM/vext.ll
@@ -199,10 +199,10 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ; CHECK-LABEL: test_undef:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
-; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT:    vzip.16 d19, d16
-; CHECK-NEXT:    vmov r0, r1, d19
+; CHECK-NEXT:    vldr  d16, [r1]
+; CHECK-NEXT:    vldr  d17, [r0, #8]
+; CHECK-NEXT:    vzip.16 d17, d16
+; CHECK-NEXT:    vmov  r0, r1, d17
 ; CHECK-NEXT:    mov pc, lr
         %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp2 = load <8 x i16>, <8 x i16>* %B
author	Sanjay Patel <spatel@rotateright.com>	2017-05-27 14:07:03 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2017-05-27 14:07:03 +0000
commit	33f4a9728741b1305496ec89f5b472a57e21a32b (patch)
tree	647340b9f5eb29c43204f874206ff6a7cae58647 /llvm/test/CodeGen/ARM
parent	7aa22859b66223c59a5dacc45e8bd7535659057b (diff)
download	bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.tar.gz bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.zip