summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/ARM/vext.ll
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2017-05-27 14:07:03 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2017-05-27 14:07:03 +0000
commit: 33f4a9728741b1305496ec89f5b472a57e21a32b (patch)
tree: 647340b9f5eb29c43204f874206ff6a7cae58647 /llvm/test/CodeGen/ARM/vext.ll
parent: 7aa22859b66223c59a5dacc45e8bd7535659057b (diff)
download: bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.tar.gz
download: bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.zip
[DAGCombiner] use narrow load to avoid vector extract
If we have (extract_subvector(load wide vector)) with no other users, that can just be (load narrow vector). This is intentionally conservative. Follow-ups may loosen the one-use constraint to account for the extract cost or just remove the one-use check.

The memop chain updating is based on code that already exists multiple times in x86 lowering, so that should be pulled into a helper function as a follow-up.

Background: this is a potential improvement noticed via regressions caused by making x86's peekThroughBitcasts() not loop on consecutive bitcasts (see comments in D33137).

Differential Revision: https://reviews.llvm.org/D33578

llvm-svn: 304072
Diffstat (limited to 'llvm/test/CodeGen/ARM/vext.ll')
-rw-r--r-- llvm/test/CodeGen/ARM/vext.ll 8
1 file changed, 4 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/ARM/vext.ll b/llvm/test/CodeGen/ARM/vext.ll
index e44e757a316..5742dc31497 100644
--- a/llvm/test/CodeGen/ARM/vext.ll
+++ b/llvm/test/CodeGen/ARM/vext.ll
@@ -199,10 +199,10 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: test_undef:
; CHECK: @ BB#0:
-; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vzip.16 d19, d16
-; CHECK-NEXT: vmov r0, r1, d19
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0, #8]
+; CHECK-NEXT: vzip.16 d17, d16
+; CHECK-NEXT: vmov r0, r1, d17
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
OpenPOWER on IntegriCloud