summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/ARM
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-05-27 14:07:03 +0000
committerSanjay Patel <spatel@rotateright.com>2017-05-27 14:07:03 +0000
commit33f4a9728741b1305496ec89f5b472a57e21a32b (patch)
tree647340b9f5eb29c43204f874206ff6a7cae58647 /llvm/test/CodeGen/ARM
parent7aa22859b66223c59a5dacc45e8bd7535659057b (diff)
downloadbcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.tar.gz
bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.zip
[DAGCombiner] use narrow load to avoid vector extract
If we have (extract_subvector(load wide vector)) with no other users, that can just be (load narrow vector). This is intentionally conservative. Follow-ups may loosen the one-use constraint to account for the extract cost or just remove the one-use check. The memop chain updating is based on code that already exists multiple times in x86 lowering, so that should be pulled into a helper function as a follow-up. Background: this is a potential improvement noticed via regressions caused by making x86's peekThroughBitcasts() not loop on consecutive bitcasts (see comments in D33137). Differential Revision: https://reviews.llvm.org/D33578 llvm-svn: 304072
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r--llvm/test/CodeGen/ARM/vcombine.ll4
-rw-r--r--llvm/test/CodeGen/ARM/vext.ll8
2 files changed, 7 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll
index 81b22ee12cd..c08ed81d042 100644
--- a/llvm/test/CodeGen/ARM/vcombine.ll
+++ b/llvm/test/CodeGen/ARM/vcombine.ll
@@ -99,7 +99,9 @@ define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
; CHECK: vget_high8
; CHECK-NOT: vst
-; CHECK-LE: vmov r0, r1, d17
+; CHECK-LE-NOT: vld1.64 {d16, d17}, [r0]
+; CHECK-LE: vldr d16, [r0, #8]
+; CHECK-LE: vmov r0, r1, d16
; CHECK-BE: vmov r1, r0, d16
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/ARM/vext.ll b/llvm/test/CodeGen/ARM/vext.ll
index e44e757a316..5742dc31497 100644
--- a/llvm/test/CodeGen/ARM/vext.ll
+++ b/llvm/test/CodeGen/ARM/vext.ll
@@ -199,10 +199,10 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: test_undef:
; CHECK: @ BB#0:
-; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vzip.16 d19, d16
-; CHECK-NEXT: vmov r0, r1, d19
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0, #8]
+; CHECK-NEXT: vzip.16 d17, d16
+; CHECK-NEXT: vmov r0, r1, d17
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
OpenPOWER on IntegriCloud