diff options
author | Sanjay Patel <spatel@rotateright.com> | 2017-05-27 14:07:03 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2017-05-27 14:07:03 +0000 |
commit | 33f4a9728741b1305496ec89f5b472a57e21a32b (patch) | |
tree | 647340b9f5eb29c43204f874206ff6a7cae58647 /llvm/test/CodeGen/ARM | |
parent | 7aa22859b66223c59a5dacc45e8bd7535659057b (diff) | |
download | bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.tar.gz bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.zip |
[DAGCombiner] use narrow load to avoid vector extract
If we have (extract_subvector(load wide vector)) with no other users,
that can just be (load narrow vector). This is intentionally conservative.
Follow-ups may loosen the one-use constraint to account for the extract cost
or just remove the one-use check.
The memop chain updating is based on code that already exists multiple times
in x86 lowering, so that should be pulled into a helper function as a follow-up.
Background: this is a potential improvement noticed via regressions caused by
making x86's peekThroughBitcasts() not loop on consecutive bitcasts (see
comments in D33137).
Differential Revision: https://reviews.llvm.org/D33578
llvm-svn: 304072
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r-- | llvm/test/CodeGen/ARM/vcombine.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/vext.ll | 8 |
2 files changed, 7 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll index 81b22ee12cd..c08ed81d042 100644 --- a/llvm/test/CodeGen/ARM/vcombine.ll +++ b/llvm/test/CodeGen/ARM/vcombine.ll @@ -99,7 +99,9 @@ define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind { define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind { ; CHECK: vget_high8 ; CHECK-NOT: vst -; CHECK-LE: vmov r0, r1, d17 +; CHECK-LE-NOT: vld1.64 {d16, d17}, [r0] +; CHECK-LE: vldr d16, [r0, #8] +; CHECK-LE: vmov r0, r1, d16 ; CHECK-BE: vmov r1, r0, d16 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> diff --git a/llvm/test/CodeGen/ARM/vext.ll b/llvm/test/CodeGen/ARM/vext.ll index e44e757a316..5742dc31497 100644 --- a/llvm/test/CodeGen/ARM/vext.ll +++ b/llvm/test/CodeGen/ARM/vext.ll @@ -199,10 +199,10 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: test_undef: ; CHECK: @ BB#0: -; CHECK-NEXT: vld1.64 {d16, d17}, [r1] -; CHECK-NEXT: vld1.64 {d18, d19}, [r0] -; CHECK-NEXT: vzip.16 d19, d16 -; CHECK-NEXT: vmov r0, r1, d19 +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0, #8] +; CHECK-NEXT: vzip.16 d17, d16 +; CHECK-NEXT: vmov r0, r1, d17 ; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B |