summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AArch64/arm64-vshift.ll
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-05-27 14:07:03 +0000
committerSanjay Patel <spatel@rotateright.com>2017-05-27 14:07:03 +0000
commit33f4a9728741b1305496ec89f5b472a57e21a32b (patch)
tree647340b9f5eb29c43204f874206ff6a7cae58647 /llvm/test/CodeGen/AArch64/arm64-vshift.ll
parent7aa22859b66223c59a5dacc45e8bd7535659057b (diff)
downloadbcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.tar.gz
bcm5719-llvm-33f4a9728741b1305496ec89f5b472a57e21a32b.zip
[DAGCombiner] use narrow load to avoid vector extract
If we have (extract_subvector(load wide vector)) with no other users, that can just be (load narrow vector). This is intentionally conservative. Follow-ups may loosen the one-use constraint to account for the extract cost or just remove the one-use check. The memop chain updating is based on code that already exists multiple times in x86 lowering, so that should be pulled into a helper function as a follow-up. Background: this is a potential improvement noticed via regressions caused by making x86's peekThroughBitcasts() not loop on consecutive bitcasts (see comments in D33137). Differential Revision: https://reviews.llvm.org/D33578 llvm-svn: 304072
Diffstat (limited to 'llvm/test/CodeGen/AArch64/arm64-vshift.ll')
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vshift.ll12
1 files changed, 6 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index c1c4649bd6a..6b0fe40b5a0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1164,7 +1164,7 @@ define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: ushll2_8h:
-;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
+;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
%load1 = load <16 x i8>, <16 x i8>* %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
@@ -1174,7 +1174,7 @@ define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: ushll2_4s:
-;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
+;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
%load1 = load <8 x i16>, <8 x i16>* %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
@@ -1184,7 +1184,7 @@ define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: ushll2_2d:
-;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
+;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
%load1 = load <4 x i32>, <4 x i32>* %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
@@ -1221,7 +1221,7 @@ define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sshll2_8h:
-;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
+;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
%load1 = load <16 x i8>, <16 x i8>* %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
@@ -1231,7 +1231,7 @@ define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sshll2_4s:
-;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
+;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
%load1 = load <8 x i16>, <8 x i16>* %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
@@ -1241,7 +1241,7 @@ define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sshll2_2d:
-;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
+;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
%load1 = load <4 x i32>, <4 x i32>* %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
OpenPOWER on IntegriCloud