[LoopVectorizer, TTI] New method supportsEfficientVectorElementLoadStore()

Since SystemZ supports vector element load/store instructions, there is no need for extracts/inserts if a vector load/store gets scalarized. This patch lets a target specify that it supports such instructions by means of a new TTI hook that defaults to false. The hook is used in the LoopVectorizer getScalarizationOverhead() method, which with this patch produces a smaller sum for a vector load/store on SystemZ. New test: test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll Review: Adam Nemet https://reviews.llvm.org/D30680 llvm-svn: 300056
author: Jonas Paulsson <paulsson@linux.vnet.ibm.com> 2017-04-12 12:41:37 +0000
committer: Jonas Paulsson <paulsson@linux.vnet.ibm.com> 2017-04-12 12:41:37 +0000
commit: da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04 (patch)
tree: 5ead03687c900acdb6bf62c7c34fa34f62b03480 /llvm/test/Transforms/LoopVectorize
parent: 12194e9bec6f5c4dc887659983764e6cfb708205 (diff)
download: bcm5719-llvm-da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04.tar.gz
bcm5719-llvm-da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04.zip
1 files changed, 33 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
new file mode 100644
index 00000000000..e7096c29b99
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
+; RUN:   -force-vector-width=4 -debug-only=loop-vectorize \
+; RUN:   -disable-output -enable-interleaved-mem-accesses=false < %s 2>&1 | \
+; RUN:   FileCheck %s
+;
+; Check that a scalarized load/store does not get a cost for inserts/
+; extracts, since z13 supports element load/store.
+
+define void @fun(i32* %data, i64 %n) { ; loop that increments every second i32 of %data while the next index is < %n
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] ; induction variable: 0 on entry, %i.next on the back edge
+  %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i ; address of element %i of %data
+  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp0, align 4 ; write the incremented value back to the same address
+  %i.next = add nuw nsw i64 %i, 2 ; step of 2, so consecutive iterations touch non-adjacent elements
+  %cond = icmp slt i64 %i.next, %n ; signed compare of the next index against %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp2, i32* %tmp0, align 4
+
+; CHECK: LV: Scalarizing:  %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Scalarizing:  store i32 %tmp2, i32* %tmp0, align 4
+}
+
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>	2017-04-12 12:41:37 +0000
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>	2017-04-12 12:41:37 +0000
commit	da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04 (patch)
tree	5ead03687c900acdb6bf62c7c34fa34f62b03480 /llvm/test/Transforms/LoopVectorize
parent	12194e9bec6f5c4dc887659983764e6cfb708205 (diff)
download	bcm5719-llvm-da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04.tar.gz bcm5719-llvm-da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04.zip