summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
authorJonas Paulsson <paulsson@linux.vnet.ibm.com>2017-04-12 12:41:37 +0000
committerJonas Paulsson <paulsson@linux.vnet.ibm.com>2017-04-12 12:41:37 +0000
commitda74ed42dadbbf62644fb8fe0ff1e7885c7b2a04 (patch)
tree5ead03687c900acdb6bf62c7c34fa34f62b03480 /llvm/test/Transforms/LoopVectorize
parent12194e9bec6f5c4dc887659983764e6cfb708205 (diff)
downloadbcm5719-llvm-da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04.tar.gz
bcm5719-llvm-da74ed42dadbbf62644fb8fe0ff1e7885c7b2a04.zip
[LoopVectorizer, TTI] New method supportsEfficientVectorElementLoadStore()
Since SystemZ supports vector element load/store instructions, there is no need for extracts/inserts if a vector load/store gets scalarized. This patch lets Target specify that it supports such instructions by means of a new TTI hook that defaults to false. The use for this is in the LoopVectorizer getScalarizationOverhead() method, which will with this patch produce a smaller sum for a vector load/store on SystemZ. New test: test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll Review: Adam Nemet https://reviews.llvm.org/D30680 llvm-svn: 300056
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll33
1 files changed, 33 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
new file mode 100644
index 00000000000..e7096c29b99
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
+; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
+; RUN: -disable-output -enable-interleaved-mem-accesses=false < %s 2>&1 | \
+; RUN: FileCheck %s
+;
+; Check that a scalarized load/store does not get a cost for inserts/
+; extracts, since z13 supports element load/store.
+
+define void @fun(i32* %data, i64 %n) { ; adds 1 to data[0], data[2], ... ; the loop body always runs at least once
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] ; induction variable, starts at 0
+ %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i ; address of data[i]
+ %tmp1 = load i32, i32* %tmp0, align 4 ; load whose VF 4 cost line is matched below
+ %tmp2 = add i32 %tmp1, 1
+ store i32 %tmp2, i32* %tmp0, align 4 ; store back to the same address; its VF 4 cost line is matched below
+ %i.next = add nuw nsw i64 %i, 2 ; step of 2: accesses are non-adjacent, presumably what makes the vectorizer scalarize them
+ %cond = icmp slt i64 %i.next, %n ; continue while the next index is (signed) less than %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp2, i32* %tmp0, align 4
+
+; CHECK: LV: Scalarizing: %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Scalarizing: store i32 %tmp2, i32* %tmp0, align 4
+}
+
OpenPOWER on IntegriCloud