path: root/llvm/test/Transforms
author: Justin Lebar <jlebar@google.com> 2016-07-20 20:07:34 +0000
committer: Justin Lebar <jlebar@google.com> 2016-07-20 20:07:34 +0000
commit: 62b03e344eeb168d0ac87a442275b28d3e95016a (patch)
tree: 9942e7ce6f56e61a8078f2d7f81ae5d94566f264 /llvm/test/Transforms
parent: 400ae40348096d6f3fb22a4c9df38be0de7d2af2 (diff)
download: bcm5719-llvm-62b03e344eeb168d0ac87a442275b28d3e95016a.tar.gz
download: bcm5719-llvm-62b03e344eeb168d0ac87a442275b28d3e95016a.zip
[LSV] Vectorize up to side-effecting instructions.
Summary:
Previously, if we had a chain that contained a side-effecting instruction, we wouldn't vectorize it at all. Now we'll vectorize everything that comes before the side-effecting instruction.

Reviewers: asbirlea

Subscribers: arsenm, jholewinski, llvm-commits, mzolotukhin

Differential Revision: https://reviews.llvm.org/D22536

llvm-svn: 276170
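In effect, the change splits a candidate chain at each side-effecting instruction and considers each run between such barriers for vectorization on its own, instead of discarding the whole chain. The standalone C++ sketch below only models that splitting step with a toy instruction type; it is not the actual LoadStoreVectorizer code (those hunks are outside this diffstat), and the names Inst and splitAtSideEffects are hypothetical, used purely for illustration.

    // Toy model of "vectorize up to side-effecting instructions":
    // split a chain into maximal runs that contain no side-effecting
    // instruction; each run can then be merged independently.
    #include <cstdio>
    #include <string>
    #include <vector>

    struct Inst {
      std::string Name;
      bool HasSideEffects; // e.g. the call to @fn in the test below
    };

    static std::vector<std::vector<Inst>>
    splitAtSideEffects(const std::vector<Inst> &Chain) {
      std::vector<std::vector<Inst>> Runs(1);
      for (const Inst &I : Chain) {
        if (I.HasSideEffects) {
          if (!Runs.back().empty())
            Runs.emplace_back(); // start a new run after the barrier
          continue;              // the barrier itself is never merged
        }
        Runs.back().push_back(I);
      }
      if (Runs.back().empty())
        Runs.pop_back();
      return Runs;
    }

    int main() {
      // Mirrors @merge_stores below: two stores, a call, two more stores.
      std::vector<Inst> Chain = {
          {"store 100", false}, {"store 101", false},
          {"call @fn", true},
          {"store 102", false}, {"store 103", false}};
      for (const auto &Run : splitAtSideEffects(Chain)) {
        std::printf("vectorizable run:");
        for (const Inst &I : Run)
          std::printf(" [%s]", I.Name.c_str());
        std::printf("\n");
      }
      // Prints two runs of two stores each, matching the two <2 x i32>
      // stores the CHECK lines in the test expect around the call.
    }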
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r-- llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg | 3
-rw-r--r-- llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll | 48
2 files changed, 51 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg
new file mode 100644
index 00000000000..a5e90f8e3c1
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'NVPTX' in config.root.targets:
+ config.unsupported = True
+
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll
new file mode 100644
index 00000000000..e521a00a9fe
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll
@@ -0,0 +1,48 @@
+; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; If we have a chain of loads or stores with a side-effecting operation in the
+; middle, we should still be able to merge the loads/stores that appear
+; before/after the side-effecting op. We just can't merge *across* the
+; side-effecting op.
+
+declare void @fn() #0
+
+; CHECK-LABEL: @merge_stores
+; CHECK: store <2 x i32> <i32 100, i32 101>
+; CHECK: call void @fn()
+; CHECK: store <2 x i32> <i32 102, i32 103>
+define void @merge_stores(i32* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32* %out, i32 3
+
+ store i32 101, i32* %out.gep.1
+ store i32 100, i32* %out
+ call void @fn()
+ store i32 102, i32* %out.gep.2
+ store i32 103, i32* %out.gep.3
+ ret void
+}
+
+; CHECK-LABEL: @merge_loads
+; CHECK: load <2 x i32>
+; CHECK: call void @fn()
+; CHECK: load <2 x i32>
+define i32 @merge_loads(i32* %in) #0 {
+ %in.gep.1 = getelementptr i32, i32* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32* %in, i32 2
+ %in.gep.3 = getelementptr i32, i32* %in, i32 3
+
+ %v1 = load i32, i32* %in
+ %v2 = load i32, i32* %in.gep.1
+ call void @fn()
+ %v3 = load i32, i32* %in.gep.2
+ %v4 = load i32, i32* %in.gep.3
+
+ %sum1 = add i32 %v1, %v2
+ %sum2 = add i32 %sum1, %v3
+ %sum3 = add i32 %sum2, %v4
+ ret i32 %v4
+}
+
+attributes #0 = { nounwind }