summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
diff options
context:
space:
mode:
author Nicolai Haehnle <nhaehnle@gmail.com> 2016-10-27 08:15:07 +0000
committer Nicolai Haehnle <nhaehnle@gmail.com> 2016-10-27 08:15:07 +0000
commit 7b0e25b7ad98a8c8970ee49a768b88b1dd5e043f (patch)
tree 95cec42bb929a03c5407ba5ebbd9f761183eb2fb /llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
parent 00009d48247477bb1f904a46dfca405cea16ed30 (diff)
download bcm5719-llvm-7b0e25b7ad98a8c8970ee49a768b88b1dd5e043f.tar.gz
bcm5719-llvm-7b0e25b7ad98a8c8970ee49a768b88b1dd5e043f.zip
AMDGPU: Fix SILoadStoreOptimizer when writes cannot be merged due to register dependencies
Summary:
When finding a match for a merge and collecting the instructions that must be moved, keep in mind that the instruction we merge might actually use one of the defs that are being moved.

Fixes piglit spec/arb_enhanced_layouts/execution/component-layout/vs-tcs-load-output[-indirect].

The fact that the ds_read in the test case is not eliminated suggests that there might be another problem related to alias analysis, but that's a separate problem: this pass should still work correctly even when earlier optimization passes missed something or were disabled.

Reviewers: tstellarAMD, arsenm

Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, tony-tye

Differential Revision: https://reviews.llvm.org/D25829

llvm-svn: 285273
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll 23
1 files changed, 23 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
new file mode 100644
index 00000000000..e4a36d7e691
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}test1:
+; CHECK: ds_write_b32
+; CHECK: ds_read_b32
+; CHECK: ds_write_b32
+define amdgpu_vs void @test1(i32 %v) #0 { ; regression input: two addrspace(3) stores where the second one consumes a value loaded in between
+ %p0 = getelementptr i32, i32 addrspace(3)* null, i32 0 ; i32 element 0 off a null addrspace(3) base
+ %p1 = getelementptr i32, i32 addrspace(3)* null, i32 1 ; i32 element 1 off the same base (adjacent to %p0)
+
+ store i32 %v, i32 addrspace(3)* %p0 ; first write: %v -> %p0
+
+ call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %v, i32 1, i32 undef, i32 undef, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) ; buffer store of %v placed between the two addrspace(3) writes
+
+ %w = load i32, i32 addrspace(3)* %p0 ; read back from %p0; %w is only defined after the first write
+ store i32 %w, i32 addrspace(3)* %p1 ; second write: stores %w, so it uses the def produced by the load above
+ ret void
+}
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+attributes #0 = { nounwind }
OpenPOWER on IntegriCloud