summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU/memory_clause.ll
diff options
context:
space:
mode:
author: Mark Searles <m.c.searles@gmail.com> 2018-07-16 10:02:41 +0000
committer: Mark Searles <m.c.searles@gmail.com> 2018-07-16 10:02:41 +0000
commit: 72da47df2514bd54b0d9378280625591f9dd2f9e (patch)
tree: 0aa97bb31fda298e3fd93fc969a4e85c14609813 /llvm/test/CodeGen/AMDGPU/memory_clause.ll
parent: c2d5d9adb549fe0374aa6ef60365effafbd580a6 (diff)
download: bcm5719-llvm-72da47df2514bd54b0d9378280625591f9dd2f9e.tar.gz
download: bcm5719-llvm-72da47df2514bd54b0d9378280625591f9dd2f9e.zip
run post-RA hazard recognizer pass late
The memory legalizer, waitcnt, and shrink passes can perturb the instructions, which means that the post-RA hazard recognizer pass should run after them. Otherwise, one of those passes may invalidate the work done by the hazard recognizer. Note that this has the adverse side effect that any consecutive S_NOP 0's emitted by the hazard recognizer will not be shrunk into a single S_NOP <N>. This should be addressed in a follow-on patch.

Differential Revision: https://reviews.llvm.org/D49288
llvm-svn: 337154
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory_clause.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/memory_clause.ll 6
1 file changed, 5 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
index 516fc3461df..f4a66f83fd9 100644
--- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
@@ -77,6 +77,7 @@ bb:
; GCN-NEXT: buffer_load_dword
; GCN-NEXT: buffer_load_dword
; GCN-NEXT: s_nop
+; GCN-NEXT: s_nop
; GCN-NEXT: buffer_load_dword
define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %arg, <4 x i32> addrspace(5)* noalias nocapture %arg1) {
bb:
@@ -105,8 +106,9 @@ bb:
; GCN-LABEL: {{^}}vector_clause_indirect:
; GCN: global_load_dwordx2 [[ADDR:v\[[0-9:]+\]]], v[{{[0-9:]+}}], off
-; GCN-NEXT: s_nop
+; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_nop 0
; GCN-NEXT: global_load_dwordx4 v[{{[0-9:]+}}], [[ADDR]], off
; GCN-NEXT: global_load_dwordx4 v[{{[0-9:]+}}], [[ADDR]], off offset:16
define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture readnone %arg1, <4 x i32> addrspace(1)* noalias nocapture %arg2) {
@@ -128,6 +130,7 @@ bb:
; GCN-LABEL: {{^}}load_global_d16_hi:
; GCN: global_load_short_d16_hi v
; GCN-NEXT: s_nop
+; GCN-NEXT: s_nop
; GCN-NEXT: global_load_short_d16_hi v
define void @load_global_d16_hi(i16 addrspace(1)* %in, i16 %reg, <2 x i16> addrspace(1)* %out) {
entry:
@@ -147,6 +150,7 @@ entry:
; GCN-LABEL: {{^}}load_global_d16_lo:
; GCN: global_load_short_d16 v
; GCN-NEXT: s_nop
+; GCN-NEXT: s_nop
; GCN-NEXT: global_load_short_d16 v
define void @load_global_d16_lo(i16 addrspace(1)* %in, i32 %reg, <2 x i16> addrspace(1)* %out) {
entry:
OpenPOWER on IntegriCloud