diff options
author | Mark Searles <m.c.searles@gmail.com> | 2018-07-16 10:02:41 +0000 |
---|---|---|
committer | Mark Searles <m.c.searles@gmail.com> | 2018-07-16 10:02:41 +0000 |
commit | 72da47df2514bd54b0d9378280625591f9dd2f9e (patch) | |
tree | 0aa97bb31fda298e3fd93fc969a4e85c14609813 /llvm/test/CodeGen/AMDGPU/memory_clause.ll | |
parent | c2d5d9adb549fe0374aa6ef60365effafbd580a6 (diff) | |
download | bcm5719-llvm-72da47df2514bd54b0d9378280625591f9dd2f9e.tar.gz bcm5719-llvm-72da47df2514bd54b0d9378280625591f9dd2f9e.zip |
run post-RA hazard recognizer pass late
Memory legalizer, waitcnt, and shrink passes can perturb the instructions,
which means that the post-RA hazard recognizer pass should run after them.
Otherwise, one of those passes may invalidate the work done by the hazard
recognizer. Note that this has the adverse side effect that any consecutive
S_NOP 0's, emitted by the hazard recognizer, will not be shrunk into a
single S_NOP <N>. This should be addressed in a follow-on patch.
Differential Revision: https://reviews.llvm.org/D49288
llvm-svn: 337154
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory_clause.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory_clause.ll | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll index 516fc3461df..f4a66f83fd9 100644 --- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll @@ -77,6 +77,7 @@ bb: ; GCN-NEXT: buffer_load_dword ; GCN-NEXT: buffer_load_dword ; GCN-NEXT: s_nop +; GCN-NEXT: s_nop ; GCN-NEXT: buffer_load_dword define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %arg, <4 x i32> addrspace(5)* noalias nocapture %arg1) { bb: @@ -105,8 +106,9 @@ bb: ; GCN-LABEL: {{^}}vector_clause_indirect: ; GCN: global_load_dwordx2 [[ADDR:v\[[0-9:]+\]]], v[{{[0-9:]+}}], off -; GCN-NEXT: s_nop +; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_dwordx4 v[{{[0-9:]+}}], [[ADDR]], off ; GCN-NEXT: global_load_dwordx4 v[{{[0-9:]+}}], [[ADDR]], off offset:16 define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture readnone %arg1, <4 x i32> addrspace(1)* noalias nocapture %arg2) { @@ -128,6 +130,7 @@ bb: ; GCN-LABEL: {{^}}load_global_d16_hi: ; GCN: global_load_short_d16_hi v ; GCN-NEXT: s_nop +; GCN-NEXT: s_nop ; GCN-NEXT: global_load_short_d16_hi v define void @load_global_d16_hi(i16 addrspace(1)* %in, i16 %reg, <2 x i16> addrspace(1)* %out) { entry: @@ -147,6 +150,7 @@ entry: ; GCN-LABEL: {{^}}load_global_d16_lo: ; GCN: global_load_short_d16 v ; GCN-NEXT: s_nop +; GCN-NEXT: s_nop ; GCN-NEXT: global_load_short_d16 v define void @load_global_d16_lo(i16 addrspace(1)* %in, i32 %reg, <2 x i16> addrspace(1)* %out) { entry: |