author     Rhys Perry <pendingchaos02@gmail.com>  2019-05-17 09:32:23 +0000
committer  Rhys Perry <pendingchaos02@gmail.com>  2019-05-17 09:32:23 +0000
commit     c4bc61bad7b29659181d0a9e3ae409c46bb39392 (patch)
tree       8f9970fcb3628ca98db3e2854b4ba166ca20419a /llvm
parent     929af67361275feafc07d02458af7657f754c029 (diff)
[AMDGPU] detect WaW hazards when moving/merging load/store instructions
Summary:
In order to combine memory operations efficiently, the load/store
optimizer might move some instructions around. It's usually safe
to move instructions down past the merged instruction because the
pass checks if memory operations can be re-ordered.
However, the current logic does not handle Write-after-Write hazards.
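To make the hazard concrete, here is a minimal, self-contained C++ sketch of the dependence check this change is about; the Instr struct and mustMoveWith helper are invented for illustration and are not the real MachineInstr or SILoadStoreOptimizer API.

    #include <iostream>
    #include <unordered_set>

    // Hypothetical, simplified stand-in for a machine instruction: just the
    // sets of registers it reads and writes.  The real pass inspects
    // MachineInstr operands; this is only an illustration.
    struct Instr {
      std::unordered_set<unsigned> Uses; // registers read
      std::unordered_set<unsigned> Defs; // registers written
    };

    // Decide whether MI, which sits between the instruction being moved down
    // and the merge point, must be moved along with it.  RegDefs holds the
    // registers written by the instruction being moved.
    bool mustMoveWith(const Instr &MI,
                      const std::unordered_set<unsigned> &RegDefs) {
      // Read-after-Write: MI reads a register the moved instruction writes.
      for (unsigned R : MI.Uses)
        if (RegDefs.count(R))
          return true;
      // Write-after-Write: MI also writes such a register; swapping the two
      // writes would change which value survives.  This is the case the
      // patch adds to the existing check.
      for (unsigned R : MI.Defs)
        if (RegDefs.count(R))
          return true;
      return false;
    }

    int main() {
      Instr Moved{/*Uses=*/{}, /*Defs=*/{1}};   // writes register 1
      Instr Between{/*Uses=*/{}, /*Defs=*/{1}}; // also writes register 1
      // With only the read check, Between looks independent of Moved; the
      // WaW check correctly flags it.
      std::cout << mustMoveWith(Between, Moved.Defs) << "\n"; // prints 1
      return 0;
    }

In the actual pass the same idea is expressed as one extra clause in addToListsIfDependent, shown in the diff below.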
This fixes a reflection issue with Monster Hunter World and DXVK.
v2: - rebased on top of master
- clean up the test case
- handle WaW hazards correctly
Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=40130
Original patch by Samuel Pitoiset.
Reviewers: tpr, arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: ronlieb, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D61313
llvm-svn: 361008
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp |  1
-rw-r--r--  llvm/test/CodeGen/AMDGPU/merge-load-store.mir   | 34
2 files changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index b7541e0df62..461f7b213d2 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -281,6 +281,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
     // registers are in SSA form.
     if (Use.isReg() &&
         ((Use.readsReg() && RegDefs.count(Use.getReg())) ||
+         (Use.isDef() && RegDefs.count(Use.getReg())) ||
          (Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
           PhysRegUses.count(Use.getReg())))) {
       Insts.push_back(&MI);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
index cfa3b99bc2c..f716cb15f0e 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
@@ -59,6 +59,11 @@
   attributes #0 = { convergent nounwind }
   attributes #1 = { convergent nounwind readnone }
+  define amdgpu_kernel void @move_waw_hazards() #0 {
+    ret void
+  }
+
+  attributes #0 = { convergent nounwind }
 
 ...
 ---
 name: mem_dependency
@@ -129,3 +134,32 @@ body: |
     S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %6, implicit %7
 
 ...
+---
+# Make sure Write-after-Write hazards are correctly detected and the
+# instructions moved accordingly.
+# operations.
+# CHECK-LABEL: name: move_waw_hazards
+# CHECK: S_AND_B64
+# CHECK: S_CMP_EQ_U32
+name: move_waw_hazards
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    %3:sgpr_64 = COPY $sgpr0_sgpr1
+    %6:sreg_32_xm0_xexec = S_MOV_B32 0
+    %7:sreg_32_xm0 = S_MOV_B32 0
+    %8:sreg_64_xexec = REG_SEQUENCE killed %6, %subreg.sub0, %7, %subreg.sub1
+    %9:sreg_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0, 0 :: (invariant load 16, addrspace 6)
+    %31:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %9, 0, 0, 0 :: (dereferenceable invariant load 4)
+    %10:sreg_32_xm0_xexec = COPY %31.sub0
+    %11:sreg_32_xm0_xexec = COPY killed %31.sub1
+    %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 2, 0, 0 :: (dereferenceable invariant load 4)
+    %13:sreg_64 = V_CMP_NE_U32_e64 killed %11, 0, implicit $exec
+    %15:sreg_64 = V_CMP_NE_U32_e64 killed %12, 0, implicit $exec
+    %17:sreg_64_xexec = S_AND_B64 killed %13, killed %15, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %10, 0, implicit-def $scc
+    %18:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 3, 0, 0 :: (dereferenceable invariant load 4)
+    S_ENDPGM 0
+...