diff options
| author | alex-t <alexander.timofeev@amd.com> | 2020-01-04 18:23:14 +0300 |
|---|---|---|
| committer | alex-t <alexander.timofeev@amd.com> | 2020-01-04 18:23:14 +0300 |
| commit | ca8b20ca3ba10288b61a083c4ce57fb011124935 (patch) | |
| tree | 60baa7eb6c2d7dd593da13c3b4749e3806bb9b29 /llvm/lib/Target | |
| parent | 6d05bc2e3a9b54fde53aa5cbd83cc7c1d432cac1 (diff) | |
| download | bcm5719-llvm-ca8b20ca3ba10288b61a083c4ce57fb011124935.tar.gz bcm5719-llvm-ca8b20ca3ba10288b61a083c4ce57fb011124935.zip | |
[AMDGPU] Insert a wait between a scalar load and a vector store to the same address to avoid a WAR (write-after-read) conflict.
Reviewers: rampitec, vpykhtin, nhaehnle
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D71934
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 21 |
1 file changed, 21 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 927826c5240..ef662d55cb0 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -42,7 +42,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -372,6 +374,8 @@ private: AMDGPU::IsaVersion IV; DenseSet<MachineInstr *> TrackedWaitcntSet; + DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses; + MachinePostDominatorTree *PDT; struct BlockInfo { MachineBasicBlock *MBB; @@ -406,6 +410,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired<MachinePostDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -792,6 +797,7 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const { INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, false) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, false) @@ -1012,6 +1018,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( if (MI.mayStore()) { // FIXME: Should not be relying on memoperands. 
for (const MachineMemOperand *Memop : MI.memoperands()) { + const Value *Ptr = Memop->getValue(); + if (SLoadAddresses.count(Ptr)) { + addWait(Wait, LGKM_CNT, 0); + if (PDT->dominates(MI.getParent(), + SLoadAddresses.find(Ptr)->second)) + SLoadAddresses.erase(Ptr); + } unsigned AS = Memop->getAddrSpace(); if (AS != AMDGPUAS::LOCAL_ADDRESS) continue; @@ -1399,6 +1412,13 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, } } + if (TII->isSMRD(Inst)) { + for (const MachineMemOperand *Memop : Inst.memoperands()) { + const Value *Ptr = Memop->getValue(); + SLoadAddresses.insert(std::make_pair(Ptr, Inst.getParent())); + } + } + // Generate an s_waitcnt instruction to be placed before // cur_Inst, if needed. Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr); @@ -1448,6 +1468,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); IV = AMDGPU::getIsaVersion(ST->getCPU()); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + PDT = &getAnalysis<MachinePostDominatorTree>(); ForceEmitZeroWaitcnts = ForceEmitZeroFlag; for (auto T : inst_counter_types()) |

