summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authoralex-t <alexander.timofeev@amd.com>2020-01-04 18:23:14 +0300
committeralex-t <alexander.timofeev@amd.com>2020-01-04 18:23:14 +0300
commitca8b20ca3ba10288b61a083c4ce57fb011124935 (patch)
tree60baa7eb6c2d7dd593da13c3b4749e3806bb9b29 /llvm/lib/Target
parent6d05bc2e3a9b54fde53aa5cbd83cc7c1d432cac1 (diff)
downloadbcm5719-llvm-ca8b20ca3ba10288b61a083c4ce57fb011124935.tar.gz
bcm5719-llvm-ca8b20ca3ba10288b61a083c4ce57fb011124935.zip
[AMDGPU] need to insert wait between the scalar load and vector store to the same address to avoid WAR conflict.
Reviewers: rampitec, vpykhtin, nhaehnle Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D71934
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp21
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 927826c5240..ef662d55cb0 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -42,7 +42,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -372,6 +374,8 @@ private:
AMDGPU::IsaVersion IV;
DenseSet<MachineInstr *> TrackedWaitcntSet;
+ DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
+ MachinePostDominatorTree *PDT;
struct BlockInfo {
MachineBasicBlock *MBB;
@@ -406,6 +410,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -792,6 +797,7 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
false)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
false)
@@ -1012,6 +1018,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.mayStore()) {
// FIXME: Should not be relying on memoperands.
for (const MachineMemOperand *Memop : MI.memoperands()) {
+ const Value *Ptr = Memop->getValue();
+ if (SLoadAddresses.count(Ptr)) {
+ addWait(Wait, LGKM_CNT, 0);
+ if (PDT->dominates(MI.getParent(),
+ SLoadAddresses.find(Ptr)->second))
+ SLoadAddresses.erase(Ptr);
+ }
unsigned AS = Memop->getAddrSpace();
if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
@@ -1399,6 +1412,13 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
}
+ if (TII->isSMRD(Inst)) {
+ for (const MachineMemOperand *Memop : Inst.memoperands()) {
+ const Value *Ptr = Memop->getValue();
+ SLoadAddresses.insert(std::make_pair(Ptr, Inst.getParent()));
+ }
+ }
+
// Generate an s_waitcnt instruction to be placed before
// cur_Inst, if needed.
Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
@@ -1448,6 +1468,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
IV = AMDGPU::getIsaVersion(ST->getCPU());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
for (auto T : inst_counter_types())
OpenPOWER on IntegriCloud