summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Nicolai Haehnle <nhaehnle@gmail.com> 2018-11-07 21:53:29 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> 2018-11-07 21:53:29 +0000
commit: 0ab31c9c4411dbf8b714dd7a220cf925530d57ba (patch)
tree: b1398017478003902c711d27e3ad80d9e6791669 /llvm/lib/Target
parent: 15e90e331cecd9fcedcd0e1565816682e2535a27 (diff)
download: bcm5719-llvm-0ab31c9c4411dbf8b714dd7a220cf925530d57ba.tar.gz
bcm5719-llvm-0ab31c9c4411dbf8b714dd7a220cf925530d57ba.zip
AMDGPU/InsertWaitcnts: Remove kill-related logic
Summary:
This is not needed, because we don't actually insert relevant branches for KILLs that late in the compilation flow.

Besides, this was always checking for the wrong kill opcode anyway...

Reviewers: msearles, rampitec, scott.linder, kanarayan
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D54085
llvm-svn: 346362
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 102
1 file changed, 1 insertion, 101 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 819b1b9fcd7..a785461b13d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -382,8 +382,6 @@ private:
DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
- std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
-
// ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
// because of amdgpu-waitcnt-forcezero flag
bool ForceEmitZeroWaitcnts;
@@ -410,13 +408,6 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
- // The waitcnt information is copied because it changes as the block is
- // traversed.
- KillWaitBrackets.push_back(
- llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
- }
-
bool isForceEmitWaitcnt() const {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1))
@@ -1425,24 +1416,6 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
}
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Also handle kills for exit block.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int Span = KillWaitBrackets[I]->getScoreUB(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxPending[T] = std::max(MaxPending[T], Span);
- Span = KillWaitBrackets[I]->pendingFlat(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxFlat[T] = std::max(MaxFlat[T], Span);
- }
-
- MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
- }
- }
-
// Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
for (MachineBasicBlock *Pred : Block.predecessors()) {
BlockWaitcntBrackets *PredScoreBrackets =
@@ -1460,18 +1433,6 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
}
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
- int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
- }
- }
-
#if 0
// LC does not (unlike) add a waitcnt at beginning. Leaving it as marker.
// TODO: how does LC distinguish between function entry and main entry?
@@ -1551,60 +1512,6 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
}
}
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Set the register scoreboard.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- // Now merge the gpr_reg_score information.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int PredLB = KillWaitBrackets[I]->getScoreLB(T);
- int PredUB = KillWaitBrackets[I]->getScoreUB(T);
- if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
- // Merge vgpr scores.
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
- int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
- }
- // Also need to merge sgpr scores for lgkm_cnt.
- if (T == LGKM_CNT) {
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
- int PredRegScore =
- KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J + NUM_ALL_VGPRS, LGKM_CNT,
- std::max(
- ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
- NewRegScore));
- }
- }
- }
- }
-
- // Also merge the WaitEvent information.
- ForAllWaitEventType(W) {
- enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
- int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
- if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
- int NewEventUB =
- MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
- if (NewEventUB > 0) {
- ScoreBrackets->setEventUB(
- W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
- }
- }
- }
- }
- }
-
// Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
// sequencing predecessors, because changes to EXEC require waitcnts due to
// the delayed nature of these operations.
@@ -1701,13 +1608,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
continue;
}
- // Kill instructions generate a conditional branch to the endmain block.
- // Merge the current waitcnt state into the endmain block information.
- // TODO: Are there other flavors of KILL instruction?
- if (Inst.getOpcode() == AMDGPU::KILL) {
- addKillWaitBracket(ScoreBrackets);
- }
-
bool VCCZBugWorkAround = false;
if (readsVCCZ(Inst) &&
(!VCCZBugHandledSet.count(&Inst))) {
@@ -1871,7 +1771,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
LoopWaitcntDataMap.clear();
BlockWaitcntProcessedSet.clear();
- // Walk over the blocks in reverse post-dominator order, inserting
+ // Walk over the blocks in reverse post order, inserting
// s_waitcnt where needed.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
bool Modified = false;
OpenPOWER on IntegriCloud