| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-20 17:45:25 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-20 17:45:25 +0000 |
| commit | 4b7fc85c0bb76351d7ad848b52bdf955f0c9789c | |
| tree | c493e1953a6ee344fedbfddc73662c6df733cea7 | /llvm/lib/Target |
| parent | 1d37ec1fda91bd2e7553f3651fe7ef4c54f767a0 | |
Revert "AMDGPU: Fix iterator error when lowering SI_END_CF"
This reverts r367500 and r369203, which were causing various test
failures.
llvm-svn: 369417
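
For context before the full diff: the net effect of the revert is that SI_END_CF lowering goes back to OR-ing the saved mask into exec with a plain s_or at the top of the join block, rather than emitting a terminator-wrapped `S_OR_*_term` and splitting the block; the `S_OR_B64_term` pseudo and the `insertInstWithExecFallthrough` helper disappear along with that scheme. Below is a minimal sketch of the restored lowering, mirroring the `+` side of the SILowerControlFlow.cpp hunk in the diff — `Exec`, `OrOpc`, `TII`, and `LIS` are members of the pass, so this only compiles inside that file:

```cpp
// Restored SI_END_CF lowering: re-enable the lanes that were masked off at
// the matching if/else by OR-ing the saved mask back into exec.
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator InsPt = MBB.begin();

  // exec = exec | saved_mask, placed at the very top of the join block.
  MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
                            .addReg(Exec)
                            .add(MI.getOperand(0));

  // Keep LiveIntervals consistent: first swap the SI_END_CF pseudo for the
  // new instruction in the slot maps, then account for the implied move
  // from the pseudo's old position to the block start.
  if (LIS)
    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);

  MI.eraseFromParent();

  if (LIS)
    LIS->handleMove(*NewMI);
}
```

Because the pass no longer splits basic blocks, it can again advertise `AU.setPreservesCFG()` in `getAnalysisUsage`, and SIOptimizeExecMaskingPreRA goes back to recognizing an endif block by a leading plain s_or that writes exec rather than by an `S_OR_*_term` terminator.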
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 7 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 1 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 142 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 6 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 14 |
5 files changed, 28 insertions, 142 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f4f858fa483..ea877272826 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1397,12 +1397,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(AMDGPU::S_OR_B32));
     break;
 
-  case AMDGPU::S_OR_B64_term:
-    // This is only a terminator to get the correct spill code placement during
-    // register allocation.
-    MI.setDesc(get(AMDGPU::S_OR_B64));
-    break;
-
   case AMDGPU::S_ANDN2_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
@@ -1895,7 +1889,6 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
     case AMDGPU::SI_MASK_BRANCH:
     case AMDGPU::S_MOV_B64_term:
     case AMDGPU::S_XOR_B64_term:
-    case AMDGPU::S_OR_B64_term:
     case AMDGPU::S_ANDN2_B64_term:
     case AMDGPU::S_MOV_B32_term:
     case AMDGPU::S_XOR_B32_term:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f67c0a20861..934b50b87de 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -193,7 +193,6 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
 
 let WaveSizePredicate = isWave64 in {
 def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
-def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
 def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
 def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
 }
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index c95c12c8f49..1a51b7ebffa 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -55,7 +55,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -80,16 +79,12 @@ class SILowerControlFlow : public MachineFunctionPass {
 private:
   const SIRegisterInfo *TRI = nullptr;
   const SIInstrInfo *TII = nullptr;
-  MachineRegisterInfo *MRI = nullptr;
   LiveIntervals *LIS = nullptr;
-  MachineDominatorTree *DT = nullptr;
-  MachineLoopInfo *MLI = nullptr;
-
+  MachineRegisterInfo *MRI = nullptr;
   const TargetRegisterClass *BoolRC = nullptr;
   unsigned AndOpc;
   unsigned OrOpc;
-  unsigned OrTermOpc;
   unsigned XorOpc;
   unsigned MovTermOpc;
   unsigned Andn2TermOpc;
@@ -101,7 +96,7 @@ private:
   void emitElse(MachineInstr &MI);
   void emitIfBreak(MachineInstr &MI);
   void emitLoop(MachineInstr &MI);
-  MachineBasicBlock *emitEndCf(MachineInstr &MI);
+  void emitEndCf(MachineInstr &MI);
 
   void findMaskOperands(MachineInstr &MI, unsigned OpNo,
                         SmallVectorImpl<MachineOperand> &Src) const;
@@ -126,7 +121,7 @@ public:
     AU.addPreservedID(LiveVariablesID);
     AU.addPreservedID(MachineLoopInfoID);
     AU.addPreservedID(MachineDominatorsID);
-
+    AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
@@ -254,7 +249,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
   LIS->InsertMachineInstrInMaps(*SetExec);
   LIS->InsertMachineInstrInMaps(*NewBr);
 
-  LIS->removeAllRegUnitsForPhysReg(Exec);
+  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
   MI.eraseFromParent();
 
   // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
@@ -338,7 +333,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
     LIS->createAndComputeVirtRegInterval(SaveReg);
 
   // Let this be recomputed.
-  LIS->removeAllRegUnitsForPhysReg(Exec);
+  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
 }
 
 void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
@@ -403,99 +398,23 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
-// Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB
-// is split as necessary to keep the exec modification in its own block.
-static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
-                                                        MachineInstr &MI,
-                                                        MachineInstr *NewMI,
-                                                        MachineDominatorTree *DT,
-                                                        LiveIntervals *LIS,
-                                                        MachineLoopInfo *MLI) {
-  assert(NewMI->isTerminator());
-
-  MachineBasicBlock::iterator InsPt = MI.getIterator();
-  if (std::next(MI.getIterator()) == MBB.end()) {
-    // Don't bother with a new block.
-    MBB.insert(InsPt, NewMI);
-    if (LIS)
-      LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
-    MI.eraseFromParent();
-    return &MBB;
-  }
-
-  MachineFunction *MF = MBB.getParent();
-  MachineBasicBlock *SplitMBB
-    = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
-  MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
-
-  // FIXME: This is working around a MachineDominatorTree API defect.
-  //
-  // If a previous pass split a critical edge, it may not have been applied to
-  // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
-  // the CFG of the given block. Make sure to call a dominator tree method that
-  // will flush this cache before touching the successors of the block.
-  MachineDomTreeNode *NodeMBB = nullptr;
-  if (DT)
-    NodeMBB = DT->getNode(&MBB);
-
-  // Move everything to the new block, except the end_cf pseudo.
-  SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
-
-  SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
-  MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
-
-  MBB.insert(MBB.end(), NewMI);
-
-  if (DT) {
-    std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
-    DT->addNewBlock(SplitMBB, &MBB);
-
-    // Reparent all of the children to the new block body.
-    auto *SplitNode = DT->getNode(SplitMBB);
-    for (auto *Child : Children)
-      DT->changeImmediateDominator(Child, SplitNode);
-  }
-
-  if (MLI) {
-    if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
-      Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
-  }
-
-  if (LIS) {
-    LIS->insertMBBInMaps(SplitMBB);
-    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
-  }
-
-  // All live-ins are forwarded.
-  for (auto &LiveIn : MBB.liveins())
-    SplitMBB->addLiveIn(LiveIn);
-
-  MI.eraseFromParent();
-  return SplitMBB;
-}
-
-MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
+void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator InsPt = MBB.begin();
+  MachineInstr *NewMI =
+      BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
+          .addReg(Exec)
+          .add(MI.getOperand(0));
 
-  // First, move the instruction. It's unnecessarily difficult to update
-  // LiveIntervals when there's a change in control flow, so move the
-  // instruction before changing the blocks.
-  MBB.splice(InsPt, &MBB, MI.getIterator());
   if (LIS)
-    LIS->handleMove(MI);
+    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
 
-  MachineFunction *MF = MBB.getParent();
+  MI.eraseFromParent();
 
-  // Create instruction without inserting it yet.
-  MachineInstr *NewMI
-    = BuildMI(*MF, DL, TII->get(OrTermOpc), Exec)
-    .addReg(Exec)
-    .add(MI.getOperand(0));
-  return insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
+  if (LIS)
+    LIS->handleMove(*NewMI);
 }
 
 // Returns replace operands for a logical operation, either single result
@@ -517,7 +436,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
   // A copy with implcitly defined exec inserted earlier is an exclusion, it
   // does not really modify exec.
   for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
-    if (I->modifiesRegister(Exec, TRI) &&
+    if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
         !(I->isCopy() && I->getOperand(0).getReg() != Exec))
       return;
 
@@ -560,16 +479,12 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
 
   // This doesn't actually need LiveIntervals, but we can preserve them.
   LIS = getAnalysisIfAvailable<LiveIntervals>();
-  DT = getAnalysisIfAvailable<MachineDominatorTree>();
-  MLI = getAnalysisIfAvailable<MachineLoopInfo>();
-
   MRI = &MF.getRegInfo();
   BoolRC = TRI->getBoolRC();
 
   if (ST.isWave32()) {
     AndOpc = AMDGPU::S_AND_B32;
     OrOpc = AMDGPU::S_OR_B32;
-    OrTermOpc = AMDGPU::S_OR_B32_term;
     XorOpc = AMDGPU::S_XOR_B32;
     MovTermOpc = AMDGPU::S_MOV_B32_term;
     Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
@@ -579,7 +494,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
   } else {
     AndOpc = AMDGPU::S_AND_B64;
     OrOpc = AMDGPU::S_OR_B64;
-    OrTermOpc = AMDGPU::S_OR_B64_term;
     XorOpc = AMDGPU::S_XOR_B64;
     MovTermOpc = AMDGPU::S_MOV_B64_term;
     Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
@@ -592,11 +506,11 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; BI = NextBB) {
     NextBB = std::next(BI);
-    MachineBasicBlock *MBB = &*BI;
+    MachineBasicBlock &MBB = *BI;
 
     MachineBasicBlock::iterator I, Next, Last;
 
-    for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
+    for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
       Next = std::next(I);
       MachineInstr &MI = *I;
 
@@ -617,24 +531,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
         emitLoop(MI);
         break;
 
-      case AMDGPU::SI_END_CF: {
-        MachineInstr *NextMI = nullptr;
-
-        if (Next != MBB->end())
-          NextMI = &*Next;
-
-        MBB = emitEndCf(MI);
-
-        if (NextMI) {
-          MBB = NextMI->getParent();
-          Next = NextMI->getIterator();
-          Last = MBB->end();
-        }
-
-        NextBB = std::next(MBB->getIterator());
-        BE = MF.end();
+      case AMDGPU::SI_END_CF:
+        emitEndCf(MI);
         break;
-      }
+
       case AMDGPU::S_AND_B64:
       case AMDGPU::S_OR_B64:
       case AMDGPU::S_AND_B32:
@@ -650,7 +550,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
       }
 
       // Replay newly inserted code to combine masks
-      Next = (Last == MBB->end()) ? MBB->begin() : Last;
+      Next = (Last == MBB.end()) ? MBB.begin() : Last;
     }
   }
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index b8e076f5efd..cc9b46a7558 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -202,12 +202,6 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
     MI.setDesc(TII.get(AMDGPU::S_OR_B32));
     return true;
   }
-  case AMDGPU::S_OR_B64_term: {
-    // This is only a terminator to get the correct spill code placement during
-    // register allocation.
-    MI.setDesc(TII.get(AMDGPU::S_OR_B64));
-    return true;
-  }
   case AMDGPU::S_ANDN2_B64_term: {
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 0eb850fe176..681c3b35f75 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -82,14 +82,14 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
   return new SIOptimizeExecMaskingPreRA();
 }
 
-static bool isEndCF(const MachineInstr &MI, const GCNSubtarget &ST,
-                    const SIRegisterInfo *TRI) {
+static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI,
+                    const GCNSubtarget &ST) {
   if (ST.isWave32()) {
-    return MI.getOpcode() == AMDGPU::S_OR_B32_term &&
+    return MI.getOpcode() == AMDGPU::S_OR_B32 &&
            MI.modifiesRegister(AMDGPU::EXEC_LO, TRI);
   }
 
-  return MI.getOpcode() == AMDGPU::S_OR_B64_term &&
+  return MI.getOpcode() == AMDGPU::S_OR_B64 &&
          MI.modifiesRegister(AMDGPU::EXEC, TRI);
 }
 
@@ -379,13 +379,13 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
 
     // Try to collapse adjacent endifs.
     auto E = MBB.end();
-    auto Lead = MBB.getFirstTerminator();
-    if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, ST, TRI))
+    auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
+    if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI, ST))
       continue;
 
     MachineBasicBlock *TmpMBB = &MBB;
     auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
-    if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, ST, TRI) ||
+    if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI, ST) ||
         !getOrExecSource(*NextLead, *TII, MRI, ST))
       continue;
```

