diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/R600/SIInsertWaits.cpp | 54 | 
1 files changed, 46 insertions, 8 deletions
| diff --git a/llvm/lib/Target/R600/SIInsertWaits.cpp b/llvm/lib/Target/R600/SIInsertWaits.cpp index 712d97d33e9..f279e5e5c58 100644 --- a/llvm/lib/Target/R600/SIInsertWaits.cpp +++ b/llvm/lib/Target/R600/SIInsertWaits.cpp @@ -41,6 +41,12 @@ typedef union {  } Counters; +typedef enum { +  OTHER, +  SMEM, +  VMEM +} InstType; +  typedef Counters RegCounters[512];  typedef std::pair<unsigned, unsigned> RegInterval; @@ -73,6 +79,9 @@ private:    /// \brief Different export instruction types seen since last wait.    unsigned ExpInstrTypesSeen; +  /// \brief Type of the last opcode. +  InstType LastOpcodeType; +    /// \brief Get increment/decrement amount for this instruction.    Counters getHwCounts(MachineInstr &MI); @@ -83,7 +92,8 @@ private:    RegInterval getRegInterval(MachineOperand &Op);    /// \brief Handle instructions async components -  void pushInstruction(MachineInstr &MI); +  void pushInstruction(MachineBasicBlock &MBB, +                       MachineBasicBlock::iterator I);    /// \brief Insert the actual wait instruction    bool insertWait(MachineBasicBlock &MBB, @@ -203,10 +213,11 @@ RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {    return Result;  } -void SIInsertWaits::pushInstruction(MachineInstr &MI) { +void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, +                                    MachineBasicBlock::iterator I) {    // Get the hardware counter increments and sum them up -  Counters Increment = getHwCounts(MI); +  Counters Increment = getHwCounts(*I);    unsigned Sum = 0;    for (unsigned i = 0; i < 3; ++i) { @@ -215,17 +226,42 @@ void SIInsertWaits::pushInstruction(MachineInstr &MI) {    }    // If we don't increase anything then that's it -  if (Sum == 0) +  if (Sum == 0) { +    LastOpcodeType = OTHER;      return; +  } + +  if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { +    // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM +    // or SMEM clause, respectively. +    // +    // The temporary workaround is to break the clauses with S_NOP. +    // +    // The proper solution would be to allocate registers such that all source +    // and destination registers don't overlap, e.g. this is illegal: +    //   r0 = load r2 +    //   r2 = load r0 +    if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) || +        (LastOpcodeType == VMEM && Increment.Named.VM)) { +      // Insert a NOP to break the clause. +      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)) +          .addImm(0); +    } + +    if (TII->isSMRD(I->getOpcode())) +      LastOpcodeType = SMEM; +    else if (Increment.Named.VM) +      LastOpcodeType = VMEM; +  }    // Remember which export instructions we have seen    if (Increment.Named.EXP) { -    ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2; +    ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;    } -  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { +  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { -    MachineOperand &Op = MI.getOperand(i); +    MachineOperand &Op = I->getOperand(i);      if (!isOpRelevant(Op))        continue; @@ -302,6 +338,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,                    ((Counts.Named.EXP & 0x7) << 4) |                    ((Counts.Named.LGKM & 0x7) << 8)); +  LastOpcodeType = OTHER;    return true;  } @@ -356,6 +393,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {    WaitedOn = ZeroCounts;    LastIssued = ZeroCounts; +  LastOpcodeType = OTHER;    memset(&UsedRegs, 0, sizeof(UsedRegs));    memset(&DefinedRegs, 0, sizeof(DefinedRegs)); @@ -368,7 +406,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {           I != E; ++I) {        Changes |= insertWait(MBB, I, handleOperands(*I)); -      pushInstruction(*I); +      pushInstruction(MBB, I);      }      // Wait for everything at the end of the MBB | 

