diff options
author | Marek Olsak <marek.olsak@amd.com> | 2016-11-25 17:37:09 +0000 |
---|---|---|
committer | Marek Olsak <marek.olsak@amd.com> | 2016-11-25 17:37:09 +0000 |
commit | 79c05871a28872a01e5e75394c5c6382d5c434a5 (patch) | |
tree | 29ee3868682c95cf84a8912c3c911d6948f17d37 /llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | |
parent | c5fb167df05538ad6cd0b4e967187bf0bac44f19 (diff) | |
download | bcm5719-llvm-79c05871a28872a01e5e75394c5c6382d5c434a5.tar.gz bcm5719-llvm-79c05871a28872a01e5e75394c5c6382d5c434a5.zip |
AMDGPU/SI: Add back reverted SGPR spilling code, but disable it
suggested as a better solution by Matt
llvm-svn: 287942
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertWaits.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 43 |
1 file changed, 42 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index a9e693917bf..da4db63ab33 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -532,6 +532,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
   IV = getIsaVersion(ST->getFeatureBits());
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
   HardwareLimits.Named.VM = getVmcntBitMask(IV);
   HardwareLimits.Named.EXP = getExpcntBitMask(IV);
@@ -543,20 +544,27 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
   LastOpcodeType = OTHER;
   LastInstWritesM0 = false;
   IsFlatOutstanding = false;
-  ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
+  ReturnsVoid = MFI->returnsVoid();
 
   memset(&UsedRegs, 0, sizeof(UsedRegs));
   memset(&DefinedRegs, 0, sizeof(DefinedRegs));
 
   SmallVector<MachineInstr *, 4> RemoveMI;
+  SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+
+  bool HaveScalarStores = false;
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; ++BI) {
 
     MachineBasicBlock &MBB = *BI;
+
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
 
+      if (!HaveScalarStores && TII->isScalarStore(*I))
+        HaveScalarStores = true;
+
       if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
         // There is a hardware bug on CI/SI where SMRD instruction may corrupt
         // vccz bit, so when we detect that an instruction may read from a
@@ -625,12 +633,45 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
       pushInstruction(MBB, I, Increment);
       handleSendMsg(MBB, I);
+
+      if (I->getOpcode() == AMDGPU::S_ENDPGM ||
+          I->getOpcode() == AMDGPU::SI_RETURN)
+        EndPgmBlocks.push_back(&MBB);
     }
 
     // Wait for everything at the end of the MBB
     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
   }
 
+  if (HaveScalarStores) {
+    // If scalar writes are used, the cache must be flushed or else the next
+    // wave to reuse the same scratch memory can be clobbered.
+    //
+    // Insert s_dcache_wb at wave termination points if there were any scalar
+    // stores, and only if the cache hasn't already been flushed. This could be
+    // improved by looking across blocks for flushes in postdominating blocks
+    // from the stores but an explicitly requested flush is probably very rare.
+    for (MachineBasicBlock *MBB : EndPgmBlocks) {
+      bool SeenDCacheWB = false;
+
+      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+           I != E; ++I) {
+
+        if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+          SeenDCacheWB = true;
+        else if (TII->isScalarStore(*I))
+          SeenDCacheWB = false;
+
+        // FIXME: It would be better to insert this before a waitcnt if any.
+        if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+             I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
+          Changes = true;
+          BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+        }
+      }
+    }
+  }
+
   for (MachineInstr *I : RemoveMI)
     I->eraseFromParent();