summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-11-13 18:20:54 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-11-13 18:20:54 +0000
commitdc45274d546b531ca64308cfe4304042e341c463 (patch)
treeb70f3b53f74a692128c5fd8b8f21618a9a4e75d0 /llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
parente2399f9e0e0272ad12f649c8eff0da49dcbf2068 (diff)
downloadbcm5719-llvm-dc45274d546b531ca64308cfe4304042e341c463.tar.gz
bcm5719-llvm-dc45274d546b531ca64308cfe4304042e341c463.zip
AMDGPU: Implement SGPR spilling with scalar stores
nThis avoids the nasty problems caused by using memory instructions that read the exec mask while spilling / restoring registers used for control flow masking, but only for VI when these were added. This always uses the scalar stores when enabled currently, but it may be better to still try to spill to a VGPR and use this on the fallback memory path. The cache also needs to be flushed before wave termination if a scalar store is used. llvm-svn: 286766
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertWaits.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaits.cpp43
1 files changed, 42 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index a9e693917bf..da4db63ab33 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -532,6 +532,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
IV = getIsaVersion(ST->getFeatureBits());
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
HardwareLimits.Named.VM = getVmcntBitMask(IV);
HardwareLimits.Named.EXP = getExpcntBitMask(IV);
@@ -543,20 +544,27 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
IsFlatOutstanding = false;
- ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
+ ReturnsVoid = MFI->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
SmallVector<MachineInstr *, 4> RemoveMI;
+ SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+
+ bool HaveScalarStores = false;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (!HaveScalarStores && TII->isScalarStore(*I))
+ HaveScalarStores = true;
+
if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
@@ -625,12 +633,45 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
pushInstruction(MBB, I, Increment);
handleSendMsg(MBB, I);
+
+ if (I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN)
+ EndPgmBlocks.push_back(&MBB);
}
// Wait for everything at the end of the MBB
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
+ if (HaveScalarStores) {
+ // If scalar writes are used, the cache must be flushed or else the next
+ // wave to reuse the same scratch memory can be clobbered.
+ //
+ // Insert s_dcache_wb at wave termination points if there were any scalar
+ // stores, and only if the cache hasn't already been flushed. This could be
+ // improved by looking across blocks for flushes in postdominating blocks
+ // from the stores but an explicitly requested flush is probably very rare.
+ for (MachineBasicBlock *MBB : EndPgmBlocks) {
+ bool SeenDCacheWB = false;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+
+ if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+ SeenDCacheWB = true;
+ else if (TII->isScalarStore(*I))
+ SeenDCacheWB = false;
+
+ // FIXME: It would be better to insert this before a waitcnt if any.
+ if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
+ Changes = true;
+ BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ }
+ }
+ }
+ }
+
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
OpenPOWER on IntegriCloud