author    Marek Olsak <marek.olsak@amd.com>  2016-11-25 16:03:34 +0000
committer Marek Olsak <marek.olsak@amd.com>  2016-11-25 16:03:34 +0000
commit    e3895bfb470de6d552b73af103c783ce062e2dcd (patch)
tree      6c68a10373e21d4f906970e17a1a685844c04129 /llvm/lib
parent    dad553a5cf9d49493d64a8d55683338336f1a9f9 (diff)
download  bcm5719-llvm-e3895bfb470de6d552b73af103c783ce062e2dcd.tar.gz
          bcm5719-llvm-e3895bfb470de6d552b73af103c783ce062e2dcd.zip
Revert "AMDGPU: Implement SGPR spilling with scalar stores"
This reverts commit 4404d0d6e354e80dd7f8f0a0e12d8ad809cf007e.

llvm-svn: 287936
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertWaits.cpp  |  43
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp    |  14
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 106
3 files changed, 10 insertions(+), 153 deletions(-)
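For reference, the change being reverted taught SIRegisterInfo::spillSGPR and restoreSGPR to spill SGPRs with scalar (SMEM) buffer stores and loads instead of bouncing each 32-bit lane through a VGPR. The sketch below is condensed from the deleted SpillToSMEM path visible in the SIRegisterInfo.cpp hunks further down; it is illustrative only, and the surrounding state (MF, MBB, MI, DL, TII, MFI, ST, FrameInfo, Index, SubReg, IsKill, and the lane index i) belongs to spillSGPR and is assumed rather than shown.

    // Condensed from the deleted SpillToSMEM store path in spillSGPR: one
    // 32-bit lane (index i) of the spilled super-register is written with a
    // scalar buffer store rather than a v_mov plus vector buffer store.
    int64_t FrOffset = FrameInfo.getObjectOffset(Index);
    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, Index);
    MachineMemOperand *MMO =
        MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 FrameInfo.getObjectSize(Index),
                                 FrameInfo.getObjectAlignment(Index));

    // SMEM instructions take a single offset register, so the frame offset and
    // the 4-byte-per-lane offset are folded into m0, scaled by wavefront size.
    unsigned OffsetReg = AMDGPU::M0;
    int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
        .addReg(MFI->getScratchWaveOffsetReg())
        .addImm(Offset);

    // The scalar store: data, scratch resource descriptor, offset in m0.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_STORE_DWORD_SGPR))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg)                       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

The restore side mirrored this with S_BUFFER_LOAD_DWORD_SGPR, SIInsertWaits gained the s_dcache_wb flushes removed in the first file below, and storeRegToStackSlot/loadRegFromStackSlot marked m0 as implicitly defined on the spill pseudos; this commit removes all of those pieces together.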
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index da4db63ab33..a9e693917bf 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -532,7 +532,6 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
IV = getIsaVersion(ST->getFeatureBits());
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
HardwareLimits.Named.VM = getVmcntBitMask(IV);
HardwareLimits.Named.EXP = getExpcntBitMask(IV);
@@ -544,27 +543,20 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
IsFlatOutstanding = false;
- ReturnsVoid = MFI->returnsVoid();
+ ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
SmallVector<MachineInstr *, 4> RemoveMI;
- SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
-
- bool HaveScalarStores = false;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
-
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
- if (!HaveScalarStores && TII->isScalarStore(*I))
- HaveScalarStores = true;
-
if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
@@ -633,45 +625,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
pushInstruction(MBB, I, Increment);
handleSendMsg(MBB, I);
-
- if (I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN)
- EndPgmBlocks.push_back(&MBB);
}
// Wait for everything at the end of the MBB
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
- if (HaveScalarStores) {
- // If scalar writes are used, the cache must be flushed or else the next
- // wave to reuse the same scratch memory can be clobbered.
- //
- // Insert s_dcache_wb at wave termination points if there were any scalar
- // stores, and only if the cache hasn't already been flushed. This could be
- // improved by looking across blocks for flushes in postdominating blocks
- // from the stores but an explicitly requested flush is probably very rare.
- for (MachineBasicBlock *MBB : EndPgmBlocks) {
- bool SeenDCacheWB = false;
-
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
-
- if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
- SeenDCacheWB = true;
- else if (TII->isScalarStore(*I))
- SeenDCacheWB = false;
-
- // FIXME: It would be better to insert this before a waitcnt if any.
- if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
- Changes = true;
- BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
- }
- }
- }
- }
-
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
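The block just deleted was the flush half of the reverted feature: per the removed comment, when scalar writes are used the cache must be flushed before the wave terminates, or the next wave to reuse the same scratch memory can be clobbered, so the pass inserted s_dcache_wb at wave termination points whenever it had seen a scalar store. Condensed from the deleted EndPgmBlocks loop (all names come from the removed lines; the surrounding iteration state is assumed):

    // At each S_ENDPGM / SI_RETURN reached without an S_DCACHE_WB after the
    // last scalar store, write the scalar cache back before the wave exits.
    if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
         I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
      Changes = true;
      BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
    }

The revert removes both halves at once: the SMEM spill mechanism in SIRegisterInfo/SIInstrInfo and this flush machinery in SIInsertWaits.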
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e9fbde16e2a..63ce2583581 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -544,7 +544,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
}
- MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
+ BuildMI(MBB, MI, DL, OpDesc)
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
@@ -554,11 +554,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// needing them, and need to ensure that the reserved registers are
// correctly handled.
- if (ST.hasScalarStores()) {
- // m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
- }
-
return;
}
@@ -648,17 +643,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
}
- MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
+ BuildMI(MBB, MI, DL, OpDesc, DestReg)
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
.addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
- if (ST.hasScalarStores()) {
- // m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
- }
-
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 948ea113cd7..54fcbb507c8 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -24,12 +24,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToSMEM(
- "amdgpu-spill-sgpr-to-smem",
- cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
- cl::init(true));
-
-
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
for (unsigned i = 0; PSets[i] != -1; ++i) {
if (PSets[i] == (int)PSetID)
@@ -491,21 +485,18 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int Index,
RegScavenger *RS) const {
- MachineBasicBlock *MBB = MI->getParent();
- MachineFunction *MF = MBB->getParent();
+ MachineFunction *MF = MI->getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock *MBB = MI->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
+ const DebugLoc &DL = MI->getDebugLoc();
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned SuperReg = MI->getOperand(0).getReg();
bool IsKill = MI->getOperand(0).isKill();
- const DebugLoc &DL = MI->getDebugLoc();
-
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
// SubReg carries the "Kill" flag when SubReg == SuperReg.
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
@@ -513,55 +504,6 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
unsigned SubReg = NumSubRegs == 1 ?
SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
- if (SpillToSMEM) {
- if (SuperReg == AMDGPU::M0) {
- assert(NumSubRegs == 1);
- unsigned CopyM0
- = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), CopyM0)
- .addReg(AMDGPU::M0, getKillRegState(IsKill));
-
- // The real spill now kills the temp copy.
- SubReg = SuperReg = CopyM0;
- IsKill = true;
- }
-
- int64_t FrOffset = FrameInfo.getObjectOffset(Index);
- unsigned Size = FrameInfo.getObjectSize(Index);
- unsigned Align = FrameInfo.getObjectAlignment(Index);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index);
- MachineMemOperand *MMO
- = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- Size, Align);
-
- unsigned OffsetReg = AMDGPU::M0;
- // Add i * 4 wave offset.
- //
- // SMEM instructions only support a single offset, so increment the wave
- // offset.
-
- int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
- if (Offset != 0) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg())
- .addImm(Offset);
- } else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg());
- }
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_STORE_DWORD_SGPR))
- .addReg(SubReg, getKillRegState(IsKill)) // sdata
- .addReg(MFI->getScratchRSrcReg()) // sbase
- .addReg(OffsetReg) // soff
- .addImm(0) // glc
- .addMemOperand(MMO);
-
- continue;
- }
-
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
if (Spill.hasReg()) {
@@ -588,9 +530,10 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// it are fixed.
} else {
// Spill SGPR to a frame index.
+ // FIXME we should use S_STORE_DWORD here for VI.
+
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- // TODO: Should VI try to spill to VGPR and then spill to SMEM?
MachineInstrBuilder Mov
= BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
@@ -642,7 +585,6 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned SuperReg = MI->getOperand(0).getReg();
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
// m0 is not allowed as with readlane/writelane, so a temporary SGPR and
// extra copy is needed.
@@ -652,44 +594,10 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
}
- int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
unsigned SubReg = NumSubRegs == 1 ?
SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
- if (SpillToSMEM) {
- unsigned Size = FrameInfo.getObjectSize(Index);
- unsigned Align = FrameInfo.getObjectAlignment(Index);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index);
- MachineMemOperand *MMO
- = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- Size, Align);
-
- unsigned OffsetReg = AMDGPU::M0;
-
- // Add i * 4 offset
- int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
- if (Offset != 0) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg())
- .addImm(Offset);
- } else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(MFI->getScratchWaveOffsetReg());
- }
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_LOAD_DWORD_SGPR), SubReg)
- .addReg(MFI->getScratchRSrcReg()) // sbase
- .addReg(OffsetReg) // soff
- .addImm(0) // glc
- .addMemOperand(MMO)
- .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
-
- continue;
- }
-
SIMachineFunctionInfo::SpilledReg Spill
= MFI->getSpilledReg(MF, Index, i);
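One last note on the deleted offset arithmetic shared by the store and load paths: each spilled 32-bit lane used a single byte offset of ST.getWavefrontSize() * (FrOffset + 4 * i), i.e. the frame object's offset plus 4 bytes per spilled lane, scaled by the wavefront size. The helper below is hypothetical, written only to illustrate that formula under an assumed wavefront size of 64; it is not part of this commit.

    // Hypothetical helper illustrating the offset formula from the deleted
    // SpillToSMEM code above; not part of this commit.
    static int64_t smemSpillOffset(int64_t FrOffset, unsigned Lane,
                                   unsigned WavefrontSize = 64) {
      return WavefrontSize * (FrOffset + 4 * Lane); // byte offset folded into m0
    }
    // For a frame object at offset 0: lane 0 -> 0, lane 1 -> 256, lane 2 -> 512.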