| author | Marek Olsak <marek.olsak@amd.com> | 2016-11-25 16:03:34 +0000 |
|---|---|---|
| committer | Marek Olsak <marek.olsak@amd.com> | 2016-11-25 16:03:34 +0000 |
| commit | e3895bfb470de6d552b73af103c783ce062e2dcd (patch) | |
| tree | 6c68a10373e21d4f906970e17a1a685844c04129 /llvm/lib | |
| parent | dad553a5cf9d49493d64a8d55683338336f1a9f9 (diff) | |
Revert "AMDGPU: Implement SGPR spilling with scalar stores"
This reverts commit 4404d0d6e354e80dd7f8f0a0e12d8ad809cf007e.
llvm-svn: 287936
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 43 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 106 |
3 files changed, 10 insertions, 153 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index da4db63ab33..a9e693917bf 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -532,7 +532,6 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
   IV = getIsaVersion(ST->getFeatureBits());
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
   HardwareLimits.Named.VM = getVmcntBitMask(IV);
   HardwareLimits.Named.EXP = getExpcntBitMask(IV);
@@ -544,27 +543,20 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
   LastOpcodeType = OTHER;
   LastInstWritesM0 = false;
   IsFlatOutstanding = false;
-  ReturnsVoid = MFI->returnsVoid();
+  ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
 
   memset(&UsedRegs, 0, sizeof(UsedRegs));
   memset(&DefinedRegs, 0, sizeof(DefinedRegs));
 
   SmallVector<MachineInstr *, 4> RemoveMI;
-  SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
-
-  bool HaveScalarStores = false;
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; ++BI) {
 
     MachineBasicBlock &MBB = *BI;
-
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
 
-      if (!HaveScalarStores && TII->isScalarStore(*I))
-        HaveScalarStores = true;
-
       if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
         // There is a hardware bug on CI/SI where SMRD instruction may corrupt
         // vccz bit, so when we detect that an instruction may read from a
@@ -633,45 +625,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
 
       pushInstruction(MBB, I, Increment);
       handleSendMsg(MBB, I);
-
-      if (I->getOpcode() == AMDGPU::S_ENDPGM ||
-          I->getOpcode() == AMDGPU::SI_RETURN)
-        EndPgmBlocks.push_back(&MBB);
     }
 
     // Wait for everything at the end of the MBB
     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
   }
 
-  if (HaveScalarStores) {
-    // If scalar writes are used, the cache must be flushed or else the next
-    // wave to reuse the same scratch memory can be clobbered.
-    //
-    // Insert s_dcache_wb at wave termination points if there were any scalar
-    // stores, and only if the cache hasn't already been flushed. This could be
-    // improved by looking across blocks for flushes in postdominating blocks
-    // from the stores but an explicitly requested flush is probably very rare.
-    for (MachineBasicBlock *MBB : EndPgmBlocks) {
-      bool SeenDCacheWB = false;
-
-      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-           I != E; ++I) {
-
-        if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
-          SeenDCacheWB = true;
-        else if (TII->isScalarStore(*I))
-          SeenDCacheWB = false;
-
-        // FIXME: It would be better to insert this before a waitcnt if any.
-        if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
-             I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
-          Changes = true;
-          BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
-        }
-      }
-    }
-  }
-
   for (MachineInstr *I : RemoveMI)
     I->eraseFromParent();
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e9fbde16e2a..63ce2583581 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -544,7 +544,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
     }
 
-    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
+    BuildMI(MBB, MI, DL, OpDesc)
       .addReg(SrcReg, getKillRegState(isKill)) // data
       .addFrameIndex(FrameIndex)               // addr
       .addMemOperand(MMO)
@@ -554,11 +554,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     // needing them, and need to ensure that the reserved registers are
     // correctly handled.
 
-    if (ST.hasScalarStores()) {
-      // m0 is used for offset to scalar stores if used to spill.
-      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
-    }
-
     return;
   }
 
@@ -648,17 +643,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
     }
 
-    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
+    BuildMI(MBB, MI, DL, OpDesc, DestReg)
      .addFrameIndex(FrameIndex) // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
 
-    if (ST.hasScalarStores()) {
-      // m0 is used for offset to scalar stores if used to spill.
-      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
-    }
-
     return;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 948ea113cd7..54fcbb507c8 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -24,12 +24,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> EnableSpillSGPRToSMEM(
-  "amdgpu-spill-sgpr-to-smem",
-  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
-  cl::init(true));
-
-
 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
   for (unsigned i = 0; PSets[i] != -1; ++i) {
     if (PSets[i] == (int)PSetID)
@@ -491,21 +485,18 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
 void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                                int Index,
                                RegScavenger *RS) const {
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineFunction *MF = MBB->getParent();
+  MachineFunction *MF = MI->getParent()->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineBasicBlock *MBB = MI->getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
   const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
+  const DebugLoc &DL = MI->getDebugLoc();
 
   unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
   unsigned SuperReg = MI->getOperand(0).getReg();
   bool IsKill = MI->getOperand(0).isKill();
-  const DebugLoc &DL = MI->getDebugLoc();
-
-  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-
-  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
 
   // SubReg carries the "Kill" flag when SubReg == SuperReg.
   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
@@ -513,55 +504,6 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     unsigned SubReg = NumSubRegs == 1 ?
       SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
 
-    if (SpillToSMEM) {
-      if (SuperReg == AMDGPU::M0) {
-        assert(NumSubRegs == 1);
-        unsigned CopyM0
-          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), CopyM0)
-          .addReg(AMDGPU::M0, getKillRegState(IsKill));
-
-        // The real spill now kills the temp copy.
-        SubReg = SuperReg = CopyM0;
-        IsKill = true;
-      }
-
-      int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-      unsigned Size = FrameInfo.getObjectSize(Index);
-      unsigned Align = FrameInfo.getObjectAlignment(Index);
-      MachinePointerInfo PtrInfo
-        = MachinePointerInfo::getFixedStack(*MF, Index);
-      MachineMemOperand *MMO
-        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
-                                   Size, Align);
-
-      unsigned OffsetReg = AMDGPU::M0;
-      // Add i * 4 wave offset.
-      //
-      // SMEM instructions only support a single offset, so increment the wave
-      // offset.
-
-      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
-      if (Offset != 0) {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg())
-          .addImm(Offset);
-      } else {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg());
-      }
-
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_STORE_DWORD_SGPR))
-        .addReg(SubReg, getKillRegState(IsKill)) // sdata
-        .addReg(MFI->getScratchRSrcReg())        // sbase
-        .addReg(OffsetReg)                       // soff
-        .addImm(0)                               // glc
-        .addMemOperand(MMO);
-
-      continue;
-    }
-
     struct SIMachineFunctionInfo::SpilledReg Spill =
       MFI->getSpilledReg(MF, Index, i);
     if (Spill.hasReg()) {
@@ -588,9 +530,10 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       // it are fixed.
     } else {
       // Spill SGPR to a frame index.
+      // FIXME we should use S_STORE_DWORD here for VI.
+      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
-      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
 
       MachineInstrBuilder Mov
         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
@@ -642,7 +585,6 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
 
   unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
   unsigned SuperReg = MI->getOperand(0).getReg();
-  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
 
   // m0 is not allowed as with readlane/writelane, so a temporary SGPR and
   // extra copy is needed.
@@ -652,44 +594,10 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
     SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
   }
 
-  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-
   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     unsigned SubReg = NumSubRegs == 1 ?
       SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
 
-    if (SpillToSMEM) {
-      unsigned Size = FrameInfo.getObjectSize(Index);
-      unsigned Align = FrameInfo.getObjectAlignment(Index);
-      MachinePointerInfo PtrInfo
-        = MachinePointerInfo::getFixedStack(*MF, Index);
-      MachineMemOperand *MMO
-        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
-                                   Size, Align);
-
-      unsigned OffsetReg = AMDGPU::M0;
-
-      // Add i * 4 offset
-      int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
-      if (Offset != 0) {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg())
-          .addImm(Offset);
-      } else {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg());
-      }
-
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_LOAD_DWORD_SGPR), SubReg)
-        .addReg(MFI->getScratchRSrcReg()) // sbase
-        .addReg(OffsetReg)                // soff
-        .addImm(0)                        // glc
-        .addMemOperand(MMO)
-        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
-
-      continue;
-    }
-
     SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i);
```