diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 115 |
1 files changed, 71 insertions, 44 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 21c0a3977a2..759df16e6d3 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -397,28 +397,36 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, - const MachineOperand *SrcDst, + int Index, + unsigned ValueReg, + bool IsKill, unsigned ScratchRsrcReg, - unsigned ScratchOffset, - int64_t Offset, + unsigned ScratchOffsetReg, + int64_t InstOffset, + MachineMemOperand *MMO, RegScavenger *RS) const { - unsigned Value = SrcDst->getReg(); - bool IsKill = SrcDst->isKill(); MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); - DebugLoc DL = MI->getDebugLoc(); - bool IsStore = MI->mayStore(); + const MCInstrDesc &Desc = TII->get(LoadStoreOp); + const DebugLoc &DL = MI->getDebugLoc(); + bool IsStore = Desc.mayStore(); bool RanOutOfSGPRs = false; bool Scavenged = false; - unsigned SOffset = ScratchOffset; - unsigned OriginalImmOffset = Offset; + unsigned SOffset = ScratchOffsetReg; - unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); + const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg); + unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32; unsigned Size = NumSubRegs * 4; + int64_t Offset = InstOffset + MFI.getObjectOffset(Index); + const int64_t OriginalImmOffset = Offset; + + unsigned Align = MFI.getObjectAlignment(Index); + const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo(); if (!isUInt<12>(Offset + Size)) { SOffset = AMDGPU::NoRegister; @@ -437,19 +445,23 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, // subtract the offset after the spill to return ScratchOffset to it's // original value. RanOutOfSGPRs = true; - SOffset = ScratchOffset; + SOffset = ScratchOffsetReg; } else { Scavenged = true; } + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset) - .addReg(ScratchOffset) - .addImm(Offset); + .addReg(ScratchOffsetReg) + .addImm(Offset); + Offset = 0; } - for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) { + const unsigned EltSize = 4; + + for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) { unsigned SubReg = NumSubRegs == 1 ? - Value : getSubReg(Value, getSubRegFromChannel(i)); + ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i)); unsigned SOffsetRegState = 0; unsigned SrcDstRegState = getDefRegState(!IsStore); @@ -459,7 +471,12 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, SrcDstRegState |= getKillRegState(IsKill); } - BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) + MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i); + MachineMemOperand *NewMMO + = MF->getMachineMemOperand(PInfo, MMO->getFlags(), + EltSize, MinAlign(Align, EltSize * i)); + + BuildMI(*MBB, MI, DL, Desc) .addReg(SubReg, getDefRegState(!IsStore)) .addReg(ScratchRsrcReg) .addReg(SOffset, SOffsetRegState) @@ -467,14 +484,15 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe - .addReg(Value, RegState::Implicit | SrcDstRegState) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + .addMemOperand(NewMMO) + .addReg(ValueReg, RegState::Implicit | SrcDstRegState); } + if (RanOutOfSGPRs) { // Subtract the offset we added to the ScratchOffset register. - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset) - .addReg(ScratchOffset) - .addImm(OriginalImmOffset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg) + .addReg(ScratchOffsetReg) + .addImm(OriginalImmOffset); } } @@ -497,6 +515,8 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM; + const unsigned EltSize = 4; + // SubReg carries the "Kill" flag when SubReg == SuperReg. unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { @@ -518,13 +538,12 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, } int64_t FrOffset = FrameInfo.getObjectOffset(Index); - unsigned Size = FrameInfo.getObjectSize(Index); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index); + = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - Size, Align); + EltSize, MinAlign(Align, EltSize * i)); unsigned OffsetReg = AMDGPU::M0; // Add i * 4 wave offset. @@ -597,13 +616,12 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, Mov.addReg(SuperReg, RegState::Implicit | SuperKillState); } - unsigned Size = FrameInfo.getObjectSize(Index); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index); + = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - Size, Align); + EltSize, MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) .addReg(TmpReg, RegState::Kill) // src .addFrameIndex(Index) // vaddr @@ -644,18 +662,19 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int64_t FrOffset = FrameInfo.getObjectOffset(Index); + const unsigned EltSize = 4; + for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { unsigned SubReg = NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i)); if (SpillToSMEM) { - unsigned Size = FrameInfo.getObjectSize(Index); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index); + = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - Size, Align); + EltSize, MinAlign(Align, EltSize * i)); unsigned OffsetReg = AMDGPU::M0; @@ -693,16 +712,15 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, } else { // Restore SGPR from a stack slot. // FIXME: We should use S_LOAD_DWORD here for VI. - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned Align = FrameInfo.getObjectAlignment(Index); - unsigned Size = FrameInfo.getObjectSize(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index); + = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); - MachineMemOperand *MMO = MF->getMachineMemOperand( - PtrInfo, MachineMemOperand::MOLoad, Size, Align); + MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, + MachineMemOperand::MOLoad, EltSize, + MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) .addFrameIndex(Index) // vaddr @@ -710,8 +728,7 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, .addReg(MFI->getScratchWaveOffsetReg()) // soffset .addImm(i * 4) // offset .addMemOperand(MMO); - BuildMI(*MBB, MI, DL, - TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) .addReg(TmpReg, RegState::Kill) .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); } @@ -767,28 +784,38 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V128_SAVE: case AMDGPU::SI_SPILL_V96_SAVE: case AMDGPU::SI_SPILL_V64_SAVE: - case AMDGPU::SI_SPILL_V32_SAVE: + case AMDGPU::SI_SPILL_V32_SAVE: { + const MachineOperand *VData = TII->getNamedOperand(*MI, + AMDGPU::OpName::vdata); buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, - TII->getNamedOperand(*MI, AMDGPU::OpName::vdata), + Index, + VData->getReg(), VData->isKill(), TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(), - FrameInfo.getObjectOffset(Index) + - TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS); + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), + *MI->memoperands_begin(), + RS); MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); MI->eraseFromParent(); break; + } case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_V96_RESTORE: case AMDGPU::SI_SPILL_V128_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: { + const MachineOperand *VData = TII->getNamedOperand(*MI, + AMDGPU::OpName::vdata); + buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET, - TII->getNamedOperand(*MI, AMDGPU::OpName::vdata), + Index, + VData->getReg(), VData->isKill(), TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(), - FrameInfo.getObjectOffset(Index) + - TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS); + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), + *MI->memoperands_begin(), + RS); MI->eraseFromParent(); break; } |