author | Tom Stellard <thomas.stellard@amd.com> | 2015-06-13 03:28:10 +0000
---|---|---
committer | Tom Stellard <thomas.stellard@amd.com> | 2015-06-13 03:28:10 +0000
commit | 45bb48ea197fe496865387120c7c55b56f0717d6 (patch) |
tree | 3914fb8c4ace9ea4dee024fc944d51dc45adf401 /llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp |
parent | 8fa9677d4eb1cd43973eb59acc12b0534691d604 (diff) |
download | bcm5719-llvm-45bb48ea197fe496865387120c7c55b56f0717d6.tar.gz bcm5719-llvm-45bb48ea197fe496865387120c7c55b56f0717d6.zip |
R600 -> AMDGPU rename
llvm-svn: 239657
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 543 |
1 file changed, 543 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
new file mode 100644
index 00000000000..db2ff0b1f95
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -0,0 +1,543 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+  Reserved.set(AMDGPU::EXEC);
+
+  // EXEC_LO and EXEC_HI could be allocated and used as regular registers,
+  // but this seems likely to result in bugs, so I'm marking them as reserved.
+  Reserved.set(AMDGPU::EXEC_LO);
+  Reserved.set(AMDGPU::EXEC_HI);
+
+  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+  Reserved.set(AMDGPU::FLAT_SCR);
+  Reserved.set(AMDGPU::FLAT_SCR_LO);
+  Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+  // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
+  Reserved.set(AMDGPU::VGPR255);
+  Reserved.set(AMDGPU::VGPR254);
+
+  // Tonga and Iceland can only allocate a fixed number of SGPRs due
+  // to a hw bug.
+  if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
+    unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+    // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
+    // Assume XNACK_MASK is unused.
+    unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+
+    for (unsigned i = Limit; i < NumSGPRs; ++i) {
+      unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
+      MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
+
+      for (; R.isValid(); ++R)
+        Reserved.set(*R);
+    }
+  }
+
+  return Reserved;
+}
+
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+                                                unsigned Idx) const {
+
+  const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
+  // FIXME: We should adjust the max number of waves based on LDS size.
+  unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
+                                          STI.getMaxWavesPerCU());
+  unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
+
+  for (regclass_iterator I = regclass_begin(), E = regclass_end();
+       I != E; ++I) {
+
+    unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+    unsigned Limit;
+
+    if (isSGPRClass(*I)) {
+      Limit = SGPRLimit / NumSubRegs;
+    } else {
+      Limit = VGPRLimit / NumSubRegs;
+    }
+
+    const int *Sets = getRegClassPressureSets(*I);
+    assert(Sets);
+    for (unsigned i = 0; Sets[i] != -1; ++i) {
+      if (Sets[i] == (int)Idx)
+        return Limit;
+    }
+  }
+  return 256;
+}
+
+bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
+  return Fn.getFrameInfo()->hasStackObjects();
+}
+
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+  switch (Op) {
+  case AMDGPU::SI_SPILL_S512_SAVE:
+  case AMDGPU::SI_SPILL_S512_RESTORE:
+  case AMDGPU::SI_SPILL_V512_SAVE:
+  case AMDGPU::SI_SPILL_V512_RESTORE:
+    return 16;
+  case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_V256_SAVE:
+  case AMDGPU::SI_SPILL_V256_RESTORE:
+    return 8;
+  case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S128_RESTORE:
+  case AMDGPU::SI_SPILL_V128_SAVE:
+  case AMDGPU::SI_SPILL_V128_RESTORE:
+    return 4;
+  case AMDGPU::SI_SPILL_V96_SAVE:
+  case AMDGPU::SI_SPILL_V96_RESTORE:
+    return 3;
+  case AMDGPU::SI_SPILL_S64_SAVE:
+  case AMDGPU::SI_SPILL_S64_RESTORE:
+  case AMDGPU::SI_SPILL_V64_SAVE:
+  case AMDGPU::SI_SPILL_V64_RESTORE:
+    return 2;
+  case AMDGPU::SI_SPILL_S32_SAVE:
+  case AMDGPU::SI_SPILL_S32_RESTORE:
+  case AMDGPU::SI_SPILL_V32_SAVE:
+  case AMDGPU::SI_SPILL_V32_RESTORE:
+    return 1;
+  default: llvm_unreachable("Invalid spill opcode");
+  }
+}
+
+void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                                           unsigned LoadStoreOp,
+                                           unsigned Value,
+                                           unsigned ScratchRsrcReg,
+                                           unsigned ScratchOffset,
+                                           int64_t Offset,
+                                           RegScavenger *RS) const {
+
+  MachineBasicBlock *MBB = MI->getParent();
+  const MachineFunction *MF = MI->getParent()->getParent();
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+  LLVMContext &Ctx = MF->getFunction()->getContext();
+  DebugLoc DL = MI->getDebugLoc();
+  bool IsLoad = TII->get(LoadStoreOp).mayLoad();
+
+  bool RanOutOfSGPRs = false;
+  unsigned SOffset = ScratchOffset;
+
+  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+  unsigned Size = NumSubRegs * 4;
+
+  if (!isUInt<12>(Offset + Size)) {
+    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+    if (SOffset == AMDGPU::NoRegister) {
+      RanOutOfSGPRs = true;
+      SOffset = AMDGPU::SGPR0;
+    }
+    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+      .addReg(ScratchOffset)
+      .addImm(Offset);
+    Offset = 0;
+  }
+
+  if (RanOutOfSGPRs)
+    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");
+
+  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
+    unsigned SubReg = NumSubRegs > 1 ?
+        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
+        Value;
+    bool IsKill = (i == e - 1);
+
+    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+      .addReg(SubReg, getDefRegState(IsLoad))
+      .addReg(ScratchRsrcReg, getKillRegState(IsKill))
+      .addReg(SOffset)
+      .addImm(Offset)
+      .addImm(0) // glc
+      .addImm(0) // slc
+      .addImm(0) // tfe
+      .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+  }
+}
+
+void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                                         int SPAdj, unsigned FIOperandNum,
+                                         RegScavenger *RS) const {
+  MachineFunction *MF = MI->getParent()->getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+  DebugLoc DL = MI->getDebugLoc();
+
+  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
+  int Index = MI->getOperand(FIOperandNum).getIndex();
+
+  switch (MI->getOpcode()) {
+  // SGPR register spill
+  case AMDGPU::SI_SPILL_S512_SAVE:
+  case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S64_SAVE:
+  case AMDGPU::SI_SPILL_S32_SAVE: {
+    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+                                         &AMDGPU::SGPR_32RegClass, i);
+      struct SIMachineFunctionInfo::SpilledReg Spill =
+          MFI->getSpilledReg(MF, Index, i);
+
+      if (Spill.VGPR == AMDGPU::NoRegister) {
+        LLVMContext &Ctx = MF->getFunction()->getContext();
+        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+      }
+
+      BuildMI(*MBB, MI, DL,
+              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+              Spill.VGPR)
+        .addReg(SubReg)
+        .addImm(Spill.Lane);
+
+    }
+    MI->eraseFromParent();
+    break;
+  }
+
+  // SGPR register restore
+  case AMDGPU::SI_SPILL_S512_RESTORE:
+  case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S128_RESTORE:
+  case AMDGPU::SI_SPILL_S64_RESTORE:
+  case AMDGPU::SI_SPILL_S32_RESTORE: {
+    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+                                         &AMDGPU::SGPR_32RegClass, i);
+      struct SIMachineFunctionInfo::SpilledReg Spill =
+          MFI->getSpilledReg(MF, Index, i);
+
+      if (Spill.VGPR == AMDGPU::NoRegister) {
+        LLVMContext &Ctx = MF->getFunction()->getContext();
+        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+      }
+
+      BuildMI(*MBB, MI, DL,
+              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              SubReg)
+        .addReg(Spill.VGPR)
+        .addImm(Spill.Lane)
+        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+    }
+
+    // TODO: only do this when it is needed
+    switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
+    case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+      // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
+      TII->insertNOPs(MI, 3);
+      break;
+    case AMDGPUSubtarget::SEA_ISLANDS:
+      break;
+    default: // VOLCANIC_ISLANDS and later
+      // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
+      // and later. This also applies to VALUs which write VCC, but we're
+      // unlikely to see VMEM use VCC.
+      TII->insertNOPs(MI, 4);
+    }
+
+    MI->eraseFromParent();
+    break;
+  }
+
+  // VGPR register spill
+  case AMDGPU::SI_SPILL_V512_SAVE:
+  case AMDGPU::SI_SPILL_V256_SAVE:
+  case AMDGPU::SI_SPILL_V128_SAVE:
+  case AMDGPU::SI_SPILL_V96_SAVE:
+  case AMDGPU::SI_SPILL_V64_SAVE:
+  case AMDGPU::SI_SPILL_V32_SAVE:
+    buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+          TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
+          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+          FrameInfo->getObjectOffset(Index), RS);
+    MI->eraseFromParent();
+    break;
+  case AMDGPU::SI_SPILL_V32_RESTORE:
+  case AMDGPU::SI_SPILL_V64_RESTORE:
+  case AMDGPU::SI_SPILL_V96_RESTORE:
+  case AMDGPU::SI_SPILL_V128_RESTORE:
+  case AMDGPU::SI_SPILL_V256_RESTORE:
+  case AMDGPU::SI_SPILL_V512_RESTORE: {
+    buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+          TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
+          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+          FrameInfo->getObjectOffset(Index), RS);
+    MI->eraseFromParent();
+    break;
+  }
+
+  default: {
+    int64_t Offset = FrameInfo->getObjectOffset(Index);
+    FIOp.ChangeToImmediate(Offset);
+    if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
+      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
+      BuildMI(*MBB, MI, MI->getDebugLoc(),
+              TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+        .addImm(Offset);
+      FIOp.ChangeToRegister(TmpReg, false, false, true);
+    }
+  }
+  }
+}
+
+const TargetRegisterClass *SIRegisterInfo::getCFGStructurizerRegClass(
+    MVT VT) const {
+  switch (VT.SimpleTy) {
+  default:
+  case MVT::i32: return &AMDGPU::VGPR_32RegClass;
+  }
+}
+
+unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
+  return getEncodingValue(Reg) & 0xff;
+}
+
+const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
+  assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+
+  static const TargetRegisterClass *BaseClasses[] = {
+    &AMDGPU::VGPR_32RegClass,
+    &AMDGPU::SReg_32RegClass,
+    &AMDGPU::VReg_64RegClass,
+    &AMDGPU::SReg_64RegClass,
+    &AMDGPU::VReg_96RegClass,
+    &AMDGPU::VReg_128RegClass,
+    &AMDGPU::SReg_128RegClass,
+    &AMDGPU::VReg_256RegClass,
+    &AMDGPU::SReg_256RegClass,
+    &AMDGPU::VReg_512RegClass
+  };
+
+  for (const TargetRegisterClass *BaseClass : BaseClasses) {
+    if (BaseClass->contains(Reg)) {
+      return BaseClass;
+    }
+  }
+  return nullptr;
+}
+
+bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
+  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
+         getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
+         getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
+         getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
+         getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
+         getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+}
+
+const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
+                                        const TargetRegisterClass *SRC) const {
+  if (hasVGPRs(SRC)) {
+    return SRC;
+  } else if (SRC == &AMDGPU::SCCRegRegClass) {
+    return &AMDGPU::VCCRegRegClass;
+  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
+    return &AMDGPU::VGPR_32RegClass;
+  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
+    return &AMDGPU::VReg_64RegClass;
+  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
+    return &AMDGPU::VReg_128RegClass;
+  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
+    return &AMDGPU::VReg_256RegClass;
+  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
+    return &AMDGPU::VReg_512RegClass;
+  }
+  return nullptr;
+}
+
+const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
+    const TargetRegisterClass *RC, unsigned SubIdx) const {
+  if (SubIdx == AMDGPU::NoSubRegister)
+    return RC;
+
+  // If this register has a sub-register, we can safely assume it is a 32-bit
+  // register, because all of SI's sub-registers are 32-bit.
+  if (isSGPRClass(RC)) {
+    return &AMDGPU::SGPR_32RegClass;
+  } else {
+    return &AMDGPU::VGPR_32RegClass;
+  }
+}
+
+unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
+                                          const TargetRegisterClass *SubRC,
+                                          unsigned Channel) const {
+
+  switch (Reg) {
+  case AMDGPU::VCC:
+    switch (Channel) {
+    case 0: return AMDGPU::VCC_LO;
+    case 1: return AMDGPU::VCC_HI;
+    default: llvm_unreachable("Invalid SubIdx for VCC");
+    }
+
+  case AMDGPU::FLAT_SCR:
+    switch (Channel) {
+    case 0:
+      return AMDGPU::FLAT_SCR_LO;
+    case 1:
+      return AMDGPU::FLAT_SCR_HI;
+    default:
+      llvm_unreachable("Invalid SubIdx for FLAT_SCR");
+    }
+    break;
+
+  case AMDGPU::EXEC:
+    switch (Channel) {
+    case 0:
+      return AMDGPU::EXEC_LO;
+    case 1:
+      return AMDGPU::EXEC_HI;
+    default:
+      llvm_unreachable("Invalid SubIdx for EXEC");
+    }
+    break;
+  }
+
+  const TargetRegisterClass *RC = getPhysRegClass(Reg);
+  // 32-bit registers don't have sub-registers, so we can just return the
+  // Reg. We need to have this check here, because the calculation below
+  // using getHWRegIndex() will fail with special 32-bit registers like
+  // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
+  if (RC->getSize() == 4) {
+    assert(Channel == 0);
+    return Reg;
+  }
+
+  unsigned Index = getHWRegIndex(Reg);
+  return SubRC->getRegister(Index + Channel);
+}
+
+bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
+  return OpType == AMDGPU::OPERAND_REG_IMM32;
+}
+
+bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
+  if (opCanUseLiteralConstant(OpType))
+    return true;
+
+  return OpType == AMDGPU::OPERAND_REG_INLINE_C;
+}
+
+unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
+                                           enum PreloadedValue Value) const {
+
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  switch (Value) {
+  case SIRegisterInfo::TGID_X:
+    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
+  case SIRegisterInfo::TGID_Y:
+    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
+  case SIRegisterInfo::TGID_Z:
+    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
+  case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
+    if (MFI->getShaderType() != ShaderType::COMPUTE)
+      return MFI->ScratchOffsetReg;
+    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
+  case SIRegisterInfo::SCRATCH_PTR:
+    return AMDGPU::SGPR2_SGPR3;
+  case SIRegisterInfo::INPUT_PTR:
+    return AMDGPU::SGPR0_SGPR1;
+  case SIRegisterInfo::TIDIG_X:
+    return AMDGPU::VGPR0;
+  case SIRegisterInfo::TIDIG_Y:
+    return AMDGPU::VGPR1;
+  case SIRegisterInfo::TIDIG_Z:
+    return AMDGPU::VGPR2;
+  }
+  llvm_unreachable("unexpected preloaded value type");
+}
+
+/// \brief Returns a register that is not used at any point in the function.
+/// If all registers are used, then this function will return
+/// AMDGPU::NoRegister.
+unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+                                            const TargetRegisterClass *RC) const {
+
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+       I != E; ++I) {
+    if (!MRI.isPhysRegUsed(*I))
+      return *I;
+  }
+  return AMDGPU::NoRegister;
+}
+
+unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
+  switch (WaveCount) {
+  case 10: return 24;
+  case 9: return 28;
+  case 8: return 32;
+  case 7: return 36;
+  case 6: return 40;
+  case 5: return 48;
+  case 4: return 64;
+  case 3: return 84;
+  case 2: return 128;
+  default: return 256;
+  }
+}
+
+unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+                                            unsigned WaveCount) const {
+  if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    switch (WaveCount) {
+    case 10: return 80;
+    case 9: return 80;
+    case 8: return 96;
+    default: return 102;
+    }
+  } else {
+    switch (WaveCount) {
+    case 10: return 48;
+    case 9: return 56;
+    case 8: return 64;
+    case 7: return 72;
+    case 6: return 80;
+    case 5: return 96;
+    default: return 103;
+    }
+  }
+}
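The two tables at the bottom of the new file encode the GCN occupancy trade-off: the more wavefronts that should stay resident per compute unit, the fewer registers each wavefront may use. The patch queries them in the forward direction (wave count to register budget), but occupancy is usually reasoned about in the inverse direction. Below is a minimal standalone sketch of that inverse query, reusing the VGPR budgets quoted in the patch; `wavesForVGPRCount` and the `main` driver are illustrative names, not part of LLVM.

```cpp
#include <cstdio>

// Invert the getNumVGPRsAllowed table from the patch: given the number of
// VGPRs a kernel uses, return the highest waves-per-CU count whose per-wave
// VGPR budget still fits. Illustrative sketch only, not LLVM code.
static unsigned wavesForVGPRCount(unsigned NumVGPRs) {
  // {wave count, VGPRs allowed per wave}, in decreasing wave-count order,
  // copied from SIRegisterInfo::getNumVGPRsAllowed above.
  static const unsigned Table[][2] = {
      {10, 24}, {9, 28}, {8, 32}, {7, 36}, {6, 40},
      {5, 48},  {4, 64}, {3, 84}, {2, 128}, {1, 256}};
  for (const auto &Row : Table)
    if (NumVGPRs <= Row[1])
      return Row[0]; // Highest occupancy whose budget covers the usage.
  return 0; // More than 256 VGPRs cannot be allocated at all.
}

int main() {
  printf("40 VGPRs -> %u waves/CU\n", wavesForVGPRCount(40)); // prints 6
  printf("65 VGPRs -> %u waves/CU\n", wavesForVGPRCount(65)); // prints 3
}
```

The same budgets explain why `getRegPressureSetLimit` above divides the SGPR or VGPR limit by the register-class size in 32-bit units: a four-dword VGPR tuple consumes four slots of the same per-wave budget.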