diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def | 33 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 165 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 10 |
3 files changed, 208 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index c56380702f9..f2daf47df5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -91,6 +91,28 @@ const RegisterBankInfo::ValueMapping ValMappings[] { {&PartMappings[17], 1} }; +const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] { + /*32-bit op*/ {0, 32, SGPRRegBank}, + /*2x32-bit op*/ {0, 32, SGPRRegBank}, + {32, 32, SGPRRegBank}, +/*<2x32-bit> op*/ {0, 64, SGPRRegBank}, + + /*32-bit op*/ {0, 32, VGPRRegBank}, + /*2x32-bit op*/ {0, 32, VGPRRegBank}, + {32, 32, VGPRRegBank}, +}; + + +// For instructions that can operate on 64 bits only in the scalar version. +const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] { + /*32-bit sgpr*/ {&SGPROnly64BreakDown[0], 1}, + /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2}, + /*64-bit sgpr */ {&SGPROnly64BreakDown[3], 1}, + + /*32-bit vgpr*/ {&SGPROnly64BreakDown[4], 1}, + /*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2} +}; + enum ValueMappingIdx { SCCStartIdx = 0, SGPRStartIdx = 2, @@ -127,5 +149,16 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, return &ValMappings[Idx]; } +const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID, + unsigned Size) { + assert(Size == 64); + + if (BankID == AMDGPU::VGPRRegBankID) + return &ValMappingsSGPR64OnlyVGPR32[4]; + + assert(BankID == AMDGPU::SGPRRegBankID); + return &ValMappingsSGPR64OnlyVGPR32[2]; +} + } // End AMDGPU namespace. } // End llvm namespace. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 17d888d8c88..51f6c445b74 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -16,6 +16,7 @@ #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -85,9 +86,36 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, Src.getID() == AMDGPU::VCCRegBankID)) return std::numeric_limits<unsigned>::max(); + if (Dst.getID() == AMDGPU::SCCRegBankID && + Src.getID() == AMDGPU::VCCRegBankID) + return std::numeric_limits<unsigned>::max(); + return RegisterBankInfo::copyCost(Dst, Src, Size); } +unsigned AMDGPURegisterBankInfo::getBreakDownCost( + const ValueMapping &ValMapping, + const RegisterBank *CurBank) const { + // Currently we should only see rewrites of defs since copies from VGPR to + // SGPR are illegal. + assert(CurBank == nullptr && "shouldn't see already assigned bank"); + + assert(ValMapping.NumBreakDowns == 2 && + ValMapping.BreakDown[0].Length == 32 && + ValMapping.BreakDown[0].StartIdx == 0 && + ValMapping.BreakDown[1].Length == 32 && + ValMapping.BreakDown[1].StartIdx == 32 && + ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank); + + // 32-bit extract of a 64-bit value is just access of a subregister, so free. + // TODO: Cost of 0 hits assert, though it's not clear it's what we really + // want. + + // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR + // alignment restrictions, but this probably isn't important. 
+ return 1; +} + const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( const TargetRegisterClass &RC) const { @@ -107,6 +135,48 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( InstructionMappings AltMappings; switch (MI.getOpcode()) { + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + if (Size != 64) + break; + + const InstructionMapping &SSMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&SSMapping); + + const InstructionMapping &VVMapping = getInstructionMapping( + 2, 2, getOperandsMapping( + {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&VVMapping); + + const InstructionMapping &SVMapping = getInstructionMapping( + 3, 3, getOperandsMapping( + {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&SVMapping); + + // SGPR in LHS is slightly preferable, so make VS more expensive than + // SV. 
+ const InstructionMapping &VSMapping = getInstructionMapping( + 3, 4, getOperandsMapping( + {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&VSMapping); + break; + } case TargetOpcode::G_LOAD: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); // FIXME: Should we be hard coding the size for these mappings? @@ -239,8 +309,85 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( return RegisterBankInfo::getInstrAlternativeMappings(MI); } +void AMDGPURegisterBankInfo::split64BitValueForMapping( + MachineIRBuilder &B, + SmallVector<unsigned, 2> &Regs, + unsigned Reg) const { + LLT S32 = LLT::scalar(32); + MachineRegisterInfo *MRI = B.getMRI(); + unsigned LoLHS = MRI->createGenericVirtualRegister(S32); + unsigned HiLHS = MRI->createGenericVirtualRegister(S32); + const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI); + MRI->setRegBank(LoLHS, *Bank); + MRI->setRegBank(HiLHS, *Bank); + + Regs.push_back(LoLHS); + Regs.push_back(HiLHS); + + B.buildInstr(AMDGPU::G_UNMERGE_VALUES) + .addDef(LoLHS) + .addDef(HiLHS) + .addUse(Reg); +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { + MachineInstr &MI = OpdMapper.getMI(); + unsigned Opc = MI.getOpcode(); + MachineRegisterInfo &MRI = OpdMapper.getMRI(); + switch (Opc) { + case AMDGPU::G_AND: + case AMDGPU::G_OR: + case AMDGPU::G_XOR: { + // 64-bit and is only available on the SALU, so split into 2 32-bit ops if + // there is a VGPR input. + unsigned DstReg = MI.getOperand(0).getReg(); + if (MRI.getType(DstReg).getSizeInBits() != 64) + break; + + SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0)); + SmallVector<unsigned, 2> Src0Regs(OpdMapper.getVRegs(1)); + SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2)); + + // All inputs are SGPRs, nothing special to do. 
+ if (DefRegs.empty()) { + assert(Src0Regs.empty() && Src1Regs.empty()); + break; + } + + assert(DefRegs.size() == 2); + assert(Src0Regs.size() == Src1Regs.size() && + (Src0Regs.empty() || Src0Regs.size() == 2)); + + // Depending on where the source registers came from, the generic code may + // have decided to split the inputs already or not. If not, we still need to + // extract the values. + MachineIRBuilder B(MI); + + if (Src0Regs.empty()) + split64BitValueForMapping(B, Src0Regs, MI.getOperand(1).getReg()); + + if (Src1Regs.empty()) + split64BitValueForMapping(B, Src1Regs, MI.getOperand(2).getReg()); + + B.buildInstr(Opc) + .addDef(DefRegs[0]) + .addUse(Src0Regs[0]) + .addUse(Src1Regs[0]); + + B.buildInstr(Opc) + .addDef(DefRegs[1]) + .addUse(Src0Regs[1]) + .addUse(Src1Regs[1]); + + MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID)); + MI.eraseFromParent(); + return; + } + default: + break; + } + return applyDefaultMapping(OpdMapper); } @@ -405,6 +552,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } + if (Size == 64) { + + if (isSALUMapping(MI)) { + OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size); + OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0]; + } else { + OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size); + unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/); + OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size); + + unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/); + OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size); + } + + break; + } + LLVM_FALLTHROUGH; } @@ -742,3 +906,4 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); } + diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 0e22a17246c..9cb7a7f8a23 100644 
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -21,6 +21,7 @@ namespace llvm { +class MachineIRBuilder; class SIRegisterInfo; class TargetRegisterInfo; @@ -45,6 +46,12 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const TargetRegisterInfo &TRI, unsigned Default = AMDGPU::VGPRRegBankID) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p + /// Regs. This appropriately sets the regbank of the new registers. + void split64BitValueForMapping(MachineIRBuilder &B, + SmallVector<unsigned, 2> &Regs, + unsigned Reg) const; + bool isSALUMapping(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; @@ -56,6 +63,9 @@ public: unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override; + unsigned getBreakDownCost(const ValueMapping &ValMapping, + const RegisterBank *CurBank = nullptr) const override; + const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override; |