diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-01-24 22:47:04 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-01-24 22:47:04 +0000 |
commit | baa5d2e69c156a9d8c5bb33ca76a2f3b4629ea65 (patch) | |
tree | f8bdfcc4a7fca8292e5e8ee2217cb18ede64d4fe /llvm/lib | |
parent | f3ecbfc164540353bc882137425e1a5e34623598 (diff) | |
download | bcm5719-llvm-baa5d2e69c156a9d8c5bb33ca76a2f3b4629ea65.tar.gz bcm5719-llvm-baa5d2e69c156a9d8c5bb33ca76a2f3b4629ea65.zip |
RegBankSelect: Support some more complex part mappings
llvm-svn: 352123
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 99 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def | 33 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 165 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 10 |
5 files changed, 295 insertions, 25 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 249f4887fdf..25a979cd332 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -135,33 +135,78 @@ bool RegBankSelect::repairReg( MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping, RegBankSelect::RepairingPlacement &RepairPt, const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) { - if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled()) - return false; - assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented"); + + assert(ValMapping.NumBreakDowns == size(NewVRegs) && "need new vreg for each breakdown"); + // An empty range of new register means no repairing. assert(!empty(NewVRegs) && "We should not have to repair"); - // Assume we are repairing a use and thus, the original reg will be - // the source of the repairing. - unsigned Src = MO.getReg(); - unsigned Dst = *NewVRegs.begin(); - - // If we repair a definition, swap the source and destination for - // the repairing. - if (MO.isDef()) - std::swap(Src, Dst); - - assert((RepairPt.getNumInsertPoints() == 1 || - TargetRegisterInfo::isPhysicalRegister(Dst)) && - "We are about to create several defs for Dst"); - - // Build the instruction used to repair, then clone it at the right - // places. Avoiding buildCopy bypasses the check that Src and Dst have the - // same types because the type is a placeholder when this function is called. - MachineInstr *MI = - MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src); - LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) - << '\n'); + MachineInstr *MI; + if (ValMapping.NumBreakDowns == 1) { + // Assume we are repairing a use and thus, the original reg will be + // the source of the repairing. 
+ unsigned Src = MO.getReg(); + unsigned Dst = *NewVRegs.begin(); + + // If we repair a definition, swap the source and destination for + // the repairing. + if (MO.isDef()) + std::swap(Src, Dst); + + assert((RepairPt.getNumInsertPoints() == 1 || + TargetRegisterInfo::isPhysicalRegister(Dst)) && + "We are about to create several defs for Dst"); + + // Build the instruction used to repair, then clone it at the right + // places. Avoiding buildCopy bypasses the check that Src and Dst have the + // same types because the type is a placeholder when this function is called. + MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY) + .addDef(Dst) + .addUse(Src); + LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) + << '\n'); + } else { + // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT + // sequence. + assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported"); + + LLT RegTy = MRI->getType(MO.getReg()); + assert(!RegTy.isPointer() && "not implemented"); + + // FIXME: We could handle split vectors with concat_vectors easily, but this + // would require an agreement on the type of registers with the + // target. Currently createVRegs just uses scalar types, and expects the + // target code to replace this type (which we won't know about here) + assert(RegTy.isScalar() || + (RegTy.getNumElements() == ValMapping.NumBreakDowns) && + "only basic vector breakdowns currently supported"); + + if (MO.isDef()) { + unsigned MergeOp = RegTy.isScalar() ? 
+ TargetOpcode::G_MERGE_VALUES : TargetOpcode::G_BUILD_VECTOR; + + auto &MergeBuilder = + MIRBuilder.buildInstrNoInsert(MergeOp) + .addDef(MO.getReg()); + + for (unsigned SrcReg : NewVRegs) + MergeBuilder.addUse(SrcReg); + + MI = MergeBuilder; + } else { + MachineInstrBuilder UnMergeBuilder = + MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES); + for (unsigned DefReg : NewVRegs) + UnMergeBuilder.addDef(DefReg); + + UnMergeBuilder.addUse(MO.getReg()); + MI = UnMergeBuilder; + } + } + + if (RepairPt.getNumInsertPoints() != 1) + report_fatal_error("need testcase to support multiple insertion points"); + // TODO: // Check if MI is legal. if not, we need to legalize all the // instructions we are going to insert. @@ -194,7 +239,8 @@ uint64_t RegBankSelect::getRepairCost( const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI); // If MO does not have a register bank, we should have just been // able to set one unless we have to break the value down. - assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair"); + assert(CurRegBank || MO.isDef()); + // Def: Val <- NewDefs // Same number of values: copy // Different number: Val = build_sequence Defs1, Defs2, ... @@ -205,6 +251,9 @@ uint64_t RegBankSelect::getRepairCost( // We should remember that this value is available somewhere else to // coalesce the value. 
+ if (ValMapping.NumBreakDowns != 1) + return RBI->getBreakDownCost(ValMapping, CurRegBank); + if (IsSameNumOfValues) { const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank; // If we repair a definition, swap the source and destination for diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 6c91dad20fb..1b639945293 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -497,6 +497,19 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { OS << "nullptr"; } +bool RegisterBankInfo::ValueMapping::partsAllUniform() const { + if (NumBreakDowns < 2) + return true; + + const PartialMapping *First = begin(); + for (const PartialMapping *Part = First + 1; Part != end(); ++Part) { + if (Part->Length != First->Length || Part->RegBank != First->RegBank) + return false; + } + + return true; +} + bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const { assert(NumBreakDowns && "Value mapped nowhere?!"); unsigned OrigValueBitWidth = 0; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index c56380702f9..f2daf47df5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -91,6 +91,28 @@ const RegisterBankInfo::ValueMapping ValMappings[] { {&PartMappings[17], 1} }; +const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] { + /*32-bit op*/ {0, 32, SGPRRegBank}, + /*2x32-bit op*/ {0, 32, SGPRRegBank}, + {32, 32, SGPRRegBank}, +/*<2x32-bit> op*/ {0, 64, SGPRRegBank}, + + /*32-bit op*/ {0, 32, VGPRRegBank}, + /*2x32-bit op*/ {0, 32, VGPRRegBank}, + {32, 32, VGPRRegBank}, +}; + + +// For some instructions which can operate 64-bit only for the scalar version. 
+const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] { + /*32-bit sgpr*/ {&SGPROnly64BreakDown[0], 1}, + /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2}, + /*64-bit sgpr */ {&SGPROnly64BreakDown[3], 1}, + + /*32-bit vgpr*/ {&SGPROnly64BreakDown[4], 1}, + /*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2} +}; + enum ValueMappingIdx { SCCStartIdx = 0, SGPRStartIdx = 2, @@ -127,5 +149,16 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, return &ValMappings[Idx]; } +const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID, + unsigned Size) { + assert(Size == 64); + + if (BankID == AMDGPU::VGPRRegBankID) + return &ValMappingsSGPR64OnlyVGPR32[4]; + + assert(BankID == AMDGPU::SGPRRegBankID); + return &ValMappingsSGPR64OnlyVGPR32[2]; +} + } // End AMDGPU namespace. } // End llvm namespace. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 17d888d8c88..51f6c445b74 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -16,6 +16,7 @@ #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -85,9 +86,36 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, Src.getID() == AMDGPU::VCCRegBankID)) return std::numeric_limits<unsigned>::max(); + if (Dst.getID() == AMDGPU::SCCRegBankID && + Src.getID() == AMDGPU::VCCRegBankID) + return std::numeric_limits<unsigned>::max(); + return RegisterBankInfo::copyCost(Dst, Src, Size); } +unsigned AMDGPURegisterBankInfo::getBreakDownCost( + const ValueMapping &ValMapping, + const RegisterBank *CurBank) const { + // Currently we should only see rewrites of defs since copies from 
VGPR to + // SGPR are illegal. + assert(CurBank == nullptr && "shouldn't see already assigned bank"); + + assert(ValMapping.NumBreakDowns == 2 && + ValMapping.BreakDown[0].Length == 32 && + ValMapping.BreakDown[0].StartIdx == 0 && + ValMapping.BreakDown[1].Length == 32 && + ValMapping.BreakDown[1].StartIdx == 32 && + ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank); + + // 32-bit extract of a 64-bit value is just access of a subregister, so free. + // TODO: Cost of 0 hits assert, though it's not clear it's what we really + // want. + + // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR + // alignment restrictions, but this probably isn't important. + return 1; +} + const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( const TargetRegisterClass &RC) const { @@ -107,6 +135,48 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( InstructionMappings AltMappings; switch (MI.getOpcode()) { + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + if (Size != 64) + break; + + const InstructionMapping &SSMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&SSMapping); + + const InstructionMapping &VVMapping = getInstructionMapping( + 2, 2, getOperandsMapping( + {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&VVMapping); + + const InstructionMapping &SVMapping = getInstructionMapping( + 3, 3, getOperandsMapping( + {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + 
AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&SVMapping); + + // SGPR in LHS is slightly preferable, so make it VS more expensive than + // SV. + const InstructionMapping &VSMapping = getInstructionMapping( + 3, 4, getOperandsMapping( + {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&VSMapping); + break; + } case TargetOpcode::G_LOAD: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); // FIXME: Should we be hard coding the size for these mappings? @@ -239,8 +309,85 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( return RegisterBankInfo::getInstrAlternativeMappings(MI); } +void AMDGPURegisterBankInfo::split64BitValueForMapping( + MachineIRBuilder &B, + SmallVector<unsigned, 2> &Regs, + unsigned Reg) const { + LLT S32 = LLT::scalar(32); + MachineRegisterInfo *MRI = B.getMRI(); + unsigned LoLHS = MRI->createGenericVirtualRegister(S32); + unsigned HiLHS = MRI->createGenericVirtualRegister(S32); + const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI); + MRI->setRegBank(LoLHS, *Bank); + MRI->setRegBank(HiLHS, *Bank); + + Regs.push_back(LoLHS); + Regs.push_back(HiLHS); + + B.buildInstr(AMDGPU::G_UNMERGE_VALUES) + .addDef(LoLHS) + .addDef(HiLHS) + .addUse(Reg); +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { + MachineInstr &MI = OpdMapper.getMI(); + unsigned Opc = MI.getOpcode(); + MachineRegisterInfo &MRI = OpdMapper.getMRI(); + switch (Opc) { + case AMDGPU::G_AND: + case AMDGPU::G_OR: + case AMDGPU::G_XOR: { + // 64-bit and is only available on the SALU, so split into 2 32-bit ops if + // there is a VGPR input. 
+ unsigned DstReg = MI.getOperand(0).getReg(); + if (MRI.getType(DstReg).getSizeInBits() != 64) + break; + + SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0)); + SmallVector<unsigned, 2> Src0Regs(OpdMapper.getVRegs(1)); + SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2)); + + // All inputs are SGPRs, nothing special to do. + if (DefRegs.empty()) { + assert(Src0Regs.empty() && Src1Regs.empty()); + break; + } + + assert(DefRegs.size() == 2); + assert(Src0Regs.size() == Src1Regs.size() && + (Src0Regs.empty() || Src0Regs.size() == 2)); + + // Depending on where the source registers came from, the generic code may + // have decided to split the inputs already or not. If not, we still need to + // extract the values. + MachineIRBuilder B(MI); + + if (Src0Regs.empty()) + split64BitValueForMapping(B, Src0Regs, MI.getOperand(1).getReg()); + + if (Src1Regs.empty()) + split64BitValueForMapping(B, Src1Regs, MI.getOperand(2).getReg()); + + B.buildInstr(Opc) + .addDef(DefRegs[0]) + .addUse(Src0Regs[0]) + .addUse(Src1Regs[0]); + + B.buildInstr(Opc) + .addDef(DefRegs[1]) + .addUse(Src0Regs[1]) + .addUse(Src1Regs[1]); + + MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID)); + MI.eraseFromParent(); + return; + } + default: + break; + } + return applyDefaultMapping(OpdMapper); } @@ -405,6 +552,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } + if (Size == 64) { + + if (isSALUMapping(MI)) { + OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size); + OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0]; + } else { + OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size); + unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/); + OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size); + + unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/); + OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size); + } + + break; + } + LLVM_FALLTHROUGH; 
} @@ -742,3 +906,4 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); } + diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 0e22a17246c..9cb7a7f8a23 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -21,6 +21,7 @@ namespace llvm { +class MachineIRBuilder; class SIRegisterInfo; class TargetRegisterInfo; @@ -45,6 +46,12 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const TargetRegisterInfo &TRI, unsigned Default = AMDGPU::VGPRRegBankID) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p + /// Regs. This appropriately sets the regbank of the new registers. + void split64BitValueForMapping(MachineIRBuilder &B, + SmallVector<unsigned, 2> &Regs, + unsigned Reg) const; + bool isSALUMapping(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; @@ -56,6 +63,9 @@ public: unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override; + unsigned getBreakDownCost(const ValueMapping &ValMapping, + const RegisterBank *CurBank = nullptr) const override; + const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override; |