summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def33
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp165
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h10
3 files changed, 208 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index c56380702f9..f2daf47df5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -91,6 +91,28 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
{&PartMappings[17], 1}
};
+const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
+ /*32-bit op*/ {0, 32, SGPRRegBank},
+ /*2x32-bit op*/ {0, 32, SGPRRegBank},
+ {32, 32, SGPRRegBank},
+/*<2x32-bit> op*/ {0, 64, SGPRRegBank},
+
+ /*32-bit op*/ {0, 32, VGPRRegBank},
+ /*2x32-bit op*/ {0, 32, VGPRRegBank},
+ {32, 32, VGPRRegBank},
+};
+
+
+// For some instructions which can operate 64-bit only for the scalar version.
+const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
+ /*32-bit sgpr*/ {&SGPROnly64BreakDown[0], 1},
+ /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2},
+ /*64-bit sgpr */ {&SGPROnly64BreakDown[3], 1},
+
+ /*32-bit vgpr*/ {&SGPROnly64BreakDown[4], 1},
+ /*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2}
+};
+
enum ValueMappingIdx {
SCCStartIdx = 0,
SGPRStartIdx = 2,
@@ -127,5 +149,16 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
return &ValMappings[Idx];
}
+const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID,
+ unsigned Size) {
+ assert(Size == 64);
+
+ if (BankID == AMDGPU::VGPRRegBankID)
+ return &ValMappingsSGPR64OnlyVGPR32[4];
+
+ assert(BankID == AMDGPU::SGPRRegBankID);
+ return &ValMappingsSGPR64OnlyVGPR32[2];
+}
+
} // End AMDGPU namespace.
} // End llvm namespace.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 17d888d8c88..51f6c445b74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -16,6 +16,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -85,9 +86,36 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
+ if (Dst.getID() == AMDGPU::SCCRegBankID &&
+ Src.getID() == AMDGPU::VCCRegBankID)
+ return std::numeric_limits<unsigned>::max();
+
return RegisterBankInfo::copyCost(Dst, Src, Size);
}
+unsigned AMDGPURegisterBankInfo::getBreakDownCost(
+ const ValueMapping &ValMapping,
+ const RegisterBank *CurBank) const {
+ // Currently we should only see rewrites of defs since copies from VGPR to
+ // SGPR are illegal.
+ assert(CurBank == nullptr && "shouldn't see already assigned bank");
+
+ assert(ValMapping.NumBreakDowns == 2 &&
+ ValMapping.BreakDown[0].Length == 32 &&
+ ValMapping.BreakDown[0].StartIdx == 0 &&
+ ValMapping.BreakDown[1].Length == 32 &&
+ ValMapping.BreakDown[1].StartIdx == 32 &&
+ ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);
+
+ // 32-bit extract of a 64-bit value is just access of a subregister, so free.
+ // TODO: Cost of 0 hits assert, though it's not clear it's what we really
+ // want.
+
+ // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
+ // alignment restrictions, but this probably isn't important.
+ return 1;
+}
+
const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
const TargetRegisterClass &RC) const {
@@ -107,6 +135,48 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
switch (MI.getOpcode()) {
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ if (Size != 64)
+ break;
+
+ const InstructionMapping &SSMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&SSMapping);
+
+ const InstructionMapping &VVMapping = getInstructionMapping(
+ 2, 2, getOperandsMapping(
+ {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&VVMapping);
+
+ const InstructionMapping &SVMapping = getInstructionMapping(
+ 3, 3, getOperandsMapping(
+ {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&SVMapping);
+
+ // SGPR in LHS is slightly preferrable, so make it VS more expnesive than
+ // SV.
+ const InstructionMapping &VSMapping = getInstructionMapping(
+ 3, 4, getOperandsMapping(
+ {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&VSMapping);
+ break;
+ }
case TargetOpcode::G_LOAD: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
// FIXME: Should we be hard coding the size for these mappings?
@@ -239,8 +309,85 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
+void AMDGPURegisterBankInfo::split64BitValueForMapping(
+ MachineIRBuilder &B,
+ SmallVector<unsigned, 2> &Regs,
+ unsigned Reg) const {
+ LLT S32 = LLT::scalar(32);
+ MachineRegisterInfo *MRI = B.getMRI();
+ unsigned LoLHS = MRI->createGenericVirtualRegister(S32);
+ unsigned HiLHS = MRI->createGenericVirtualRegister(S32);
+ const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
+ MRI->setRegBank(LoLHS, *Bank);
+ MRI->setRegBank(HiLHS, *Bank);
+
+ Regs.push_back(LoLHS);
+ Regs.push_back(HiLHS);
+
+ B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
+ .addDef(LoLHS)
+ .addDef(HiLHS)
+ .addUse(Reg);
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
+ MachineInstr &MI = OpdMapper.getMI();
+ unsigned Opc = MI.getOpcode();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+ switch (Opc) {
+ case AMDGPU::G_AND:
+ case AMDGPU::G_OR:
+ case AMDGPU::G_XOR: {
+ // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
+ // there is a VGPR input.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MRI.getType(DstReg).getSizeInBits() != 64)
+ break;
+
+ SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
+ SmallVector<unsigned, 2> Src0Regs(OpdMapper.getVRegs(1));
+ SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));
+
+ // All inputs are SGPRs, nothing special to do.
+ if (DefRegs.empty()) {
+ assert(Src0Regs.empty() && Src1Regs.empty());
+ break;
+ }
+
+ assert(DefRegs.size() == 2);
+ assert(Src0Regs.size() == Src1Regs.size() &&
+ (Src0Regs.empty() || Src0Regs.size() == 2));
+
+ // Depending on where the source registers came from, the generic code may
+ // have decided to split the inputs already or not. If not, we still need to
+ // extract the values.
+ MachineIRBuilder B(MI);
+
+ if (Src0Regs.empty())
+ split64BitValueForMapping(B, Src0Regs, MI.getOperand(1).getReg());
+
+ if (Src1Regs.empty())
+ split64BitValueForMapping(B, Src1Regs, MI.getOperand(2).getReg());
+
+ B.buildInstr(Opc)
+ .addDef(DefRegs[0])
+ .addUse(Src0Regs[0])
+ .addUse(Src1Regs[0]);
+
+ B.buildInstr(Opc)
+ .addDef(DefRegs[1])
+ .addUse(Src0Regs[1])
+ .addUse(Src1Regs[1]);
+
+ MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+ MI.eraseFromParent();
+ return;
+ }
+ default:
+ break;
+ }
+
return applyDefaultMapping(OpdMapper);
}
@@ -405,6 +552,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ if (Size == 64) {
+
+ if (isSALUMapping(MI)) {
+ OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
+ OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
+ } else {
+ OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
+ unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
+
+ unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
+ }
+
+ break;
+ }
+
LLVM_FALLTHROUGH;
}
@@ -742,3 +906,4 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
MI.getNumOperands());
}
+
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 0e22a17246c..9cb7a7f8a23 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -21,6 +21,7 @@
namespace llvm {
+class MachineIRBuilder;
class SIRegisterInfo;
class TargetRegisterInfo;
@@ -45,6 +46,12 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const TargetRegisterInfo &TRI,
unsigned Default = AMDGPU::VGPRRegBankID) const;
+ /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
+ /// Regs. This appropriately sets the regbank of the new registers.
+ void split64BitValueForMapping(MachineIRBuilder &B,
+ SmallVector<unsigned, 2> &Regs,
+ unsigned Reg) const;
+
bool isSALUMapping(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
@@ -56,6 +63,9 @@ public:
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
+ unsigned getBreakDownCost(const ValueMapping &ValMapping,
+ const RegisterBank *CurBank = nullptr) const override;
+
const RegisterBank &
getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
OpenPOWER on IntegriCloud