summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 43
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h 21
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp 12
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 295
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h 13
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 9
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 79
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h 96
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp 55
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.h 25
11 files changed, 423 insertions, 229 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 21aa0e59256..6d6fccb10cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -41,7 +41,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
unsigned Offset) const {
MachineFunction &MF = MIRBuilder.getMF();
- const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = *MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
@@ -49,7 +49,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
LLT PtrType = getLLTForType(*PtrTy, DL);
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
unsigned KernArgSegmentPtr =
- TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9aa02346d42..4f65b40ece5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3582,6 +3582,49 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
}
+/// Load an incoming value of type \p VT that was passed on the stack.
+///
+/// A fixed frame object of VT's store size is created at byte offset
+/// \p Offset, and the value is loaded from its frame index. The load hangs off
+/// the DAG entry node, uses the literal 4 as its alignment argument, and is
+/// flagged dereferenceable and invariant.
+///
+/// \returns the load node.
+SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
+                                                  EVT VT,
+                                                  const SDLoc &SL,
+                                                  int64_t Offset) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Frame object backing the incoming value.
+  int FrameIdx =
+      MF.getFrameInfo().CreateFixedObject(VT.getStoreSize(), Offset, true);
+  auto StackInfo = MachinePointerInfo::getStack(MF, Offset);
+  SDValue FramePtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+  auto LoadFlags = MachineMemOperand::MODereferenceable |
+                   MachineMemOperand::MOInvariant;
+  return DAG.getLoad(VT, SL, DAG.getEntryNode(), FramePtr, StackInfo, 4,
+                     LoadFlags);
+}
+
+/// Store the outgoing value \p ArgVal to the stack at \p StackPtr + \p Offset.
+///
+/// The destination address is computed as an i32 ADD of \p StackPtr and the
+/// constant \p Offset. The store is chained on \p Chain, uses the literal 4 as
+/// its alignment argument, and carries the MODereferenceable flag.
+///
+/// \returns the store node.
+SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
+                                                   const SDLoc &SL,
+                                                   SDValue Chain,
+                                                   SDValue StackPtr,
+                                                   SDValue ArgVal,
+                                                   int64_t Offset) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachinePointerInfo StackInfo = MachinePointerInfo::getStack(MF, Offset);
+
+  // Destination address: stack pointer plus the constant byte offset.
+  SDValue OffsetVal = DAG.getConstant(Offset, SL, MVT::i32);
+  SDValue DstPtr = DAG.getNode(ISD::ADD, SL, MVT::i32, StackPtr, OffsetVal);
+
+  return DAG.getStore(Chain, SL, ArgVal, DstPtr, StackInfo, 4,
+                      MachineMemOperand::MODereferenceable);
+}
+
+/// Materialize the input described by \p Arg as a value of type \p VT.
+///
+/// A register descriptor is read through CreateLiveInRegister using register
+/// class \p RC; any other descriptor is loaded from its stack offset via
+/// loadStackInputValue. \p Arg must be set (asserted).
+SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
+                                             const TargetRegisterClass *RC,
+                                             EVT VT, const SDLoc &SL,
+                                             const ArgDescriptor &Arg) const {
+  assert(Arg && "Attempting to load missing argument");
+
+  // Stack-passed inputs are handled first; the register case falls through.
+  if (!Arg.isRegister())
+    return loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
+
+  return CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL);
+}
+
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index d85aada6053..46c81f91d60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -24,7 +24,7 @@ namespace llvm {
class AMDGPUMachineFunction;
class AMDGPUSubtarget;
-class MachineRegisterInfo;
+struct ArgDescriptor;
class AMDGPUTargetLowering : public TargetLowering {
private:
@@ -237,6 +237,25 @@ public:
return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true);
}
+ /// Similar to CreateLiveInRegister, except the value may be loaded from a
+ /// stack slot rather than passed in a register.
+ SDValue loadStackInputValue(SelectionDAG &DAG,
+ EVT VT,
+ const SDLoc &SL,
+ int64_t Offset) const;
+
+ SDValue storeStackInputValue(SelectionDAG &DAG,
+ const SDLoc &SL,
+ SDValue Chain,
+ SDValue StackPtr,
+ SDValue ArgVal,
+ int64_t Offset) const;
+
+ SDValue loadInputValue(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ EVT VT, const SDLoc &SL,
+ const ArgDescriptor &Arg) const;
+
enum ImplicitParameter {
FIRST_IMPLICIT,
GRID_DIM = FIRST_IMPLICIT,
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index f7e5cb03b3e..2ecf32c6ffe 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -38,6 +38,7 @@ void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
MachineBasicBlock &MBB) const {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo* TRI = &TII->getRegisterInfo();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// We don't need this if we only have spills since there is no user facing
// scratch.
@@ -55,7 +56,7 @@ void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
MachineBasicBlock::iterator I = MBB.begin();
unsigned FlatScratchInitReg
- = TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);
+ = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.addLiveIn(FlatScratchInitReg);
@@ -64,7 +65,6 @@ void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
// Do a 64-bit pointer add.
@@ -283,13 +283,13 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
// We need to insert initialization of the scratch resource descriptor.
- unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
- MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
if (ST.isAmdCodeObjectV2(MF)) {
- PreloadedPrivateBufferReg = TRI->getPreloadedValue(
- MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ PreloadedPrivateBufferReg = MFI->getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
}
bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 47a5aa4b0ce..5a53d7914c0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
@@ -895,14 +896,19 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
uint64_t Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
- const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
- unsigned InputPtrReg = TRI->getPreloadedValue(MF,
- SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ const ArgDescriptor *InputPtrReg;
+ const TargetRegisterClass *RC;
+
+ std::tie(InputPtrReg, RC)
+ = Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
- MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
+ MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
+
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
DAG.getConstant(Offset, SL, PtrVT));
}
@@ -1005,6 +1011,17 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA
return ArgValue;
}
+/// Return the preloaded value \p PVID as a live-in register copy of type
+/// \p VT.
+///
+/// The descriptor and register class are looked up in \p MFI. The descriptor
+/// is dereferenced without a null check and its register is used directly.
+SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
+                                            const SIMachineFunctionInfo &MFI,
+                                            EVT VT,
+                                            AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
+  const ArgDescriptor *Desc;
+  const TargetRegisterClass *RegClass;
+  std::tie(Desc, RegClass) = MFI.getPreloadedValue(PVID);
+
+  return CreateLiveInRegister(DAG, RegClass, Desc->getRegister(), VT);
+}
+
static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
CallingConv::ID CallConv,
ArrayRef<ISD::InputArg> Ins,
@@ -1055,29 +1072,131 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
}
// Allocate special inputs passed in VGPRs.
-static void allocateSpecialInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+// Entry-function variant: for each work-item ID the function uses, mark the
+// hard-coded register (VGPR0 for X, VGPR1 for Y, VGPR2 for Z) as a live-in,
+// allocate it in CCInfo and record it in Info as a register ArgDescriptor.
+// The asserts compare Reg with the constant it was just initialized from.
+static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
if (Info.hasWorkItemIDX()) {
- unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
+ unsigned Reg = AMDGPU::VGPR0;
MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ assert(Reg == AMDGPU::VGPR0);
+
CCInfo.AllocateReg(Reg);
+ Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
}
if (Info.hasWorkItemIDY()) {
- unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
+ unsigned Reg = AMDGPU::VGPR1;
MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+
+ assert(Reg == AMDGPU::VGPR1);
CCInfo.AllocateReg(Reg);
+ Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
}
if (Info.hasWorkItemIDZ()) {
- unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
+ unsigned Reg = AMDGPU::VGPR2;
MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+
+ assert(Reg == AMDGPU::VGPR2);
CCInfo.AllocateReg(Reg);
+ Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
}
}
+// Allocate a location for a 32-bit special input. The candidates are the first
+// 32 registers of VGPR_32; the first one not yet allocated in CCInfo is taken
+// and added as a function live-in. If none is left, a stack slot is allocated
+// with AllocateStack(4, 4) and a stack descriptor is returned instead.
+static ArgDescriptor allocateVGPR32Input(CCState &CCInfo) {
+  ArrayRef<MCPhysReg> Candidates
+    = makeArrayRef(AMDGPU::VGPR_32RegClass.begin(), 32);
+  unsigned FreeIdx = CCInfo.getFirstUnallocated(Candidates);
+
+  if (FreeIdx != Candidates.size()) {
+    unsigned Reg = Candidates[FreeIdx];
+    Reg = CCInfo.AllocateReg(Reg);
+    assert(Reg != AMDGPU::NoRegister);
+
+    CCInfo.getMachineFunction().addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+    return ArgDescriptor::createRegister(Reg);
+  }
+
+  // Every candidate VGPR is taken: pass the value on the stack.
+  int64_t StackOffset = CCInfo.AllocateStack(4, 4);
+  return ArgDescriptor::createStack(StackOffset);
+}
+
+// Allocate the first register of RC that is still unallocated in CCInfo, add
+// it as a function live-in and return it as a register descriptor. There is
+// no stack fallback: running out of candidates is a fatal error.
+//
+// NOTE(review): NumArgRegs is never read. The candidate list is always the
+// first 32 entries of RC, even when a caller passes a different count.
+// Confirm whether the literal 32 below was meant to be NumArgRegs.
+static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo,
+                                             const TargetRegisterClass *RC,
+                                             unsigned NumArgRegs) {
+  ArrayRef<MCPhysReg> Candidates = makeArrayRef(RC->begin(), 32);
+  unsigned FreeIdx = CCInfo.getFirstUnallocated(Candidates);
+  if (FreeIdx == Candidates.size())
+    report_fatal_error("ran out of SGPRs for arguments");
+
+  unsigned Reg = Candidates[FreeIdx];
+  Reg = CCInfo.AllocateReg(Reg);
+  assert(Reg != AMDGPU::NoRegister);
+
+  CCInfo.getMachineFunction().addLiveIn(Reg, RC);
+  return ArgDescriptor::createRegister(Reg);
+}
+
+// Allocate a special input from SGPR_32, forwarding to allocateSGPR32InputImpl
+// with a register count of 32.
+static ArgDescriptor allocateSGPR32Input(CCState &CCInfo) {
+ return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, 32);
+}
+
+// Allocate a special input from SGPR_64, forwarding to allocateSGPR32InputImpl
+// with a register count of 16.
+static ArgDescriptor allocateSGPR64Input(CCState &CCInfo) {
+ return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
+}
+
+// For each work-item ID the function uses, allocate a location with
+// allocateVGPR32Input (a VGPR, or a stack slot once the VGPRs run out) and
+// record it in Info. Allocation happens in X, Y, Z order, so the order of the
+// checks determines which location each ID receives. MF and TRI are unused.
+static void allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ if (Info.hasWorkItemIDX())
+ Info.setWorkItemIDX(allocateVGPR32Input(CCInfo));
+
+ if (Info.hasWorkItemIDY())
+ Info.setWorkItemIDY(allocateVGPR32Input(CCInfo));
+
+ if (Info.hasWorkItemIDZ())
+ Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo));
+}
+
+// For each SGPR-passed special input the function uses, allocate a register
+// and store the resulting descriptor in the function's ArgInfo. The 64-bit
+// inputs (dispatch ptr, queue ptr, kernarg segment ptr, dispatch ID) are
+// allocated first, followed by the 32-bit workgroup IDs. Registers are handed
+// out first-free, so this order determines the assignment. MF and TRI are
+// unused.
+static void allocateSpecialInputSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ auto &ArgInfo = Info.getArgInfo();
+
+ // TODO: Unify handling with private memory pointers.
+
+ if (Info.hasDispatchPtr())
+ ArgInfo.DispatchPtr = allocateSGPR64Input(CCInfo);
+
+ if (Info.hasQueuePtr())
+ ArgInfo.QueuePtr = allocateSGPR64Input(CCInfo);
+
+ if (Info.hasKernargSegmentPtr())
+ ArgInfo.KernargSegmentPtr = allocateSGPR64Input(CCInfo);
+
+ if (Info.hasDispatchID())
+ ArgInfo.DispatchID = allocateSGPR64Input(CCInfo);
+
+ // flat_scratch_init is not applicable for non-kernel functions.
+
+ if (Info.hasWorkGroupIDX())
+ ArgInfo.WorkGroupIDX = allocateSGPR32Input(CCInfo);
+
+ if (Info.hasWorkGroupIDY())
+ ArgInfo.WorkGroupIDY = allocateSGPR32Input(CCInfo);
+
+ if (Info.hasWorkGroupIDZ())
+ ArgInfo.WorkGroupIDZ = allocateSGPR32Input(CCInfo);
+}
+
// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
MachineFunction &MF,
@@ -1212,8 +1331,8 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// resource. For the Code Object V2 ABI, this will be the first 4 user
// SGPR inputs. We can reserve those and use them directly.
- unsigned PrivateSegmentBufferReg = TRI.getPreloadedValue(
- MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ unsigned PrivateSegmentBufferReg = Info.getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
Info.setScratchRSrcReg(PrivateSegmentBufferReg);
if (MFI.hasCalls()) {
@@ -1229,8 +1348,8 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
= TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
} else {
- unsigned PrivateSegmentWaveByteOffsetReg = TRI.getPreloadedValue(
- MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ unsigned PrivateSegmentWaveByteOffsetReg = Info.getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
}
} else {
@@ -1256,8 +1375,8 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
Info.setScratchRSrcReg(ReservedBufferReg);
if (HasStackObjects && !MFI.hasCalls()) {
- unsigned ScratchWaveOffsetReg = TRI.getPreloadedValue(
- MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ unsigned ScratchWaveOffsetReg = Info.getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
} else {
unsigned ReservedOffsetReg
@@ -1390,7 +1509,7 @@ SDValue SITargetLowering::LowerFormalArguments(
}
if (IsEntryFunc) {
- allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
+ allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
}
@@ -1509,6 +1628,11 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
+ if (!IsEntryFunc) {
+ // Special inputs come after user arguments.
+ allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
+ }
+
// Start adding system SGPRs.
if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
@@ -1516,8 +1640,13 @@ SDValue SITargetLowering::LowerFormalArguments(
CCInfo.AllocateReg(Info->getScratchRSrcReg());
CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
CCInfo.AllocateReg(Info->getFrameOffsetReg());
+ allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
+ auto &ArgUsageInfo =
+ DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
+ ArgUsageInfo.setFuncArgInfo(*MF.getFunction(), Info->getArgInfo());
+
return Chains.empty() ? Chain :
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
@@ -1741,6 +1870,81 @@ SDValue SITargetLowering::LowerCallResult(
return Chain;
}
+// Add code to pass special inputs required depending on used features separate
+// from the explicit user arguments present in the IR.
+//
+// For every ID in InputRegs that the callee's AMDGPUFunctionArgInfo has a
+// descriptor for, the caller's own copy of that input is loaded and forwarded:
+// register-passed inputs are appended to RegsToPass, stack-passed inputs are
+// stored relative to StackPtr and the store is appended to MemOpChains.
+// Does nothing when CLI has no call site; an indirect call (no called
+// function) is a fatal error.
+void SITargetLowering::passSpecialInputs(
+ CallLoweringInfo &CLI,
+ const SIMachineFunctionInfo &Info,
+ SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ SDValue Chain,
+ SDValue StackPtr) const {
+ // If we don't have a call site, this was a call inserted by
+ // legalization. These can never use special inputs.
+ if (!CLI.CS)
+ return;
+
+ const Function *CalleeFunc = CLI.CS.getCalledFunction();
+ if (!CalleeFunc)
+ report_fatal_error("indirect calls not handled");
+
+ SelectionDAG &DAG = CLI.DAG;
+ const SDLoc &DL = CLI.DL;
+
+ const SISubtarget *ST = getSubtarget();
+ const SIRegisterInfo *TRI = ST->getRegisterInfo();
+
+ // Where the callee expects each special input, as recorded by the
+ // AMDGPUArgumentUsageInfo analysis.
+ auto &ArgUsageInfo =
+ DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
+ const AMDGPUFunctionArgInfo &CalleeArgInfo
+ = ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
+
+ // Where the current (calling) function received its own special inputs.
+ const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
+
+ // TODO: Unify with private memory register handling. This is complicated by
+ // the fact that at least in kernels, the input argument is not necessarily
+ // in the same location as the input.
+ AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
+ AMDGPUFunctionArgInfo::DISPATCH_PTR,
+ AMDGPUFunctionArgInfo::QUEUE_PTR,
+ AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR,
+ AMDGPUFunctionArgInfo::DISPATCH_ID,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_X,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_Y,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_Z
+ };
+
+ for (auto InputID : InputRegs) {
+ const ArgDescriptor *OutgoingArg;
+ const TargetRegisterClass *ArgRC;
+
+ // A null descriptor means the callee has no location for this input, so
+ // there is nothing to pass.
+ std::tie(OutgoingArg, ArgRC) = CalleeArgInfo.getPreloadedValue(InputID);
+ if (!OutgoingArg)
+ continue;
+
+ const ArgDescriptor *IncomingArg;
+ const TargetRegisterClass *IncomingArgRC;
+ std::tie(IncomingArg, IncomingArgRC)
+ = CallerArgInfo.getPreloadedValue(InputID);
+ assert(IncomingArgRC == ArgRC);
+
+ // NOTE(review): unlike OutgoingArg, IncomingArg is not null-checked before
+ // it is dereferenced below - confirm the caller always has every input
+ // its callee uses.
+ // All special arguments are ints for now.
+ EVT ArgVT = TRI->getSpillSize(*ArgRC) == 8 ? MVT::i64 : MVT::i32;
+ SDValue InputReg = loadInputValue(DAG, ArgRC, ArgVT, DL, *IncomingArg);
+ if (OutgoingArg->isRegister()) {
+ RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+ } else {
+ // Every store is chained on the same incoming Chain; the stores are
+ // collected in MemOpChains rather than threaded through one another.
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, StackPtr,
+ InputReg,
+ OutgoingArg->getStackOffset());
+ MemOpChains.push_back(ArgStore);
+ }
+ }
+}
+
// The wave scratch offset register is used as the global base pointer.
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1897,6 +2101,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
+ // Copy special input registers after user input arguments.
+ passSpecialInputs(CLI, *Info, RegsToPass, MemOpChains, Chain, StackPtr);
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
@@ -3424,7 +3631,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -3436,10 +3642,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_implicit_buffer_ptr: {
if (getSubtarget()->isAmdCodeObjectV2(MF))
return emitNonHSAIntrinsicError(DAG, DL, VT);
-
- unsigned Reg = TRI->getPreloadedValue(MF,
- SIRegisterInfo::IMPLICIT_BUFFER_PTR);
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+ return getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
}
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
@@ -3451,10 +3655,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getUNDEF(VT);
}
- auto Reg = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
- SIRegisterInfo::DISPATCH_PTR : SIRegisterInfo::QUEUE_PTR;
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
- TRI->getPreloadedValue(MF, Reg), VT);
+ auto RegID = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
+ AMDGPUFunctionArgInfo::DISPATCH_PTR : AMDGPUFunctionArgInfo::QUEUE_PTR;
+ return getPreloadedValue(DAG, *MFI, VT, RegID);
}
case Intrinsic::amdgcn_implicitarg_ptr: {
if (MFI->isEntryFunction())
@@ -3462,13 +3665,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
report_fatal_error("amdgcn.implicitarg.ptr not implemented for functions");
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
- unsigned Reg
- = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+ return getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
}
case Intrinsic::amdgcn_dispatch_id: {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_ID);
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+ return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::DISPATCH_ID);
}
case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
@@ -3553,28 +3754,32 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x:
case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
+ return getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
+ return getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
- case Intrinsic::amdgcn_workitem_id_x:
+ return getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_workitem_id_x: {
case Intrinsic::r600_read_tidig_x:
- return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X), VT);
+ return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
+ SDLoc(DAG.getEntryNode()),
+ MFI->getArgInfo().WorkItemIDX);
+ }
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
- return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y), VT);
+ return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
+ SDLoc(DAG.getEntryNode()),
+ MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
- return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z), VT);
+ return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
+ SDLoc(DAG.getEntryNode()),
+ MFI->getArgInfo().WorkItemIDZ);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops[] = {
Op.getOperand(1),
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index dbe78876298..9176e4a3004 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
#include "AMDGPUISelLowering.h"
+#include "AMDGPUArgumentUsageInfo.h"
#include "SIInstrInfo.h"
namespace llvm {
@@ -32,6 +33,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
const SDLoc &SL, SDValue Chain,
const ISD::InputArg &Arg) const;
+ SDValue getPreloadedValue(SelectionDAG &DAG,
+ const SIMachineFunctionInfo &MFI,
+ EVT VT,
+ AMDGPUFunctionArgInfo::PreloadedValue) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
@@ -205,6 +210,14 @@ public:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
+ void passSpecialInputs(
+ CallLoweringInfo &CLI,
+ const SIMachineFunctionInfo &Info,
+ SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ SDValue Chain,
+ SDValue StackPtr) const;
+
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 204eeff54d1..1d884524bcd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -916,7 +916,6 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
@@ -936,13 +935,13 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
WorkGroupSize > WavefrontSize) {
unsigned TIDIGXReg
- = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
+ = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
unsigned TIDIGYReg
- = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
+ = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
unsigned TIDIGZReg
- = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
+ = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
unsigned InputPtrReg =
- TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index cfc9fe5fa51..c4405309e12 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -27,24 +27,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
FrameOffsetReg(AMDGPU::FP_REG),
StackPtrOffsetReg(AMDGPU::SP_REG),
- PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
- DispatchPtrUserSGPR(AMDGPU::NoRegister),
- QueuePtrUserSGPR(AMDGPU::NoRegister),
- KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
- DispatchIDUserSGPR(AMDGPU::NoRegister),
- FlatScratchInitUserSGPR(AMDGPU::NoRegister),
- PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
- GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
- GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
- GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
- WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
- WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
- WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
- WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
- PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
- WorkItemIDXVGPR(AMDGPU::NoRegister),
- WorkItemIDYVGPR(AMDGPU::NoRegister),
- WorkItemIDZVGPR(AMDGPU::NoRegister),
+ ArgInfo(),
PSInputAddr(0),
PSInputEnable(0),
ReturnsVoid(true),
@@ -91,8 +74,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
FrameOffsetReg = AMDGPU::SGPR5;
StackPtrOffsetReg = AMDGPU::SGPR32;
- // FIXME: Not really a system SGPR.
- PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+ ArgInfo.PrivateSegmentBuffer =
+ ArgDescriptor::createRegister(ScratchRSrcReg);
+ ArgInfo.PrivateSegmentWaveByteOffset =
+ ArgDescriptor::createRegister(ScratchWaveOffsetReg);
+
if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
ImplicitArgPtr = true;
} else {
@@ -151,10 +137,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (HasStackObjects || MaySpill) {
PrivateSegmentWaveByteOffset = true;
- // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
- (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
- PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
+ (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
+ ArgInfo.PrivateSegmentWaveByteOffset
+ = ArgDescriptor::createRegister(AMDGPU::SGPR5);
}
}
@@ -189,52 +176,54 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+// Record the private segment buffer input: ArgInfo.PrivateSegmentBuffer is set
+// to the result of TRI.getMatchingSuperReg(getNextUserSGPR(), sub0,
+// SReg_128), NumUserSGPRs is advanced by 4, and the register is returned.
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
const SIRegisterInfo &TRI) {
- PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
+ ArgInfo.PrivateSegmentBuffer =
+ ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
NumUserSGPRs += 4;
- return PrivateSegmentBufferUserSGPR;
+ return ArgInfo.PrivateSegmentBuffer.getRegister();
}
+// Record the dispatch pointer input: ArgInfo.DispatchPtr is set to the result
+// of TRI.getMatchingSuperReg(getNextUserSGPR(), sub0, SReg_64), NumUserSGPRs
+// is advanced by 2, and the register is returned.
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
- DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return DispatchPtrUserSGPR;
+ return ArgInfo.DispatchPtr.getRegister();
}
+// Record the queue pointer input: ArgInfo.QueuePtr is set to the result of
+// TRI.getMatchingSuperReg(getNextUserSGPR(), sub0, SReg_64), NumUserSGPRs is
+// advanced by 2, and the register is returned.
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
- QueuePtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return QueuePtrUserSGPR;
+ return ArgInfo.QueuePtr.getRegister();
}
+// Record the kernarg segment pointer input: ArgInfo.KernargSegmentPtr is set
+// to the result of TRI.getMatchingSuperReg(getNextUserSGPR(), sub0, SReg_64),
+// NumUserSGPRs is advanced by 2, and the register is returned.
unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
- KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.KernargSegmentPtr
+ = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return KernargSegmentPtrUserSGPR;
+ return ArgInfo.KernargSegmentPtr.getRegister();
}
+// Record the dispatch ID input: ArgInfo.DispatchID is set to the result of
+// TRI.getMatchingSuperReg(getNextUserSGPR(), sub0, SReg_64), NumUserSGPRs is
+// advanced by 2, and the register is returned.
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
- DispatchIDUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return DispatchIDUserSGPR;
+ return ArgInfo.DispatchID.getRegister();
}
+// Record the flat scratch init input: ArgInfo.FlatScratchInit is set to the
+// result of TRI.getMatchingSuperReg(getNextUserSGPR(), sub0, SReg_64),
+// NumUserSGPRs is advanced by 2, and the register is returned.
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
- FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return FlatScratchInitUserSGPR;
+ return ArgInfo.FlatScratchInit.getRegister();
}
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
- ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return ImplicitBufferPtrUserSGPR;
+ return ArgInfo.ImplicitBufferPtr.getRegister();
}
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 94145c46e10..5581fe4c55e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,6 +16,7 @@
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "AMDGPUArgumentUsageInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -96,33 +97,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
unsigned StackPtrOffsetReg;
- // Input registers for non-HSA ABI
- unsigned ImplicitBufferPtrUserSGPR;
-
- // Input registers setup for the HSA ABI.
- // User SGPRs in allocation order.
- unsigned PrivateSegmentBufferUserSGPR;
- unsigned DispatchPtrUserSGPR;
- unsigned QueuePtrUserSGPR;
- unsigned KernargSegmentPtrUserSGPR;
- unsigned DispatchIDUserSGPR;
- unsigned FlatScratchInitUserSGPR;
- unsigned PrivateSegmentSizeUserSGPR;
- unsigned GridWorkGroupCountXUserSGPR;
- unsigned GridWorkGroupCountYUserSGPR;
- unsigned GridWorkGroupCountZUserSGPR;
-
- // System SGPRs in allocation order.
- unsigned WorkGroupIDXSystemSGPR;
- unsigned WorkGroupIDYSystemSGPR;
- unsigned WorkGroupIDZSystemSGPR;
- unsigned WorkGroupInfoSystemSGPR;
- unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
-
- // VGPR inputs. These are always v0, v1 and v2 for entry functions.
- unsigned WorkItemIDXVGPR;
- unsigned WorkItemIDYVGPR;
- unsigned WorkItemIDZVGPR;
+ AMDGPUFunctionArgInfo ArgInfo;
// Graphics info.
unsigned PSInputAddr;
@@ -235,7 +210,6 @@ private:
SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
public:
-
SIMachineFunctionInfo(const MachineFunction &MF);
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
@@ -266,37 +240,52 @@ public:
// Add system SGPRs.
unsigned addWorkGroupIDX() {
- WorkGroupIDXSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupIDXSystemSGPR;
+ return ArgInfo.WorkGroupIDX.getRegister();
}
unsigned addWorkGroupIDY() {
- WorkGroupIDYSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupIDYSystemSGPR;
+ return ArgInfo.WorkGroupIDY.getRegister();
}
unsigned addWorkGroupIDZ() {
- WorkGroupIDZSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupIDZSystemSGPR;
+ return ArgInfo.WorkGroupIDZ.getRegister();
}
unsigned addWorkGroupInfo() {
- WorkGroupInfoSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupInfoSystemSGPR;
+ return ArgInfo.WorkGroupInfo.getRegister();
+ }
+
+ // Add special VGPR inputs
+ void setWorkItemIDX(ArgDescriptor Arg) {
+ ArgInfo.WorkItemIDX = Arg;
+ }
+
+ void setWorkItemIDY(ArgDescriptor Arg) {
+ ArgInfo.WorkItemIDY = Arg;
+ }
+
+ void setWorkItemIDZ(ArgDescriptor Arg) {
+ ArgInfo.WorkItemIDZ = Arg;
}
+
unsigned addPrivateSegmentWaveByteOffset() {
- PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
+ ArgInfo.PrivateSegmentWaveByteOffset
+ = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return PrivateSegmentWaveByteOffsetSystemSGPR;
+ return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
void setPrivateSegmentWaveByteOffset(unsigned Reg) {
- PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
+ ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
}
bool hasPrivateSegmentBuffer() const {
@@ -375,6 +364,23 @@ public:
return ImplicitBufferPtr;
}
+ AMDGPUFunctionArgInfo &getArgInfo() {
+ return ArgInfo;
+ }
+
+ const AMDGPUFunctionArgInfo &getArgInfo() const {
+ return ArgInfo;
+ }
+
+ std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+ getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ return ArgInfo.getPreloadedValue(Value);
+ }
+
+ unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ return ArgInfo.getPreloadedValue(Value).first->getRegister();
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -384,7 +390,7 @@ public:
}
unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
- return PrivateSegmentWaveByteOffsetSystemSGPR;
+ return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
/// \brief Returns the physical register reserved for use as the resource
@@ -426,11 +432,11 @@ public:
}
unsigned getQueuePtrUserSGPR() const {
- return QueuePtrUserSGPR;
+ return ArgInfo.QueuePtr.getRegister();
}
unsigned getImplicitBufferPtrUserSGPR() const {
- return ImplicitBufferPtrUserSGPR;
+ return ArgInfo.ImplicitBufferPtr.getRegister();
}
bool hasSpilledSGPRs() const {
@@ -562,13 +568,13 @@ public:
switch (Dim) {
case 0:
assert(hasWorkGroupIDX());
- return WorkGroupIDXSystemSGPR;
+ return ArgInfo.WorkGroupIDX.getRegister();
case 1:
assert(hasWorkGroupIDY());
- return WorkGroupIDYSystemSGPR;
+ return ArgInfo.WorkGroupIDY.getRegister();
case 2:
assert(hasWorkGroupIDZ());
- return WorkGroupIDZSystemSGPR;
+ return ArgInfo.WorkGroupIDZ.getRegister();
}
llvm_unreachable("unexpected dimension");
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ea0bfb2ad9f..7c73f92eed2 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1338,61 +1338,6 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
return getCommonSubClass(DefRC, SrcRC) != nullptr;
}
-// FIXME: Most of these are flexible with HSA and we don't need to reserve them
-// as input registers if unused. Whether the dispatch ptr is necessary should be
-// easy to detect from used intrinsics. Scratch setup is harder to know.
-unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
- enum PreloadedValue Value) const {
-
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- (void)ST;
- switch (Value) {
- case SIRegisterInfo::WORKGROUP_ID_X:
- assert(MFI->hasWorkGroupIDX());
- return MFI->WorkGroupIDXSystemSGPR;
- case SIRegisterInfo::WORKGROUP_ID_Y:
- assert(MFI->hasWorkGroupIDY());
- return MFI->WorkGroupIDYSystemSGPR;
- case SIRegisterInfo::WORKGROUP_ID_Z:
- assert(MFI->hasWorkGroupIDZ());
- return MFI->WorkGroupIDZSystemSGPR;
- case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
- return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
- case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
- assert(MFI->hasPrivateSegmentBuffer());
- return MFI->PrivateSegmentBufferUserSGPR;
- case SIRegisterInfo::IMPLICIT_BUFFER_PTR:
- assert(MFI->hasImplicitBufferPtr());
- return MFI->ImplicitBufferPtrUserSGPR;
- case SIRegisterInfo::KERNARG_SEGMENT_PTR:
- assert(MFI->hasKernargSegmentPtr());
- return MFI->KernargSegmentPtrUserSGPR;
- case SIRegisterInfo::DISPATCH_ID:
- assert(MFI->hasDispatchID());
- return MFI->DispatchIDUserSGPR;
- case SIRegisterInfo::FLAT_SCRATCH_INIT:
- assert(MFI->hasFlatScratchInit());
- return MFI->FlatScratchInitUserSGPR;
- case SIRegisterInfo::DISPATCH_PTR:
- assert(MFI->hasDispatchPtr());
- return MFI->DispatchPtrUserSGPR;
- case SIRegisterInfo::QUEUE_PTR:
- assert(MFI->hasQueuePtr());
- return MFI->QueuePtrUserSGPR;
- case SIRegisterInfo::WORKITEM_ID_X:
- assert(MFI->hasWorkItemIDX());
- return AMDGPU::VGPR0;
- case SIRegisterInfo::WORKITEM_ID_Y:
- assert(MFI->hasWorkItemIDY());
- return AMDGPU::VGPR1;
- case SIRegisterInfo::WORKITEM_ID_Z:
- assert(MFI->hasWorkItemIDZ());
- return AMDGPU::VGPR2;
- }
- llvm_unreachable("unexpected preloaded value type");
-}
-
/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
// AMDGPU::NoRegister.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 3a8dea29df5..65655b79c21 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -186,31 +186,6 @@ public:
OpType <= AMDGPU::OPERAND_SRC_LAST;
}
- enum PreloadedValue {
- // SGPRS:
- PRIVATE_SEGMENT_BUFFER = 0,
- DISPATCH_PTR = 1,
- QUEUE_PTR = 2,
- KERNARG_SEGMENT_PTR = 3,
- DISPATCH_ID = 4,
- FLAT_SCRATCH_INIT = 5,
- WORKGROUP_ID_X = 10,
- WORKGROUP_ID_Y = 11,
- WORKGROUP_ID_Z = 12,
- PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
- IMPLICIT_BUFFER_PTR = 15,
-
- // VGPRS:
- FIRST_VGPR_VALUE = 16,
- WORKITEM_ID_X = FIRST_VGPR_VALUE,
- WORKITEM_ID_Y = 17,
- WORKITEM_ID_Z = 18
- };
-
- /// \brief Returns the physical register that \p Value is stored in.
- unsigned getPreloadedValue(const MachineFunction &MF,
- enum PreloadedValue Value) const;
-
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC,
const MachineFunction &MF) const;
OpenPOWER on IntegriCloud