summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp101
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp12
2 files changed, 72 insertions, 41 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 528a773ef21..adcc904f35c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -198,6 +198,58 @@ static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
}
}
+// Allocate special inputs passed in user SGPRs.
+static void allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineIRBuilder &MIRBuilder,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
+ if (Info.hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
+ CCInfo.AllocateReg(PrivateSegmentBufferReg);
+ }
+
+ if (Info.hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
+ MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(DispatchPtrReg);
+ }
+
+ if (Info.hasQueuePtr()) {
+ unsigned QueuePtrReg = Info.addQueuePtr(TRI);
+ MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(QueuePtrReg);
+ }
+
+ if (Info.hasKernargSegmentPtr()) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
+ const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ Register VReg = MRI.createGenericVirtualRegister(P4);
+ MRI.addLiveIn(InputPtrReg, VReg);
+ MIRBuilder.getMBB().addLiveIn(InputPtrReg);
+ MIRBuilder.buildCopy(VReg, InputPtrReg);
+ CCInfo.AllocateReg(InputPtrReg);
+ }
+
+ if (Info.hasDispatchID()) {
+ unsigned DispatchIDReg = Info.addDispatchID(TRI);
+ MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(DispatchIDReg);
+ }
+
+ if (Info.hasFlatScratchInit()) {
+ unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
+ MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(FlatScratchInitReg);
+ }
+
+ // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+ // these from the dispatch pointer.
+}
+
static void allocateSystemSGPRs(CCState &CCInfo,
MachineFunction &MF,
SIMachineFunctionInfo &Info,
@@ -272,51 +324,12 @@ bool AMDGPUCallLowering::lowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
- // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info->hasPrivateSegmentBuffer()) {
- Register PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
- MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
- CCInfo.AllocateReg(PrivateSegmentBufferReg);
- }
-
- if (Info->hasDispatchPtr()) {
- Register DispatchPtrReg = Info->addDispatchPtr(*TRI);
- // FIXME: Need to add reg as live-in
- CCInfo.AllocateReg(DispatchPtrReg);
- }
-
- if (Info->hasQueuePtr()) {
- Register QueuePtrReg = Info->addQueuePtr(*TRI);
- // FIXME: Need to add reg as live-in
- CCInfo.AllocateReg(QueuePtrReg);
- }
-
- if (Info->hasKernargSegmentPtr()) {
- Register InputPtrReg = Info->addKernargSegmentPtr(*TRI);
- const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
- Register VReg = MRI.createGenericVirtualRegister(P2);
- MRI.addLiveIn(InputPtrReg, VReg);
- MIRBuilder.getMBB().addLiveIn(InputPtrReg);
- MIRBuilder.buildCopy(VReg, InputPtrReg);
- CCInfo.AllocateReg(InputPtrReg);
- }
-
- if (Info->hasDispatchID()) {
- unsigned DispatchIDReg = Info->addDispatchID(*TRI);
- // FIXME: Need to add reg as live-in
- CCInfo.AllocateReg(DispatchIDReg);
- }
-
- if (Info->hasFlatScratchInit()) {
- unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
- // FIXME: Need to add reg as live-in
- CCInfo.AllocateReg(FlatScratchInitReg);
- }
-
// The infrastructure for normal calling convention lowering is essentially
// useless for kernels. We want to avoid any kind of legalization or argument
// splitting.
if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+ allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
+
unsigned i = 0;
const unsigned KernArgBaseAlign = 16;
const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
@@ -352,6 +365,12 @@ bool AMDGPUCallLowering::lowerFormalArguments(
return true;
}
+ if (Info->hasImplicitBufferPtr()) {
+ unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+ MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(ImplicitBufferPtrReg);
+ }
+
unsigned NumArgs = F.arg_size();
Function::const_arg_iterator CurOrigArg = F.arg_begin();
const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 653be650be0..cada5e5856c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1237,6 +1237,18 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
case Intrinsic::amdgcn_workgroup_id_z:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_dispatch_ptr:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::DISPATCH_PTR);
+ case Intrinsic::amdgcn_queue_ptr:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::QUEUE_PTR);
+ case Intrinsic::amdgcn_implicit_buffer_ptr:
+ return legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
+ case Intrinsic::amdgcn_dispatch_id:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::DISPATCH_ID);
default:
return true;
}
OpenPOWER on IntegriCloud