diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 101 |
1 files changed, 60 insertions, 41 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 528a773ef21..adcc904f35c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -198,6 +198,58 @@ static void allocateSpecialEntryInputVGPRs(CCState &CCInfo, } } +// Allocate special inputs passed in user SGPRs. +static void allocateHSAUserSGPRs(CCState &CCInfo, + MachineIRBuilder &MIRBuilder, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) { + // FIXME: How should these inputs interact with inreg / custom SGPR inputs? + if (Info.hasPrivateSegmentBuffer()) { + unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI); + MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass); + CCInfo.AllocateReg(PrivateSegmentBufferReg); + } + + if (Info.hasDispatchPtr()) { + unsigned DispatchPtrReg = Info.addDispatchPtr(TRI); + MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(DispatchPtrReg); + } + + if (Info.hasQueuePtr()) { + unsigned QueuePtrReg = Info.addQueuePtr(TRI); + MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(QueuePtrReg); + } + + if (Info.hasKernargSegmentPtr()) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register InputPtrReg = Info.addKernargSegmentPtr(TRI); + const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + Register VReg = MRI.createGenericVirtualRegister(P4); + MRI.addLiveIn(InputPtrReg, VReg); + MIRBuilder.getMBB().addLiveIn(InputPtrReg); + MIRBuilder.buildCopy(VReg, InputPtrReg); + CCInfo.AllocateReg(InputPtrReg); + } + + if (Info.hasDispatchID()) { + unsigned DispatchIDReg = Info.addDispatchID(TRI); + MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(DispatchIDReg); + } + + if (Info.hasFlatScratchInit()) { + unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI); + MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(FlatScratchInitReg); + } + + // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read + // these from the dispatch pointer. +} + static void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, @@ -272,51 +324,12 @@ bool AMDGPUCallLowering::lowerFormalArguments( SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); - // FIXME: How should these inputs interact with inreg / custom SGPR inputs? - if (Info->hasPrivateSegmentBuffer()) { - Register PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI); - MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass); - CCInfo.AllocateReg(PrivateSegmentBufferReg); - } - - if (Info->hasDispatchPtr()) { - Register DispatchPtrReg = Info->addDispatchPtr(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(DispatchPtrReg); - } - - if (Info->hasQueuePtr()) { - Register QueuePtrReg = Info->addQueuePtr(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(QueuePtrReg); - } - - if (Info->hasKernargSegmentPtr()) { - Register InputPtrReg = Info->addKernargSegmentPtr(*TRI); - const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); - Register VReg = MRI.createGenericVirtualRegister(P2); - MRI.addLiveIn(InputPtrReg, VReg); - MIRBuilder.getMBB().addLiveIn(InputPtrReg); - MIRBuilder.buildCopy(VReg, InputPtrReg); - CCInfo.AllocateReg(InputPtrReg); - } - - if (Info->hasDispatchID()) { - unsigned DispatchIDReg = Info->addDispatchID(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(DispatchIDReg); - } - - if (Info->hasFlatScratchInit()) { - unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(FlatScratchInitReg); - } - // The infrastructure for normal calling convention lowering is essentially // useless for kernels. We want to avoid any kind of legalization or argument // splitting. if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) { + allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info); + unsigned i = 0; const unsigned KernArgBaseAlign = 16; const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F); @@ -352,6 +365,12 @@ bool AMDGPUCallLowering::lowerFormalArguments( return true; } + if (Info->hasImplicitBufferPtr()) { + unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI); + MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(ImplicitBufferPtrReg); + } + unsigned NumArgs = F.arg_size(); Function::const_arg_iterator CurOrigArg = F.arg_begin(); const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>(); |