author    Matt Arsenault <Matthew.Arsenault@amd.com>  2018-07-05 17:01:20 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>  2018-07-05 17:01:20 +0000
commit    29f303799bf211f353f97982a11b8d1dd7f49656 (patch)
tree      a5d1596b6eadb948a21073277f423134af88095e /llvm/lib/Target
parent    5ba72667619935ab33f8021e6132b8c7c03aa143 (diff)
AMDGPU/GlobalISel: Implement custom kernel arg lowering
Avoid using allocateKernArg / AssignFn. We do not want any of the
type-splitting behavior of normal calling convention lowering.

For now, at least, this exists alongside the IR argument lowering
pass. The custom path is necessary to handle struct padding
correctly while some arguments are still skipped by the IR argument
lowering pass.
llvm-svn: 336373
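To make the struct-padding point concrete, here is a minimal standalone sketch (not from this commit) of the offset walk the new path performs. alignTo stands in for llvm::alignTo from Support/MathExtras.h, and the argument types, sizes, and alignments below are hypothetical examples, not values taken from the patch:

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::alignTo: round Value up to a multiple of Align.
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  // Hypothetical kernel signature: (i32, {i8, i32}, double). The sizes and
  // alignments mimic what DataLayout::getTypeAllocSize and
  // getABITypeAlignment would report for a typical 64-bit layout.
  struct ArgDesc { const char *Name; uint64_t AllocSize, ABIAlign; };
  const ArgDesc Args[] = {
      {"i32 a",       4, 4},
      {"{i8, i32} s", 8, 4}, // alloc size includes 3 bytes of interior padding
      {"double d",    8, 8},
  };

  uint64_t ExplicitArgOffset = 0;
  for (const ArgDesc &A : Args) {
    // Same recurrence as the new lowering loop: align first, then bump by
    // the *alloc* size so struct padding stays part of the layout.
    uint64_t ArgOffset = alignTo(ExplicitArgOffset, A.ABIAlign);
    ExplicitArgOffset = ArgOffset + A.AllocSize;
    std::printf("%-12s offset %llu\n", A.Name, (unsigned long long)ArgOffset);
  }
  // Prints offsets 0, 4, 16. A normal calling-convention assigner would
  // instead see the struct split into legalized parts and could place them
  // without the padding the kernel ABI expects.
}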
Diffstat (limited to 'llvm/lib/Target')
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 48
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h   |  5
 llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td   | 15
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  4
 4 files changed, 40 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 6a7b823f901..2b14a22e0dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -43,7 +43,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
 
 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                                Type *ParamTy,
-                                               unsigned Offset) const {
+                                               uint64_t Offset) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -66,7 +66,8 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
 }
 
 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
-                                        Type *ParamTy, unsigned Offset,
+                                        Type *ParamTy, uint64_t Offset,
+                                        unsigned Align,
                                         unsigned DstReg) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
@@ -74,7 +75,6 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
   unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
-  unsigned Align = DL.getABITypeAlignment(ParamTy);
   unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
 
   MachineMemOperand *MMO =
@@ -95,7 +95,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     return false;
 
   MachineFunction &MF = MIRBuilder.getMF();
-  const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
+  const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
@@ -145,6 +145,36 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     CCInfo.AllocateReg(FlatScratchInitReg);
   }
 
+  // The infrastructure for normal calling convention lowering is essentially
+  // useless for kernels. We want to avoid any kind of legalization or
+  // argument splitting.
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+    unsigned i = 0;
+    const unsigned KernArgBaseAlign = 16;
+    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+    uint64_t ExplicitArgOffset = 0;
+
+    // TODO: Align down to dword alignment and extract bits for extending
+    // loads.
+    for (auto &Arg : F.args()) {
+      Type *ArgTy = Arg.getType();
+      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+      if (AllocSize == 0)
+        continue;
+
+      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+
+      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
+      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+
+      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
+      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
+      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
+      ++i;
+    }
+
+    return true;
+  }
+
   unsigned NumArgs = F.arg_size();
   Function::const_arg_iterator CurOrigArg = F.arg_begin();
   const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
@@ -216,13 +246,5 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     return true;
   }
 
-  for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) {
-    // FIXME: We should be getting DebugInfo from the arguments some how.
-    CCValAssign &VA = ArgLocs[i];
-    lowerParameter(MIRBuilder, Arg->getType(),
-                   VA.getLocMemOffset() +
-                   Subtarget->getExplicitKernelArgOffset(F), VRegs[i]);
-  }
-
-  return true;
+  return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 251cb7a2c44..f51cb6abbf6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -26,10 +26,11 @@ class AMDGPUCallLowering: public CallLowering {
   AMDGPUAS AMDGPUASI;
 
   unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
-                             unsigned Offset) const;
+                             uint64_t Offset) const;
 
   void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
-                      unsigned Offset, unsigned DstReg) const;
+                      uint64_t Offset, unsigned Align,
+                      unsigned DstReg) const;
 
  public:
   AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index f6d7c1d2218..8a8143a5d78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -85,11 +85,6 @@ def RetCC_SI_Shader : CallingConv<[
   ]>>
 ]>;
 
-// Calling convention for compute kernels
-def CC_AMDGPU_Kernel : CallingConv<[
-  CCCustom<"allocateKernArg">
-]>;
-
 def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
   (sequence "VGPR%u", 24, 255)
 >;
@@ -137,16 +132,6 @@ def RetCC_AMDGPU_Func : CallingConv<[
 ]>;
 
 def CC_AMDGPU : CallingConv<[
-  CCIf<"static_cast<const AMDGPUSubtarget&>"
-       "(State.getMachineFunction().getSubtarget()).getGeneration() >="
-       "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
-       "!AMDGPU::isShader(State.getCallingConv())",
-       CCDelegateTo<CC_AMDGPU_Kernel>>,
-  CCIf<"static_cast<const AMDGPUSubtarget&>"
-       "(State.getMachineFunction().getSubtarget()).getGeneration() < "
-       "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
-       "!AMDGPU::isShader(State.getCallingConv())",
-       CCDelegateTo<CC_AMDGPU_Kernel>>,
   CCIf<"static_cast<const AMDGPUSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
        "AMDGPUSubtarget::SOUTHERN_ISLANDS",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index feb53016f7c..a1b90195411 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -843,7 +843,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
   switch (CC) {
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
-    return CC_AMDGPU_Kernel;
+    llvm_unreachable("kernels should not be handled here");
   case CallingConv::AMDGPU_VS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
@@ -866,7 +866,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
   switch (CC) {
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
-    return CC_AMDGPU_Kernel;
+    llvm_unreachable("kernels should not be handled here");
   case CallingConv::AMDGPU_VS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
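The Align value threaded into lowerParameter comes from MinAlign(KernArgBaseAlign, ArgOffset): the kernarg segment base is 16-byte aligned, so a load at byte offset N within it can only claim the largest power of two dividing both 16 and N. A small self-contained sketch of that derivation (MinAlign here mirrors the helper in llvm/Support/MathExtras.h; the offsets are illustrative, not taken from the patch):

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Stand-in for llvm::MinAlign: the greatest power of two dividing both
// values, computed by isolating the lowest set bit of (A | B).
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  const uint64_t KernArgBaseAlign = 16; // kernarg segment base alignment
  // Illustrative argument offsets within the kernarg segment.
  for (uint64_t ArgOffset : {0, 4, 8, 12, 16, 24}) {
    std::printf("offset %2llu -> load align %llu\n",
                (unsigned long long)ArgOffset,
                (unsigned long long)MinAlign(KernArgBaseAlign, ArgOffset));
  }
  // offset 0 and 16 allow align 16; offset 8 and 24 allow 8; 4 and 12 only 4.
}

This is why the commit passes the computed Align into lowerParameter instead of letting it re-derive DL.getABITypeAlignment(ParamTy) as before: the type's ABI alignment can overstate what the argument's actual position in the kernarg segment guarantees.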