diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 48 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4 |
4 files changed, 40 insertions, 32 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 6a7b823f901..2b14a22e0dc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -43,7 +43,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, - unsigned Offset) const { + uint64_t Offset) const { MachineFunction &MF = MIRBuilder.getMF(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); @@ -66,7 +66,8 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, } void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, - Type *ParamTy, unsigned Offset, + Type *ParamTy, uint64_t Offset, + unsigned Align, unsigned DstReg) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -74,7 +75,6 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned TypeSize = DL.getTypeStoreSize(ParamTy); - unsigned Align = DL.getABITypeAlignment(ParamTy); unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset); MachineMemOperand *MMO = @@ -95,7 +95,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, return false; MachineFunction &MF = MIRBuilder.getMF(); - const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget()); + const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>(); MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo(); @@ -145,6 +145,36 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, CCInfo.AllocateReg(FlatScratchInitReg); } + // The infrastructure for normal calling convention lowering is essentially + // useless for kernels. We want to avoid any kind of legalization or argument + // splitting. + if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) { + unsigned i = 0; + const unsigned KernArgBaseAlign = 16; + const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F); + uint64_t ExplicitArgOffset = 0; + + // TODO: Align down to dword alignment and extract bits for extending loads. + for (auto &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + unsigned AllocSize = DL.getTypeAllocSize(ArgTy); + if (AllocSize == 0) + continue; + + unsigned ABIAlign = DL.getABITypeAlignment(ArgTy); + + uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset; + ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize; + + unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset); + ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy)); + lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]); + ++i; + } + + return true; + } + unsigned NumArgs = F.arg_size(); Function::const_arg_iterator CurOrigArg = F.arg_begin(); const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>(); @@ -216,13 +246,5 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, return true; } - for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) { - // FIXME: We should be getting DebugInfo from the arguments some how. - CCValAssign &VA = ArgLocs[i]; - lowerParameter(MIRBuilder, Arg->getType(), - VA.getLocMemOffset() + - Subtarget->getExplicitKernelArgOffset(F), VRegs[i]); - } - - return true; + return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h index 251cb7a2c44..f51cb6abbf6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -26,10 +26,11 @@ class AMDGPUCallLowering: public CallLowering { AMDGPUAS AMDGPUASI; unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, - unsigned Offset) const; + uint64_t Offset) const; void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy, - unsigned Offset, unsigned DstReg) const; + uint64_t Offset, unsigned Align, + unsigned DstReg) const; public: AMDGPUCallLowering(const AMDGPUTargetLowering &TLI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index f6d7c1d2218..8a8143a5d78 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -85,11 +85,6 @@ def RetCC_SI_Shader : CallingConv<[ ]>> ]>; -// Calling convention for compute kernels -def CC_AMDGPU_Kernel : CallingConv<[ - CCCustom<"allocateKernArg"> -]>; - def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs< (sequence "VGPR%u", 24, 255) >; @@ -137,16 +132,6 @@ def RetCC_AMDGPU_Func : CallingConv<[ ]>; def CC_AMDGPU : CallingConv<[ - CCIf<"static_cast<const AMDGPUSubtarget&>" - "(State.getMachineFunction().getSubtarget()).getGeneration() >=" - "AMDGPUSubtarget::SOUTHERN_ISLANDS && " - "!AMDGPU::isShader(State.getCallingConv())", - CCDelegateTo<CC_AMDGPU_Kernel>>, - CCIf<"static_cast<const AMDGPUSubtarget&>" - "(State.getMachineFunction().getSubtarget()).getGeneration() < " - "AMDGPUSubtarget::SOUTHERN_ISLANDS && " - "!AMDGPU::isShader(State.getCallingConv())", - CCDelegateTo<CC_AMDGPU_Kernel>>, CCIf<"static_cast<const AMDGPUSubtarget&>" "(State.getMachineFunction().getSubtarget()).getGeneration() >= " "AMDGPUSubtarget::SOUTHERN_ISLANDS", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index feb53016f7c..a1b90195411 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -843,7 +843,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, switch (CC) { case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: - return CC_AMDGPU_Kernel; + llvm_unreachable("kernels should not be handled here"); case CallingConv::AMDGPU_VS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: @@ -866,7 +866,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, switch (CC) { case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: - return CC_AMDGPU_Kernel; + llvm_unreachable("kernels should not be handled here"); case CallingConv::AMDGPU_VS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: |