summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp48
1 files changed, 35 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 6a7b823f901..2b14a22e0dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -43,7 +43,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
Type *ParamTy,
- unsigned Offset) const {
+ uint64_t Offset) const {
MachineFunction &MF = MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -66,7 +66,8 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
}
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
- Type *ParamTy, unsigned Offset,
+ Type *ParamTy, uint64_t Offset,
+ unsigned Align,
unsigned DstReg) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
@@ -74,7 +75,6 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
- unsigned Align = DL.getABITypeAlignment(ParamTy);
unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
MachineMemOperand *MMO =
@@ -95,7 +95,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
return false;
MachineFunction &MF = MIRBuilder.getMF();
- const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
+ const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
@@ -145,6 +145,36 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ // The infrastructure for normal calling convention lowering is essentially
+ // useless for kernels. We want to avoid any kind of legalization or argument
+ // splitting.
+ if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+ unsigned i = 0;
+ const unsigned KernArgBaseAlign = 16;
+ const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+ uint64_t ExplicitArgOffset = 0;
+
+ // TODO: Align down to dword alignment and extract bits for extending loads.
+ for (auto &Arg : F.args()) {
+ Type *ArgTy = Arg.getType();
+ unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+ if (AllocSize == 0)
+ continue;
+
+ unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+
+ uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
+ ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+
+ unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
+ ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
+ lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
+ ++i;
+ }
+
+ return true;
+ }
+
unsigned NumArgs = F.arg_size();
Function::const_arg_iterator CurOrigArg = F.arg_begin();
const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
@@ -216,13 +246,5 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
return true;
}
- for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) {
- // FIXME: We should be getting DebugInfo from the arguments some how.
- CCValAssign &VA = ArgLocs[i];
- lowerParameter(MIRBuilder, Arg->getType(),
- VA.getLocMemOffset() +
- Subtarget->getExplicitKernelArgOffset(F), VRegs[i]);
- }
-
- return true;
+ return false;
}
OpenPOWER on IntegriCloud