summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp48
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp4
4 files changed, 40 insertions, 32 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 6a7b823f901..2b14a22e0dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -43,7 +43,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
Type *ParamTy,
- unsigned Offset) const {
+ uint64_t Offset) const {
MachineFunction &MF = MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -66,7 +66,8 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
}
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
- Type *ParamTy, unsigned Offset,
+ Type *ParamTy, uint64_t Offset,
+ unsigned Align,
unsigned DstReg) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
@@ -74,7 +75,6 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
- unsigned Align = DL.getABITypeAlignment(ParamTy);
unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
MachineMemOperand *MMO =
@@ -95,7 +95,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
return false;
MachineFunction &MF = MIRBuilder.getMF();
- const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
+ const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
@@ -145,6 +145,36 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ // The infrastructure for normal calling convention lowering is essentially
+ // useless for kernels. We want to avoid any kind of legalization or argument
+ // splitting.
+ if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+ unsigned i = 0;
+ const unsigned KernArgBaseAlign = 16;
+ const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+ uint64_t ExplicitArgOffset = 0;
+
+ // TODO: Align down to dword alignment and extract bits for extending loads.
+ for (auto &Arg : F.args()) {
+ Type *ArgTy = Arg.getType();
+ unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+ if (AllocSize == 0)
+ continue;
+
+ unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+
+ uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
+ ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+
+ unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
+ ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
+ lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
+ ++i;
+ }
+
+ return true;
+ }
+
unsigned NumArgs = F.arg_size();
Function::const_arg_iterator CurOrigArg = F.arg_begin();
const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
@@ -216,13 +246,5 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
return true;
}
- for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) {
- // FIXME: We should be getting DebugInfo from the arguments some how.
- CCValAssign &VA = ArgLocs[i];
- lowerParameter(MIRBuilder, Arg->getType(),
- VA.getLocMemOffset() +
- Subtarget->getExplicitKernelArgOffset(F), VRegs[i]);
- }
-
- return true;
+ return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 251cb7a2c44..f51cb6abbf6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -26,10 +26,11 @@ class AMDGPUCallLowering: public CallLowering {
AMDGPUAS AMDGPUASI;
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
- unsigned Offset) const;
+ uint64_t Offset) const;
void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
- unsigned Offset, unsigned DstReg) const;
+ uint64_t Offset, unsigned Align,
+ unsigned DstReg) const;
public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index f6d7c1d2218..8a8143a5d78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -85,11 +85,6 @@ def RetCC_SI_Shader : CallingConv<[
]>>
]>;
-// Calling convention for compute kernels
-def CC_AMDGPU_Kernel : CallingConv<[
- CCCustom<"allocateKernArg">
-]>;
-
def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
(sequence "VGPR%u", 24, 255)
>;
@@ -137,16 +132,6 @@ def RetCC_AMDGPU_Func : CallingConv<[
]>;
def CC_AMDGPU : CallingConv<[
- CCIf<"static_cast<const AMDGPUSubtarget&>"
- "(State.getMachineFunction().getSubtarget()).getGeneration() >="
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "!AMDGPU::isShader(State.getCallingConv())",
- CCDelegateTo<CC_AMDGPU_Kernel>>,
- CCIf<"static_cast<const AMDGPUSubtarget&>"
- "(State.getMachineFunction().getSubtarget()).getGeneration() < "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "!AMDGPU::isShader(State.getCallingConv())",
- CCDelegateTo<CC_AMDGPU_Kernel>>,
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index feb53016f7c..a1b90195411 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -843,7 +843,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
switch (CC) {
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
- return CC_AMDGPU_Kernel;
+ llvm_unreachable("kernels should not be handled here");
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
@@ -866,7 +866,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
switch (CC) {
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
- return CC_AMDGPU_Kernel;
+ llvm_unreachable("kernels should not be handled here");
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
OpenPOWER on IntegriCloud