summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp471
1 files changed, 301 insertions, 170 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b107c357196..a118743f4d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -61,12 +61,126 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
}
};
+struct IncomingArgHandler : public CallLowering::ValueHandler {
+ uint64_t StackUsed = 0;
+
+ IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn) {}
+
+ Register getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ Register AddrReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+ StackUsed = std::max(StackUsed, Size + Offset);
+ return AddrReg;
+ }
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign &VA) override {
+ markPhysRegUsed(PhysReg);
+
+ if (VA.getLocVT().getSizeInBits() < 32) {
+ // 16-bit types are reported as legal for 32-bit registers. We need to do
+ // a 32-bit copy, and truncate to avoid the verifier complaining about it.
+ auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ return;
+ }
+
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ }
+ }
+
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ // FIXME: Get alignment
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ /// How the physical register gets marked varies between formal
+ /// parameters (it's a basic-block live-in), and a call instruction
+ /// (it's an implicit-def of the BL).
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+ // FIXME: What is the point of this being a callback?
+ bool isArgumentHandler() const override { return true; }
+};
+
+struct FormalArgHandler : public IncomingArgHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ }
+};
+
}
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
+void AMDGPUCallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
+ SplitArgTy PerformArgSplit) const {
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ if (OrigArg.Ty->isVoidTy())
+ return;
+
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
+
+ EVT VT = SplitVTs[0];
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+
+ if (NumParts == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Ctx),
+ OrigArg.Flags, OrigArg.IsFixed);
+ return;
+ }
+
+ LLT LLTy = getLLTForType(*OrigArg.Ty, DL);
+ SmallVector<Register, 8> SplitRegs;
+
+ EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
+ Type *PartTy = PartVT.getTypeForEVT(Ctx);
+ LLT PartLLT = getLLTForType(*PartTy, DL);
+
+ // FIXME: Should we be reporting all of the part registers for a single
+ // argument, and let handleAssignments take care of the repacking?
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
+ SplitRegs.push_back(PartReg);
+ SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
+ }
+
+ PerformArgSplit(SplitRegs, LLTy, PartLLT);
+}
+
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
ArrayRef<Register> VRegs) const {
@@ -156,48 +270,6 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
-static Register findFirstFreeSGPR(CCState &CCInfo) {
- unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
- for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
- if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
- return AMDGPU::SGPR0 + Reg;
- }
- }
- llvm_unreachable("Cannot allocate sgpr");
-}
-
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
- const LLT S32 = LLT::scalar(32);
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- if (Info.hasWorkItemIDX()) {
- Register Reg = AMDGPU::VGPR0;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
- }
-
- if (Info.hasWorkItemIDY()) {
- Register Reg = AMDGPU::VGPR1;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
- }
-
- if (Info.hasWorkItemIDZ()) {
- Register Reg = AMDGPU::VGPR2;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
- }
-}
-
// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
MachineIRBuilder &MIRBuilder,
@@ -250,60 +322,6 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
// these from the dispatch pointer.
}
-static void allocateSystemSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- SIMachineFunctionInfo &Info,
- CallingConv::ID CallConv,
- bool IsShader) {
- const LLT S32 = LLT::scalar(32);
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- if (Info.hasWorkGroupIDX()) {
- Register Reg = Info.addWorkGroupIDX();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDY()) {
- Register Reg = Info.addWorkGroupIDY();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDZ()) {
- unsigned Reg = Info.addWorkGroupIDZ();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupInfo()) {
- unsigned Reg = Info.addWorkGroupInfo();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasPrivateSegmentWaveByteOffset()) {
- // Scratch wave offset passed in system SGPR.
- unsigned PrivateSegmentWaveByteOffsetReg;
-
- if (IsShader) {
- PrivateSegmentWaveByteOffsetReg =
- Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
-
- // This is true if the scratch wave byte offset doesn't have a fixed
- // location.
- if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
- PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
- Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
- }
- } else
- PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
-
- MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
- CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
- }
-}
-
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
@@ -311,7 +329,9 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
const DataLayout &DL = F.getParent()->getDataLayout();
SmallVector<CCValAssign, 16> ArgLocs;
@@ -349,117 +369,228 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
++i;
}
- allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
- allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+ TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+ TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
return true;
}
+static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
+ ArrayRef<Register> OrigRegs,
+ ArrayRef<Register> Regs,
+ LLT LLTy,
+ LLT PartLLT) {
+ if (!LLTy.isVector() && !PartLLT.isVector()) {
+ MIRBuilder.buildMerge(OrigRegs[0], Regs);
+ return;
+ }
+
+ if (LLTy.isVector() && PartLLT.isVector()) {
+ assert(LLTy.getElementType() == PartLLT.getElementType());
+
+ int DstElts = LLTy.getNumElements();
+ int PartElts = PartLLT.getNumElements();
+ if (DstElts % PartElts == 0)
+ MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
+ else {
+ // Deal with v3s16 split into v2s16
+ assert(PartElts == 2 && DstElts % 2 != 0);
+ int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
+
+ LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
+ auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
+ MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
+ }
+
+ return;
+ }
+
+ assert(LLTy.isVector() && !PartLLT.isVector());
+
+ LLT DstEltTy = LLTy.getElementType();
+ if (DstEltTy == PartLLT) {
+ // Vector was trivially scalarized.
+ MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
+ } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+ // Deal with vector with 64-bit elements decomposed to 32-bit
+ // registers. Need to create intermediate 64-bit elements.
+ SmallVector<Register, 8> EltMerges;
+ int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+ assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+ for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+ auto Merge = MIRBuilder.buildMerge(DstEltTy,
+ Regs.take_front(PartsPerElt));
+ EltMerges.push_back(Merge.getReg(0));
+ Regs = Regs.drop_front(PartsPerElt);
+ }
+
+ MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
+ } else {
+ // Vector was split, and elements promoted to a wider type.
+ LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
+ auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
+ MIRBuilder.buildTrunc(OrigRegs[0], BV);
+ }
+}
+
bool AMDGPUCallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
+ CallingConv::ID CC = F.getCallingConv();
+
// The infrastructure for normal calling convention lowering is essentially
// useless for kernels. We want to avoid any kind of legalization or argument
// splitting.
- if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+ if (CC == CallingConv::AMDGPU_KERNEL)
return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
// AMDGPU_GS and AMDGP_HS are not supported yet.
- if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
- F.getCallingConv() == CallingConv::AMDGPU_HS)
+ if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
return false;
+ const bool IsShader = AMDGPU::isShader(CC);
+ const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
+
MachineFunction &MF = MIRBuilder.getMF();
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
const DataLayout &DL = F.getParent()->getDataLayout();
- bool IsShader = AMDGPU::isShader(F.getCallingConv());
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
if (Info->hasImplicitBufferPtr()) {
- unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+ Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
- unsigned NumArgs = F.arg_size();
- Function::const_arg_iterator CurOrigArg = F.arg_begin();
- const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+
+ SmallVector<ArgInfo, 32> SplitArgs;
+ unsigned Idx = 0;
unsigned PSInputNum = 0;
- BitVector Skipped(NumArgs);
- for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
- EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
-
- // We can only hanlde simple value types at the moment.
- ISD::ArgFlagsTy Flags;
- assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
- ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
- setArgFlags(OrigArg, i + 1, DL, F);
- Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
-
- if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
- !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
- PSInputNum <= 15) {
- if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
- Skipped.set(i);
- ++PSInputNum;
+
+ for (auto &Arg : F.args()) {
+ if (DL.getTypeStoreSize(Arg.getType()) == 0)
+ continue;
+
+ const bool InReg = Arg.hasAttribute(Attribute::InReg);
+
+ // SGPR arguments to functions not implemented.
+ if (!IsShader && InReg)
+ return false;
+
+ // TODO: Handle multiple registers and sret.
+ if (Arg.hasAttribute(Attribute::StructRet) ||
+ Arg.hasAttribute(Attribute::SwiftSelf) ||
+ Arg.hasAttribute(Attribute::SwiftError) ||
+ Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
+ return false;
+
+ if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
+ const bool ArgUsed = !Arg.use_empty();
+ bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
+
+ if (!SkipArg) {
+ Info->markPSInputAllocated(PSInputNum);
+ if (ArgUsed)
+ Info->markPSInputEnabled(PSInputNum);
+ }
+
+ ++PSInputNum;
+
+ if (SkipArg) {
+ MIRBuilder.buildUndef(VRegs[Idx][0]);
+ ++Idx;
continue;
}
+ }
- Info->markPSInputAllocated(PSInputNum);
- if (!CurOrigArg->use_empty())
- Info->markPSInputEnabled(PSInputNum);
+ ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+ setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CC,
+ // FIXME: We should probably be passing multiple registers to
+ // handleAssignments to do this
+ [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
+ packSplitRegsToOrigType(MIRBuilder, VRegs[Idx], Regs, LLTy, PartLLT);
+ });
- ++PSInputNum;
+ ++Idx;
+ }
+
+ // At least one interpolation mode must be enabled or else the GPU will
+ // hang.
+ //
+ // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+ // set PSInputAddr, the user wants to enable some bits after the compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // will look like, so we shouldn't do anything here and the user should take
+ // responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
+ if (CC == CallingConv::AMDGPU_PS) {
+ if ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
}
- CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
- /*IsVarArg=*/false);
-
- if (ValEVT.isVector()) {
- EVT ElemVT = ValEVT.getVectorElementType();
- if (!ValEVT.isSimple())
- return false;
- MVT ValVT = ElemVT.getSimpleVT();
- bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
- OrigArg.Flags, CCInfo);
- if (!Res)
- return false;
- } else {
- MVT ValVT = ValEVT.getSimpleVT();
- if (!ValEVT.isSimple())
- return false;
- bool Res =
- AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
-
- // Fail if we don't know how to handle this type.
- if (Res)
- return false;
+ if (Subtarget.isAmdPalOS()) {
+ // For isAmdPalOS, the user does not enable some bits after compilation
+ // based on run-time states; the register values being generated here are
+ // the final ones set in hardware. Therefore we need to apply the
+ // workaround to PSInputAddr and PSInputEnable together. (The case where
+ // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
+ // set up an input arg for a particular interpolation mode, but nothing
+ // uses that input arg. Really we should have an earlier pass that removes
+ // such an arg.)
+ unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
+ if ((PsInputBits & 0x7F) == 0 ||
+ ((PsInputBits & 0xF) == 0 &&
+ (PsInputBits >> 11 & 1)))
+ Info->markPSInputEnabled(
+ countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
}
}
- Function::const_arg_iterator Arg = F.arg_begin();
-
- if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
- F.getCallingConv() == CallingConv::AMDGPU_PS) {
- for (unsigned i = 0, OrigArgIdx = 0;
- OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
- if (Skipped.test(OrigArgIdx))
- continue;
- assert(VRegs[OrigArgIdx].size() == 1 &&
- "Can't lower into more than 1 reg");
- CCValAssign &VA = ArgLocs[i++];
- MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
- MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
- MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
- }
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
- allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
- return true;
+ if (!MBB.empty())
+ MIRBuilder.setInstr(*MBB.begin());
+
+ FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
+ if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
+ return false;
+
+ if (!IsEntryFunc) {
+ // Special inputs come after user arguments.
+ TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
}
- return false;
+ // Start adding system SGPRs.
+ if (IsEntryFunc) {
+ TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
+ } else {
+ CCInfo.AllocateReg(Info->getScratchRSrcReg());
+ CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
+ CCInfo.AllocateReg(Info->getFrameOffsetReg());
+ TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
+ }
+
+ // Move back to the end of the basic block.
+ MIRBuilder.setMBB(MBB);
+
+ return true;
}
OpenPOWER on IntegriCloud