Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 471
1 file changed, 301 insertions, 170 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b107c357196..a118743f4d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -61,12 +61,126 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
   }
 };
 
+struct IncomingArgHandler : public CallLowering::ValueHandler {
+  uint64_t StackUsed = 0;
+
+  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                     CCAssignFn *AssignFn)
+    : ValueHandler(MIRBuilder, MRI, AssignFn) {}
+
+  Register getStackAddress(uint64_t Size, int64_t Offset,
+                           MachinePointerInfo &MPO) override {
+    auto &MFI = MIRBuilder.getMF().getFrameInfo();
+    int FI = MFI.CreateFixedObject(Size, Offset, true);
+    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+    Register AddrReg = MRI.createGenericVirtualRegister(
+      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
+    MIRBuilder.buildFrameIndex(AddrReg, FI);
+    StackUsed = std::max(StackUsed, Size + Offset);
+    return AddrReg;
+  }
+
+  void assignValueToReg(Register ValVReg, Register PhysReg,
+                        CCValAssign &VA) override {
+    markPhysRegUsed(PhysReg);
+
+    if (VA.getLocVT().getSizeInBits() < 32) {
+      // 16-bit types are reported as legal for 32-bit registers. We need to do
+      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
+      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
+      MIRBuilder.buildTrunc(ValVReg, Copy);
+      return;
+    }
+
+    switch (VA.getLocInfo()) {
+    case CCValAssign::LocInfo::SExt:
+    case CCValAssign::LocInfo::ZExt:
+    case CCValAssign::LocInfo::AExt: {
+      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+      MIRBuilder.buildTrunc(ValVReg, Copy);
+      break;
+    }
+    default:
+      MIRBuilder.buildCopy(ValVReg, PhysReg);
+      break;
+    }
+  }
+
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
+    // FIXME: Get alignment
+    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
+    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+  }
+
+  /// How the physical register gets marked varies between formal
+  /// parameters (it's a basic-block live-in), and a call instruction
+  /// (it's an implicit-def of the BL).
+  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+  // FIXME: What is the point of this being a callback?
+  bool isArgumentHandler() const override { return true; }
+};
+
+struct FormalArgHandler : public IncomingArgHandler {
+  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                   CCAssignFn *AssignFn)
+    : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
+
+  void markPhysRegUsed(unsigned PhysReg) override {
+    MIRBuilder.getMBB().addLiveIn(PhysReg);
+  }
+};
+
 }
 
 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
   : CallLowering(&TLI) {
 }
 
+void AMDGPUCallLowering::splitToValueTypes(
+    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
+    SplitArgTy PerformArgSplit) const {
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+  LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+  if (OrigArg.Ty->isVoidTy())
+    return;
+
+  SmallVector<EVT, 4> SplitVTs;
+  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
+
+  EVT VT = SplitVTs[0];
+  unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+
+  if (NumParts == 1) {
+    // No splitting to do, but we want to replace the original type (e.g. [1 x
+    // double] -> double).
+    SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Ctx),
+                           OrigArg.Flags, OrigArg.IsFixed);
+    return;
+  }
+
+  LLT LLTy = getLLTForType(*OrigArg.Ty, DL);
+  SmallVector<Register, 8> SplitRegs;
+
+  EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
+  Type *PartTy = PartVT.getTypeForEVT(Ctx);
+  LLT PartLLT = getLLTForType(*PartTy, DL);
+
+  // FIXME: Should we be reporting all of the part registers for a single
+  // argument, and let handleAssignments take care of the repacking?
+  for (unsigned i = 0; i < NumParts; ++i) {
+    Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
+    SplitRegs.push_back(PartReg);
+    SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
+  }
+
+  PerformArgSplit(SplitRegs, LLTy, PartLLT);
+}
+
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs) const {
@@ -156,48 +270,6 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
 }
 
-static Register findFirstFreeSGPR(CCState &CCInfo) {
-  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
-  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
-    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
-      return AMDGPU::SGPR0 + Reg;
-    }
-  }
-  llvm_unreachable("Cannot allocate sgpr");
-}
-
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
-                                           MachineFunction &MF,
-                                           const SIRegisterInfo &TRI,
-                                           SIMachineFunctionInfo &Info) {
-  const LLT S32 = LLT::scalar(32);
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  if (Info.hasWorkItemIDX()) {
-    Register Reg = AMDGPU::VGPR0;
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
-    CCInfo.AllocateReg(Reg);
-    Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
-  }
-
-  if (Info.hasWorkItemIDY()) {
-    Register Reg = AMDGPU::VGPR1;
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
-    CCInfo.AllocateReg(Reg);
-    Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
-  }
-
-  if (Info.hasWorkItemIDZ()) {
-    Register Reg = AMDGPU::VGPR2;
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
-    CCInfo.AllocateReg(Reg);
-    Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
-  }
-}
-
 // Allocate special inputs passed in user SGPRs.
 static void allocateHSAUserSGPRs(CCState &CCInfo,
                                  MachineIRBuilder &MIRBuilder,
@@ -250,60 +322,6 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
   // these from the dispatch pointer.
 }
 
-static void allocateSystemSGPRs(CCState &CCInfo,
-                                MachineFunction &MF,
-                                SIMachineFunctionInfo &Info,
-                                CallingConv::ID CallConv,
-                                bool IsShader) {
-  const LLT S32 = LLT::scalar(32);
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  if (Info.hasWorkGroupIDX()) {
-    Register Reg = Info.addWorkGroupIDX();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasWorkGroupIDY()) {
-    Register Reg = Info.addWorkGroupIDY();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasWorkGroupIDZ()) {
-    unsigned Reg = Info.addWorkGroupIDZ();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasWorkGroupInfo()) {
-    unsigned Reg = Info.addWorkGroupInfo();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasPrivateSegmentWaveByteOffset()) {
-    // Scratch wave offset passed in system SGPR.
-    unsigned PrivateSegmentWaveByteOffsetReg;
-
-    if (IsShader) {
-      PrivateSegmentWaveByteOffsetReg =
-        Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
-
-      // This is true if the scratch wave byte offset doesn't have a fixed
-      // location.
-      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
-        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
-        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
-      }
-    } else
-      PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
-
-    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
-    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
-  }
-}
-
 bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs) const {
@@ -311,7 +329,9 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
   const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
   const DataLayout &DL = F.getParent()->getDataLayout();
 
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -349,117 +369,228 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
     ++i;
   }
 
-  allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
-  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
   return true;
 }
 
+static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
+                                    ArrayRef<Register> OrigRegs,
+                                    ArrayRef<Register> Regs,
+                                    LLT LLTy,
+                                    LLT PartLLT) {
+  if (!LLTy.isVector() && !PartLLT.isVector()) {
+    MIRBuilder.buildMerge(OrigRegs[0], Regs);
+    return;
+  }
+
+  if (LLTy.isVector() && PartLLT.isVector()) {
+    assert(LLTy.getElementType() == PartLLT.getElementType());
+
+    int DstElts = LLTy.getNumElements();
+    int PartElts = PartLLT.getNumElements();
+    if (DstElts % PartElts == 0)
+      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
+    else {
+      // Deal with v3s16 split into v2s16
+      assert(PartElts == 2 && DstElts % 2 != 0);
+      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
+
+      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
+      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
+      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
+    }
+
+    return;
+  }
+
+  assert(LLTy.isVector() && !PartLLT.isVector());
+
+  LLT DstEltTy = LLTy.getElementType();
+  if (DstEltTy == PartLLT) {
+    // Vector was trivially scalarized.
+    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
+  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+    // Deal with vector with 64-bit elements decomposed to 32-bit
+    // registers. Need to create intermediate 64-bit elements.
+    SmallVector<Register, 8> EltMerges;
+    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+      auto Merge = MIRBuilder.buildMerge(DstEltTy,
+                                         Regs.take_front(PartsPerElt));
+      EltMerges.push_back(Merge.getReg(0));
+      Regs = Regs.drop_front(PartsPerElt);
+    }
+
+    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
+  } else {
+    // Vector was split, and elements promoted to a wider type.
+    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
+    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
+    MIRBuilder.buildTrunc(OrigRegs[0], BV);
+  }
+}
+
 bool AMDGPUCallLowering::lowerFormalArguments(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs) const {
+  CallingConv::ID CC = F.getCallingConv();
+
   // The infrastructure for normal calling convention lowering is essentially
   // useless for kernels. We want to avoid any kind of legalization or argument
   // splitting.
-  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+  if (CC == CallingConv::AMDGPU_KERNEL)
     return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
 
   // AMDGPU_GS and AMDGPU_HS are not supported yet.
-  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
-      F.getCallingConv() == CallingConv::AMDGPU_HS)
+  if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
     return false;
 
+  const bool IsShader = AMDGPU::isShader(CC);
+  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
+
   MachineFunction &MF = MIRBuilder.getMF();
+  MachineBasicBlock &MBB = MIRBuilder.getMBB();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
-  bool IsShader = AMDGPU::isShader(F.getCallingConv());
-
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 
   if (Info->hasImplicitBufferPtr()) {
-    unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
     CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
-  unsigned NumArgs = F.arg_size();
-  Function::const_arg_iterator CurOrigArg = F.arg_begin();
-  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+
+  SmallVector<ArgInfo, 32> SplitArgs;
+  unsigned Idx = 0;
   unsigned PSInputNum = 0;
-  BitVector Skipped(NumArgs);
-  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
-    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
-
-    // We can only hanlde simple value types at the moment.
-    ISD::ArgFlagsTy Flags;
-    assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
-    ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
-    setArgFlags(OrigArg, i + 1, DL, F);
-    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
-
-    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
-        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
-        PSInputNum <= 15) {
-      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
-        Skipped.set(i);
-        ++PSInputNum;
+
+  for (auto &Arg : F.args()) {
+    if (DL.getTypeStoreSize(Arg.getType()) == 0)
+      continue;
+
+    const bool InReg = Arg.hasAttribute(Attribute::InReg);
+
+    // SGPR arguments to functions not implemented.
+    if (!IsShader && InReg)
+      return false;
+
+    // TODO: Handle multiple registers and sret.
+    if (Arg.hasAttribute(Attribute::StructRet) ||
+        Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftError) ||
+        Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
+      return false;
+
+    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
+      const bool ArgUsed = !Arg.use_empty();
+      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
+
+      if (!SkipArg) {
+        Info->markPSInputAllocated(PSInputNum);
+        if (ArgUsed)
+          Info->markPSInputEnabled(PSInputNum);
+      }
+
+      ++PSInputNum;
+
+      if (SkipArg) {
+        MIRBuilder.buildUndef(VRegs[Idx][0]);
+        ++Idx;
         continue;
       }
+    }
 
-      Info->markPSInputAllocated(PSInputNum);
-      if (!CurOrigArg->use_empty())
-        Info->markPSInputEnabled(PSInputNum);
+    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CC,
+                      // FIXME: We should probably be passing multiple registers to
+                      // handleAssignments to do this
+                      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
+                        packSplitRegsToOrigType(MIRBuilder, VRegs[Idx], Regs, LLTy, PartLLT);
+                      });
 
-      ++PSInputNum;
+    ++Idx;
+  }
+
+  // At least one interpolation mode must be enabled or else the GPU will
+  // hang.
+  //
+  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+  // set PSInputAddr, the user wants to enable some bits after the compilation
+  // based on run-time states. Since we can't know what the final PSInputEna
+  // will look like, we shouldn't do anything here and the user should take
+  // responsibility for the correct programming.
+  //
+  // Otherwise, the following restrictions apply:
+  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+  //   enabled too.
+  if (CC == CallingConv::AMDGPU_PS) {
+    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
+        ((Info->getPSInputAddr() & 0xF) == 0 &&
+         Info->isPSInputAllocated(11))) {
+      CCInfo.AllocateReg(AMDGPU::VGPR0);
+      CCInfo.AllocateReg(AMDGPU::VGPR1);
+      Info->markPSInputAllocated(0);
+      Info->markPSInputEnabled(0);
     }
 
-    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
-                                             /*IsVarArg=*/false);
-
-    if (ValEVT.isVector()) {
-      EVT ElemVT = ValEVT.getVectorElementType();
-      if (!ValEVT.isSimple())
-        return false;
-      MVT ValVT = ElemVT.getSimpleVT();
-      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
-                          OrigArg.Flags, CCInfo);
-      if (!Res)
-        return false;
-    } else {
-      MVT ValVT = ValEVT.getSimpleVT();
-      if (!ValEVT.isSimple())
-        return false;
-      bool Res =
-        AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
-
-      // Fail if we don't know how to handle this type.
-      if (Res)
-        return false;
+    if (Subtarget.isAmdPalOS()) {
+      // For isAmdPalOS, the user does not enable some bits after compilation
+      // based on run-time states; the register values being generated here are
+      // the final ones set in hardware. Therefore we need to apply the
+      // workaround to PSInputAddr and PSInputEnable together. (The case where
+      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
+      // set up an input arg for a particular interpolation mode, but nothing
+      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
+      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
+      if ((PsInputBits & 0x7F) == 0 ||
+          ((PsInputBits & 0xF) == 0 &&
+           (PsInputBits >> 11 & 1)))
+        Info->markPSInputEnabled(
+          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
     }
   }
 
-  Function::const_arg_iterator Arg = F.arg_begin();
-
-  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
-      F.getCallingConv() == CallingConv::AMDGPU_PS) {
-    for (unsigned i = 0, OrigArgIdx = 0;
-         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
-      if (Skipped.test(OrigArgIdx))
-        continue;
-      assert(VRegs[OrigArgIdx].size() == 1 &&
-             "Can't lower into more than 1 reg");
-      CCValAssign &VA = ArgLocs[i++];
-      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
-      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
-      MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
-    }
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
 
-    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
-    return true;
+  if (!MBB.empty())
+    MIRBuilder.setInstr(*MBB.begin());
+
+  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
+  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
+    return false;
+
+  if (!IsEntryFunc) {
+    // Special inputs come after user arguments.
+    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
   }
 
-  return false;
+  // Start adding system SGPRs.
+  if (IsEntryFunc) {
+    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
+  } else {
+    CCInfo.AllocateReg(Info->getScratchRSrcReg());
+    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
+    CCInfo.AllocateReg(Info->getFrameOffsetReg());
+    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
+  }
+
+  // Move back to the end of the basic block.
+  MIRBuilder.setMBB(MBB);
+
+  return true;
 }
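The subtlest path in the new packSplitRegsToOrigType is the unevenly split vector case, e.g. a <3 x s16> argument legalized as two <2 x s16> parts. The following is a minimal standalone sketch of just the rounding arithmetic, in plain C++ with hypothetical element counts instead of LLVM's LLT types:

#include <cassert>
#include <cstdio>

int main() {
  // Hypothetical inputs: a <3 x s16> value legalized into <2 x s16> parts.
  const int DstElts = 3;   // elements in the original vector type
  const int PartElts = 2;  // elements in each legalized part

  // The evenly divisible case is handled by a plain G_CONCAT_VECTORS above.
  assert(DstElts % PartElts != 0);

  // Round the element count up to a multiple of the part size; the parts are
  // concatenated into the rounded type and the original type is extracted at
  // offset 0 (the buildConcatVectors/buildExtract pair in the patch).
  const int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

  std::printf("concat parts into <%d x s16>, extract <%d x s16> at offset 0\n",
              RoundedElts, DstElts);
  return 0;
}

For <3 x s16> this prints a concat into <4 x s16> followed by an extract of <3 x s16>, which is the instruction pair the handler emits.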
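Likewise, the AMDGPU_PS interpolation workaround near the end of lowerFormalArguments reduces to a bit test over PSInputAddr. A hedged sketch of that predicate follows; the helper name and parameters are illustrative, not part of the patch:

#include <cstdint>
#include <cstdio>

// Illustrative helper: true when a dummy PERSP_SAMPLE input (VGPR0/VGPR1)
// must be force-enabled so the GPU does not hang. Bits 0-3 are PERSP_*
// (0xF), bits 4-6 are LINEAR_* (0x70), and input 11 is POS_W_FLOAT,
// matching the masks used in the patch.
static bool needsDummyInterpInput(uint32_t PSInputAddr, bool PosWAllocated) {
  const bool NoInterpMode = (PSInputAddr & 0x7F) == 0;
  const bool PosWWithoutPersp = (PSInputAddr & 0xF) == 0 && PosWAllocated;
  return NoInterpMode || PosWWithoutPersp;
}

int main() {
  // No PERSP_*/LINEAR_* bits set at all: the dummy input is required.
  std::printf("%d\n", needsDummyInterpInput(0x0, false));   // 1
  // POS_W_FLOAT allocated but no PERSP_* mode enabled: also required.
  std::printf("%d\n", needsDummyInterpInput(0x800, true));  // 1
  // PERSP_CENTER enabled: no workaround needed.
  std::printf("%d\n", needsDummyInterpInput(0x2, false));   // 0
  return 0;
}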