author:    Reid Kleckner <reid@kleckner.net>  2014-12-22 23:58:37 +0000
committer: Reid Kleckner <reid@kleckner.net>  2014-12-22 23:58:37 +0000
commit:    ce0093344fa1f9a10831038bfd47703b699db5f4 (patch)
tree:      8a8a03dc74553dfc2302f41c6a839dde66def234 /llvm/lib
parent:    ea37c1173e0a58a18a95c37535f3ac0abacccc03 (diff)
Make musttail more robust for vector types on x86
Previously I tried to plug musttail into the existing vararg lowering
code. That turned out to be a mistake, because non-vararg calls use
significantly different register lowering, even on x86. For example, AVX
vectors are usually passed in registers to normal functions and memory
to vararg functions. Now musttail uses a completely separate lowering.
Hopefully this can be used as the basis for non-x86 perfect forwarding.
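
To illustrate the asymmetry (an illustrative sketch, not part of this patch;
assumes a SysV x86-64 target built with -mavx, and the function names are
made up):

    #include <immintrin.h>

    void normal_callee(__m256 v);    // v arrives in %ymm0
    void vararg_callee(int n, ...);  // a __m256 vararg goes to memory

    void caller(__m256 v) {
      normal_callee(v);     // register lowering
      vararg_callee(1, v);  // memory lowering
    }

A musttail forwarder must conservatively preserve every register a
non-vararg call could use, which is why the new lowering computes the
forwarded register set with variadic-ness switched off.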
Reviewers: majnemer
Differential Revision: http://reviews.llvm.org/D6156
llvm-svn: 224745
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/CallingConvLower.cpp         |  56
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp       | 191
-rw-r--r--  llvm/lib/Target/X86/X86MachineFunctionInfo.h  |  16
3 files changed, 163 insertions, 100 deletions
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index 56ecde0936c..fbe8b7c10e6 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -14,9 +14,11 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
@@ -178,3 +180,57 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
     llvm_unreachable(nullptr);
   }
 }
+
+void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
+                                          MVT VT, CCAssignFn Fn) {
+  unsigned SavedStackOffset = StackOffset;
+  unsigned NumLocs = Locs.size();
+
+  // Allocate something of this value type repeatedly with just the inreg flag
+  // set until we get assigned a location in memory.
+  ISD::ArgFlagsTy Flags;
+  Flags.setInReg();
+  bool HaveRegParm = true;
+  while (HaveRegParm) {
+    if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+      dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
+             << " while computing remaining regparms\n";
+#endif
+      llvm_unreachable(nullptr);
+    }
+    HaveRegParm = Locs.back().isRegLoc();
+  }
+
+  // Copy all the registers from the value locations we added.
+  assert(NumLocs < Locs.size() && "CC assignment failed to add location");
+  for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I)
+    if (Locs[I].isRegLoc())
+      Regs.push_back(MCPhysReg(Locs[I].getLocReg()));
+
+  // Clear the assigned values and stack memory. We leave the registers marked
+  // as allocated so that future queries don't return the same registers, i.e.
+  // when i64 and f64 are both passed in GPRs.
+  StackOffset = SavedStackOffset;
+  Locs.resize(NumLocs);
+}
+
+void CCState::analyzeMustTailForwardedRegisters(
+    SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+    CCAssignFn Fn) {
+  // Oftentimes calling conventions will not use register parameters for
+  // variadic functions, so we need to assume we're not variadic so that we get
+  // all the registers that might be used in a non-variadic call.
+  SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
+
+  for (MVT RegVT : RegParmTypes) {
+    SmallVector<MCPhysReg, 8> RemainingRegs;
+    getRemainingRegParmsForType(RemainingRegs, RegVT, Fn);
+    const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
+    const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
+    for (MCPhysReg PReg : RemainingRegs) {
+      unsigned VReg = MF.addLiveIn(PReg, RC);
+      Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
+    }
+  }
+}
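
The X86 hunks below are the first user of these helpers. Condensed, the call
pattern looks like this (a sketch assembled from the X86ISelLowering.cpp
changes that follow; the vector-type selection is elided):

    // Ask CCState which argument registers a non-variadic call could still
    // occupy, and mark them all for forwarding to the musttail call.
    SmallVector<MVT, 2> RegParmTypes;
    RegParmTypes.push_back(MVT::i64);   // the integer argument registers
    RegParmTypes.push_back(MVT::v8f32); // widest legal vector type (AVX here)
    SmallVectorImpl<ForwardedRegister> &Forwards =
        FuncInfo->getForwardedMustTailRegParms();
    CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
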
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c6e72c9aa6b..7440b5decbb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2549,11 +2549,19 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
                       MFI->CreateFixedObject(1, StackSize, true));
   }
 
+  // Figure out if XMM registers are in use.
+  bool HaveXMMArgs = Is64Bit && !IsWin64;
+  bool NoImplicitFloatOps = Fn->getAttributes().hasAttribute(
+      AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+  assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
+         "SSE register cannot be used when SSE is disabled!");
+  if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+      !Subtarget->hasSSE1())
+    HaveXMMArgs = false;
+
   // 64-bit calling conventions support varargs and register parameters, so we
-  // have to do extra work to spill them in the prologue or forward them to
-  // musttail calls.
-  if (Is64Bit && isVarArg &&
-      (MFI->hasVAStart() || MFI->hasMustTailInVarArgFunc())) {
+  // have to do extra work to spill them in the prologue.
+  if (Is64Bit && isVarArg && MFI->hasVAStart()) {
     // Find the first unallocated argument registers.
     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
@@ -2583,90 +2591,99 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
       }
     }
 
-    // Store them to the va_list returned by va_start.
-    if (MFI->hasVAStart()) {
-      if (IsWin64) {
-        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
-        // Get to the caller-allocated home save location. Add 8 to account
-        // for the return address.
-        int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
-        FuncInfo->setRegSaveFrameIndex(
+    if (IsWin64) {
+      const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+      // Get to the caller-allocated home save location. Add 8 to account
+      // for the return address.
+      int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+      FuncInfo->setRegSaveFrameIndex(
          MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
-        // Fixup to set vararg frame on shadow area (4 x i64).
-        if (NumIntRegs < 4)
-          FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
-      } else {
-        // For X86-64, if there are vararg parameters that are passed via
-        // registers, then we must store them to their spots on the stack so
-        // they may be loaded by dereferencing the result of va_next.
-        FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
-        FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
-        FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
-            ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
-      }
-
-      // Store the integer parameter registers.
-      SmallVector<SDValue, 8> MemOps;
-      SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
-                                        getPointerTy());
-      unsigned Offset = FuncInfo->getVarArgsGPOffset();
-      for (SDValue Val : LiveGPRs) {
-        SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
-                                  DAG.getIntPtrConstant(Offset));
-        SDValue Store =
-          DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       MachinePointerInfo::getFixedStack(
-                         FuncInfo->getRegSaveFrameIndex(), Offset),
-                       false, false, 0);
-        MemOps.push_back(Store);
-        Offset += 8;
-      }
-
-      if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
-        // Now store the XMM (fp + vector) parameter registers.
-        SmallVector<SDValue, 12> SaveXMMOps;
-        SaveXMMOps.push_back(Chain);
-        SaveXMMOps.push_back(ALVal);
-        SaveXMMOps.push_back(DAG.getIntPtrConstant(
-                               FuncInfo->getRegSaveFrameIndex()));
-        SaveXMMOps.push_back(DAG.getIntPtrConstant(
-                               FuncInfo->getVarArgsFPOffset()));
-        SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
-                          LiveXMMRegs.end());
-        MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
-                                     MVT::Other, SaveXMMOps));
-      }
-
-      if (!MemOps.empty())
-        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+      // Fixup to set vararg frame on shadow area (4 x i64).
+      if (NumIntRegs < 4)
+        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
     } else {
-      // Add all GPRs, al, and XMMs to the list of forwards. We will add them
-      // to the liveout set on a musttail call.
-      assert(MFI->hasMustTailInVarArgFunc());
-      auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
-      typedef X86MachineFunctionInfo::Forward Forward;
-
-      for (unsigned I = 0, E = LiveGPRs.size(); I != E; ++I) {
-        unsigned VReg =
-            MF.getRegInfo().createVirtualRegister(&X86::GR64RegClass);
-        Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveGPRs[I]);
-        Forwards.push_back(Forward(VReg, ArgGPRs[NumIntRegs + I], MVT::i64));
-      }
-
-      if (!ArgXMMs.empty()) {
-        unsigned ALVReg =
-            MF.getRegInfo().createVirtualRegister(&X86::GR8RegClass);
-        Chain = DAG.getCopyToReg(Chain, dl, ALVReg, ALVal);
-        Forwards.push_back(Forward(ALVReg, X86::AL, MVT::i8));
-
-        for (unsigned I = 0, E = LiveXMMRegs.size(); I != E; ++I) {
-          unsigned VReg =
-              MF.getRegInfo().createVirtualRegister(&X86::VR128RegClass);
-          Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveXMMRegs[I]);
-          Forwards.push_back(
-              Forward(VReg, ArgXMMs[NumXMMRegs + I], MVT::v4f32));
-        }
-      }
+      // For X86-64, if there are vararg parameters that are passed via
+      // registers, then we must store them to their spots on the stack so
+      // they may be loaded by dereferencing the result of va_next.
+      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+      FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
+      FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
+          ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
+    }
+
+    // Store the integer parameter registers.
+    SmallVector<SDValue, 8> MemOps;
+    SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+                                      getPointerTy());
+    unsigned Offset = FuncInfo->getVarArgsGPOffset();
+    for (SDValue Val : LiveGPRs) {
+      SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+                                DAG.getIntPtrConstant(Offset));
+      SDValue Store =
+        DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                     MachinePointerInfo::getFixedStack(
+                       FuncInfo->getRegSaveFrameIndex(), Offset),
+                     false, false, 0);
+      MemOps.push_back(Store);
+      Offset += 8;
+    }
+
+    if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
+      // Now store the XMM (fp + vector) parameter registers.
+      SmallVector<SDValue, 12> SaveXMMOps;
+      SaveXMMOps.push_back(Chain);
+      SaveXMMOps.push_back(ALVal);
+      SaveXMMOps.push_back(DAG.getIntPtrConstant(
+                             FuncInfo->getRegSaveFrameIndex()));
+      SaveXMMOps.push_back(DAG.getIntPtrConstant(
+                             FuncInfo->getVarArgsFPOffset()));
+      SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
+                        LiveXMMRegs.end());
+      MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+                                   MVT::Other, SaveXMMOps));
+    }
+
+    if (!MemOps.empty())
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+  }
+
+  if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
+    // Find the largest legal vector type.
+    MVT VecVT = MVT::Other;
+    // FIXME: Only some x86_32 calling conventions support AVX512.
+    if (Subtarget->hasAVX512() &&
+        (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
+                     CallConv == CallingConv::Intel_OCL_BI)))
+      VecVT = MVT::v16f32;
+    else if (Subtarget->hasAVX())
+      VecVT = MVT::v8f32;
+    else if (Subtarget->hasSSE2())
+      VecVT = MVT::v4f32;
+
+    // We forward some GPRs and some vector types.
+    SmallVector<MVT, 2> RegParmTypes;
+    MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
+    RegParmTypes.push_back(IntVT);
+    if (VecVT != MVT::Other)
+      RegParmTypes.push_back(VecVT);
+
+    // Compute the set of forwarded registers. The rest are scratch.
+    SmallVectorImpl<ForwardedRegister> &Forwards =
+        FuncInfo->getForwardedMustTailRegParms();
+    CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
+
+    // Conservatively forward AL on x86_64, since it might be used for varargs.
+    if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
+      unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
+      Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
+    }
+
+    // Copy all forwards from physical to virtual registers.
+    for (ForwardedRegister &F : Forwards) {
+      // FIXME: Can we use a less constrained schedule?
+      SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
+      F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
+      Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
     }
   }
 
@@ -2986,7 +3003,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                         DAG.getConstant(NumXMMRegs, MVT::i8)));
   }
 
-  if (Is64Bit && isVarArg && IsMustTail) {
+  if (isVarArg && IsMustTail) {
     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
     for (const auto &F : Forwards) {
       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 42f449063f7..b23a744da68 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
 #define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
 
+#include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include <vector>
@@ -77,21 +78,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// NumLocalDynamics - Number of local-dynamic TLS accesses.
   unsigned NumLocalDynamics;
 
-public:
-  /// Describes a register that needs to be forwarded from the prologue to a
-  /// musttail call.
-  struct Forward {
-    Forward(unsigned VReg, MCPhysReg PReg, MVT VT)
-        : VReg(VReg), PReg(PReg), VT(VT) {}
-    unsigned VReg;
-    MCPhysReg PReg;
-    MVT VT;
-  };
-
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
-  std::vector<Forward> ForwardedMustTailRegParms;
+  SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
 
 public:
   X86MachineFunctionInfo() : ForceFramePointer(false),
@@ -168,7 +158,7 @@ public:
   unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
   void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
 
-  std::vector<Forward> &getForwardedMustTailRegParms() {
+  SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
     return ForwardedMustTailRegParms;
   }
 };
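
Taken together, LowerFormalArguments and LowerCall now form a round trip for
each forwarded register. A condensed sketch of the two halves (the loops are
taken from the hunks above; RegsToPass is the usual LowerCall argument vector
and is assumed from surrounding context):

    // Prologue: move each possibly-live argument register into a fresh
    // virtual register so its value survives until the musttail call.
    for (ForwardedRegister &F : Forwards) {
      SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
      F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
      Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
    }

    // Call site: copy each virtual register back into its physical register
    // just before emitting the tail call.
    for (const auto &F : Forwards) {
      SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
      RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
    }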