diff options
4 files changed, 118 insertions, 44 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 65c527b96ca..dddb267f0c6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -220,7 +220,8 @@ protected: /// /// Used to check if arguments are suitable for tail call lowering. bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args, - CCAssignFn &Fn) const; + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const; /// \returns True if the calling convention for a callee and its caller pass /// results in the same way. Typically used for tail call eligibility checks. @@ -228,12 +229,18 @@ protected: /// \p Info is the CallLoweringInfo for the call. /// \p MF is the MachineFunction for the caller. /// \p InArgs contains the results of the call. - /// \p CalleeAssignFn is the CCAssignFn to be used for the callee. - /// \p CallerAssignFn is the CCAssignFn to be used for the caller. + /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for + /// fixed arguments. + /// \p CalleeAssignFnVarArg is similar, but for varargs. + /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for + /// fixed arguments. + /// \p CallerAssignFnVarArg is similar, but for varargs. 
bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl<ArgInfo> &InArgs, - CCAssignFn &CalleeAssignFn, - CCAssignFn &CallerAssignFn) const; + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const; public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 9280c9efc93..d05e2091c71 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -379,10 +379,12 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } bool CallLowering::analyzeArgInfo(CCState &CCState, - SmallVectorImpl<ArgInfo> &Args, - CCAssignFn &Fn) const { + SmallVectorImpl<ArgInfo> &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const { for (unsigned i = 0, e = Args.size(); i < e; ++i) { MVT VT = MVT::getVT(Args[i].Ty); + CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg; if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) { // Bail out on anything we can't handle. 
LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString() @@ -396,8 +398,10 @@ bool CallLowering::analyzeArgInfo(CCState &CCState, bool CallLowering::resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl<ArgInfo> &InArgs, - CCAssignFn &CalleeAssignFn, - CCAssignFn &CallerAssignFn) const { + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const { const Function &F = MF.getFunction(); CallingConv::ID CalleeCC = Info.CallConv; CallingConv::ID CallerCC = F.getCallingConv(); @@ -407,12 +411,14 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info, SmallVector<CCValAssign, 16> ArgLocs1; CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext()); - if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFn)) + if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed, + CalleeAssignFnVarArg)) return false; SmallVector<CCValAssign, 16> ArgLocs2; CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext()); - if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFn)) + if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed, + CallerAssignFnVarArg)) return false; // We need the argument locations to match up exactly. If there's more in diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp index a4f2bac8d60..9f1945cbc31 100644 --- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -464,6 +464,13 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { } } +/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn +/// for CC. 
+static std::pair<CCAssignFn *, CCAssignFn *> +getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { + return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)}; +} + bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl<ArgInfo> &InArgs) const { @@ -477,11 +484,19 @@ bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( // Check if the caller and callee will handle arguments in the same way. const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); - CCAssignFn *CalleeAssignFn = TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg); - CCAssignFn *CallerAssignFn = - TLI.CCAssignFnForCall(CallerCC, CallerF.isVarArg()); - - if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFn, *CallerAssignFn)) + CCAssignFn *CalleeAssignFnFixed; + CCAssignFn *CalleeAssignFnVarArg; + std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) = + getAssignFnsForCC(CalleeCC, TLI); + + CCAssignFn *CallerAssignFnFixed; + CCAssignFn *CallerAssignFnVarArg; + std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) = + getAssignFnsForCC(CallerCC, TLI); + + if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed, + *CalleeAssignFnVarArg, *CallerAssignFnFixed, + *CallerAssignFnVarArg)) return false; // Make sure that the caller and callee preserve all of the same registers. @@ -508,12 +523,15 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( CallingConv::ID CallerCC = CallerF.getCallingConv(); const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + // We have outgoing arguments. Make sure that we can tail call with them. 
SmallVector<CCValAssign, 16> OutLocs; CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext()); - if (!analyzeArgInfo(OutInfo, OutArgs, - *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) { + if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) { LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n"); return false; } @@ -741,10 +759,9 @@ bool AArch64CallLowering::lowerTailCall( // Find out which ABI gets to decide where things go. CallingConv::ID CalleeCC = Info.CallConv; - CCAssignFn *AssignFnFixed = - TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false); - CCAssignFn *AssignFnVarArg = - TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true); + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); MachineInstrBuilder CallSeqStart; if (!IsSibCall) @@ -787,8 +804,7 @@ bool AArch64CallLowering::lowerTailCall( unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); SmallVector<CCValAssign, 16> OutLocs; CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); - analyzeArgInfo(OutInfo, OutArgs, - *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg)); + analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg); // The callee will pop the argument stack as a tail call. Thus, we must // keep it 16-byte aligned. @@ -879,10 +895,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return lowerTailCall(MIRBuilder, Info, OutArgs); // Find out which ABI gets to decide where things go. 
- CCAssignFn *AssignFnFixed = - TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false); - CCAssignFn *AssignFnVarArg = - TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true); + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = + getAssignFnsForCC(Info.CallConv, TLI); MachineInstrBuilder CallSeqStart; CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll index 78ab27e1039..115fa4fb2a3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll @@ -77,32 +77,77 @@ define i32 @test_nonvoid_ret() { declare void @varargs(i32, double, i64, ...) define void @test_varargs() { - ; On Darwin, everything is passed on the stack. Since the caller has no stack, - ; we don't tail call. - ; DARWIN-LABEL: name: test_varargs - ; DARWIN: bb.1 (%ir-block.0): - ; DARWIN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; DARWIN: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; DARWIN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; DARWIN: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; DARWIN: $w0 = COPY [[C]](s32) - ; DARWIN: $d0 = COPY [[C1]](s64) - ; DARWIN: $x1 = COPY [[C2]](s64) + ; COMMON-LABEL: name: test_varargs + ; COMMON: bb.1 (%ir-block.0): + ; COMMON: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; COMMON: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; COMMON: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; COMMON: $w0 = COPY [[C]](s32) + ; COMMON: $d0 = COPY [[C1]](s64) + ; COMMON: $x1 = COPY [[C2]](s64) + ; COMMON: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1 + tail call void(i32, double, i64, ...) 
@varargs(i32 42, double 1.0, i64 12) + ret void +} + +; Darwin should not tail call here, because the last parameter to @varargs is +; not fixed. So, it's passed on the stack, which will make us not fit. On +; Windows, it's passed in a register, so it's safe to tail call. +define void @test_varargs_2() { + ; DARWIN-LABEL: name: test_varargs_2 + ; DARWIN-NOT: TCRETURNdi @varargs ; DARWIN: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 - ; DARWIN: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp ; DARWIN: RET_ReallyLR - ; Windows uses registers, so we don't need to worry about using the stack. - ; WINDOWS-LABEL: name: test_varargs + ; WINDOWS-LABEL: name: test_varargs_2 ; WINDOWS: bb.1 (%ir-block.0): ; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314 ; WINDOWS: $w0 = COPY [[C]](s32) ; WINDOWS: $d0 = COPY [[C1]](s64) ; WINDOWS: $x1 = COPY [[C2]](s64) - ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1 - tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12) + ; WINDOWS: $x2 = COPY [[C3]](s64) + ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2 + tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314) + ret void +} + +; Same deal here, even though we have enough room to fit. On Darwin, we'll pass +; the last argument to @varargs on the stack. We don't allow tail calling +; varargs arguments that are on the stack. 
+define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) { + ; DARWIN-LABEL: name: test_varargs_3 + ; DARWIN-NOT: TCRETURNdi @varargs + ; DARWIN: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 + ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp + ; DARWIN: RET_ReallyLR + + ; WINDOWS-LABEL: name: test_varargs_3 + ; WINDOWS: bb.1 (%ir-block.1): + ; WINDOWS: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 + ; WINDOWS: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; WINDOWS: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; WINDOWS: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; WINDOWS: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 + ; WINDOWS: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4 + ; WINDOWS: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5 + ; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 + ; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 + ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1) + ; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314 + ; WINDOWS: $w0 = COPY [[C]](s32) + ; WINDOWS: $d0 = COPY [[C1]](s64) + ; WINDOWS: $x1 = COPY [[C2]](s64) + ; WINDOWS: $x2 = COPY [[C3]](s64) + ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2 + tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314) ret void } |

