diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 90 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCCallingConv.h | 35 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCCallingConv.td | 38 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFastISel.cpp | 123 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 62 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 10 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9 |
11 files changed, 318 insertions, 98 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 564887353dd..decc1b27f77 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -69,10 +70,11 @@ namespace { MapVector<MCSymbol*, MCSymbol*> TOC; const PPCSubtarget &Subtarget; uint64_t TOCLabelID; + StackMaps SM; public: explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), - Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {} + Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0), SM(*this) {} const char *getPassName() const override { return "PowerPC Assembly Printer"; @@ -90,6 +92,13 @@ namespace { bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; + + void EmitEndOfAsmFile(Module &M) override; + + void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux @@ -316,6 +325,80 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { return TOCEntry; } +void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) { + SM.serializeToStackMapSection(); +} + +void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + unsigned NumNOPBytes = MI.getOperand(1).getImm(); + + SM.recordStackMap(MI); + assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + + // Scan ahead to trim the shadow. + const MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::const_iterator MII(MI); + ++MII; + while (NumNOPBytes > 0) { + if (MII == MBB.end() || MII->isCall() || + MII->getOpcode() == PPC::DBG_VALUE || + MII->getOpcode() == TargetOpcode::PATCHPOINT || + MII->getOpcode() == TargetOpcode::STACKMAP) + break; + ++MII; + NumNOPBytes -= 4; + } + + // Emit nops. + for (unsigned i = 0; i < NumNOPBytes; i += 4) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); +} + +// Lower a patchpoint of the form: +// [<def>], <id>, <numBytes>, <target>, <numArgs> +void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + SM.recordPatchPoint(MI); + PatchPointOpers Opers(&MI); + + int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm(); + unsigned EncodedBytes = 0; + if (CallTarget) { + assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && + "High 16 bits of call target should be zero."); + unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); + EncodedBytes = 6*4; + // Materialize the jump address: + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8) + .addReg(ScratchReg) + .addImm((CallTarget >> 32) & 0xFFFF)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(32).addImm(16)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm((CallTarget >> 16) & 0xFFFF)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(CallTarget & 0xFFFF)); + + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8)); + } + + // Emit padding. + unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); + assert(NumBytes >= EncodedBytes && + "Patchpoint can't request size less than the length of a call."); + assert((NumBytes - EncodedBytes) % 4 == 0 && + "Invalid number of NOP bytes requested!"); + for (unsigned i = EncodedBytes; i < NumBytes; i += 4) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); +} /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. @@ -332,6 +415,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { default: break; case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); + case TargetOpcode::STACKMAP: + return LowerSTACKMAP(OutStreamer, SM, *MI); + case TargetOpcode::PATCHPOINT: + return LowerPATCHPOINT(OutStreamer, SM, *MI); + case PPC::MoveGOTtoLR: { // Transform %LR = MoveGOTtoLR // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4 diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.h b/llvm/lib/Target/PowerPC/PPCCallingConv.h new file mode 100644 index 00000000000..eb904a85859 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.h @@ -0,0 +1,35 @@ +//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom routines for the PPC Calling Convention that +// aren't done by tablegen. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H +#define LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" + +namespace llvm { + +inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &, + CCValAssign::LocInfo &, ISD::ArgFlagsTy &, + CCState &) { + llvm_unreachable("The AnyReg calling convention is only supported by the " \ + "stackmap and patchpoint intrinsics."); + // gracefully fallback to PPC C calling convention on Release builds. + return false; +} + +} // End llvm namespace + +#endif + diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index cf8fee49197..56176f14533 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -28,8 +28,21 @@ class CCIfNotSubtarget<string F, CCAction A> // Return Value Calling Convention //===----------------------------------------------------------------------===// +// PPC64 AnyReg return-value convention. No explicit register is specified for +// the return-value. The register allocator is allowed and expected to choose +// any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the PPC C calling convention. +def RetCC_PPC64_AnyReg : CallingConv<[ + CCCustom<"CC_PPC_AnyReg_Error"> +]>; + // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, + // On PPC64, integer return values are always promoted to i64 CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>, @@ -51,6 +64,15 @@ def RetCC_PPC : CallingConv<[ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> ]>; +// No explicit register is specified for the AnyReg calling convention. The +// register allocator may assign the arguments to any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the PPC C calling convention. +def CC_PPC64_AnyReg : CallingConv<[ + CCCustom<"CC_PPC_AnyReg_Error"> +]>; // Note that we don't currently have calling conventions for 64-bit // PowerPC, but handle all the complexities of the ABI in the lowering @@ -61,6 +83,8 @@ def RetCC_PPC : CallingConv<[ // Only handle ints and floats. All ints are promoted to i64. // Vector types and quadword ints are not handled. def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>, + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, @@ -74,6 +98,8 @@ def CC_PPC64_ELF_FIS : CallingConv<[ // and multiple register returns are "supported" to avoid compile // errors, but none are handled by the fast selector. def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, @@ -203,3 +229,15 @@ def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>; def CSR_NoRegs : CalleeSavedRegs<(add)>; +def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10), + (sequence "X%u", 14, 31), + (sequence "F%u", 0, 31), + (sequence "CR%u", 0, 7))>; + +def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs, + (sequence "V%u", 0, 31))>; + +def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, + (sequence "VSL%u", 0, 31), + (sequence "VSH%u", 0, 31))>; + diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index ddf13ff3cd6..6c0e376e247 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -15,6 +15,7 @@ #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" @@ -119,6 +120,8 @@ class PPCFastISel final : public FastISel { unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); + bool fastLowerCall(CallLoweringInfo &CLI); + // Instruction selection routines. private: bool SelectLoad(const Instruction *I); @@ -130,7 +133,6 @@ class PPCFastISel final : public FastISel { bool SelectIToFP(const Instruction *I, bool IsSigned); bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); - bool SelectCall(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); @@ -174,9 +176,7 @@ class PPCFastISel final : public FastISel { CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg); - void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg); + bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); CCAssignFn *usePPC32CCs(unsigned Flag); private: @@ -1339,9 +1339,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // For a call that we've determined we can fast-select, finish the // call sequence and generate a copy to obtain the return value (if any). -void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg) { +bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) { + CallingConv::ID CC = CLI.CallConv; + // Issue CallSEQ_END. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameDestroyOpcode())) @@ -1352,7 +1352,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // any real difficulties there. if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); CCValAssign &VA = RVLocs[0]; assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); @@ -1397,39 +1397,35 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, } assert(ResultReg && "ResultReg unset!"); - UsedRegs.push_back(SourcePhysReg); - updateValueMap(I, ResultReg); + CLI.InRegs.push_back(SourcePhysReg); + CLI.ResultReg = ResultReg; + CLI.NumResultRegs = 1; } + + return true; } -// Attempt to fast-select a call instruction. -bool PPCFastISel::SelectCall(const Instruction *I) { - const CallInst *CI = cast<CallInst>(I); - const Value *Callee = CI->getCalledValue(); +bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { + CallingConv::ID CC = CLI.CallConv; + bool IsTailCall = CLI.IsTailCall; + bool IsVarArg = CLI.IsVarArg; + const Value *Callee = CLI.Callee; + const char *SymName = CLI.SymName; - // Can't handle inline asm. - if (isa<InlineAsm>(Callee)) + if (!Callee && !SymName) return false; // Allow SelectionDAG isel to handle tail calls. - if (CI->isTailCall()) + if (IsTailCall) return false; - // Obtain calling convention. - ImmutableCallSite CS(CI); - CallingConv::ID CC = CS.getCallingConv(); - - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - bool IsVarArg = FTy->isVarArg(); - - // Not ready for varargs yet. + // Let SDISel handle vararg functions. if (IsVarArg) return false; // Handle simple calls for now, with legal return types and // those that can be extended. - Type *RetTy = I->getType(); + Type *RetTy = CLI.RetTy; MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; @@ -1450,7 +1446,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // Bail early if more than 8 arguments, as we only currently // handle arguments passed in registers. - unsigned NumArgs = CS.arg_size(); + unsigned NumArgs = CLI.OutVals.size(); if (NumArgs > 8) return false; @@ -1465,28 +1461,16 @@ bool PPCFastISel::SelectCall(const Instruction *I) { ArgVTs.reserve(NumArgs); ArgFlags.reserve(NumArgs); - for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); - II != IE; ++II) { - // FIXME: ARM does something for intrinsic calls here, check into that. - - unsigned AttrIdx = II - CS.arg_begin() + 1; - + for (unsigned i = 0, ie = NumArgs; i != ie; ++i) { // Only handle easy calls for now. It would be reasonably easy // to handle <= 8-byte structures passed ByVal in registers, but we // have to ensure they are right-justified in the register. - if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || - CS.paramHasAttr(AttrIdx, Attribute::StructRet) || - CS.paramHasAttr(AttrIdx, Attribute::Nest) || - CS.paramHasAttr(AttrIdx, Attribute::ByVal)) + ISD::ArgFlagsTy Flags = CLI.OutFlags[i]; + if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal()) return false; - ISD::ArgFlagsTy Flags; - if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) - Flags.setSExt(); - if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) - Flags.setZExt(); - - Type *ArgTy = (*II)->getType(); + Value *ArgValue = CLI.OutVals[i]; + Type *ArgTy = ArgValue->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) return false; @@ -1494,14 +1478,11 @@ bool PPCFastISel::SelectCall(const Instruction *I) { if (ArgVT.isVector()) return false; - unsigned Arg = getRegForValue(*II); + unsigned Arg = getRegForValue(ArgValue); if (Arg == 0) return false; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); - - Args.push_back(*II); + Args.push_back(ArgValue); ArgRegs.push_back(Arg); ArgVTs.push_back(ArgVT); ArgFlags.push_back(Flags); @@ -1515,18 +1496,28 @@ bool PPCFastISel::SelectCall(const Instruction *I) { RegArgs, CC, NumBytes, IsVarArg)) return false; + MachineInstrBuilder MIB; // FIXME: No handling for function pointers yet. This requires // implementing the function descriptor (OPD) setup. const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV) - return false; - - // Build direct call with NOP for TOC restore. - // FIXME: We can and should optimize away the NOP for local calls. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(PPC::BL8_NOP)); - // Add callee. - MIB.addGlobalAddress(GV); + if (!GV) { + // patchpoints are a special case; they always dispatch to a pointer value. + // However, we don't actually want to generate the indirect call sequence + // here (that will be generated, as necessary, during asm printing), and + // the call we generate here will be erased by FastISel::selectPatchpoint, + // so don't try very hard... + if (CLI.IsPatchPoint) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP)); + else + return false; + } else { + // Build direct call with NOP for TOC restore. + // FIXME: We can and should optimize away the NOP for local calls. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + } // Add implicit physical register uses to the call. for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) @@ -1540,14 +1531,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); - // Finish off the call including any return values. - SmallVector<unsigned, 4> UsedRegs; - finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); - - // Set all unused physregs defs as dead. - static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + CLI.Call = MIB; - return true; + // Finish off the call including any return values. + return finishCall(RetVT, CLI, NumBytes); } // Attempt to fast-select a return instruction. @@ -1837,9 +1824,7 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) { case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); case Instruction::Call: - if (dyn_cast<IntrinsicInst>(I)) - return false; - return SelectCall(I); + return selectCall(I); case Instruction::Ret: return SelectRet(I); case Instruction::Trunc: diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index e39f75544e1..1b850478c88 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -450,6 +450,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || + MFI->hasStackMap() || MFI->hasPatchPoint() || (MF.getTarget().Options.GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2157003ef3e..120e4ed4cb0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -13,6 +13,7 @@ #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCCallingConv.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" @@ -3590,6 +3591,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) { static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, + bool IsPatchPoint, SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, const PPCSubtarget &Subtarget) { @@ -3663,7 +3665,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - if (isSVR4ABI && isPPC64 && !isELFv2ABI) { + if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). @@ -3732,9 +3734,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, MTCTROps[2] = InFlag; } - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, - makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); - InFlag = Chain.getValue(1); + if (!IsPatchPoint) { + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, + makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); + InFlag = Chain.getValue(1); + } NodeTys.clear(); NodeTys.push_back(MVT::Other); @@ -3743,7 +3747,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI) + if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -3783,7 +3787,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, RegsToPass[i].second.getValueType())); // Direct calls in the ELFv2 ABI need the TOC register live into the call. - if (Callee.getNode() && isELFv2ABI) + if (Callee.getNode() && isELFv2ABI && !IsPatchPoint) Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); return CallOpc; @@ -3846,7 +3850,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, - bool isTailCall, bool isVarArg, + bool isTailCall, bool isVarArg, bool IsPatchPoint, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, @@ -3860,8 +3864,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, - isTailCall, RegsToPass, Ops, NodeTys, - Subtarget); + isTailCall, IsPatchPoint, RegsToPass, Ops, + NodeTys, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) @@ -3963,6 +3967,7 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + bool IsPatchPoint = CLI.IsPatchPoint; if (isTailCall) isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, @@ -3975,23 +3980,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, + isTailCall, IsPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals); else return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, + isTailCall, IsPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals); } return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, + isTailCall, IsPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals); } SDValue PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -4201,7 +4206,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, Ins, InVals); } @@ -4229,7 +4234,7 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, SDValue PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -4665,7 +4670,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Check if this is an indirect call (MTCTR/BCTRL). // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. - if (!isTailCall && + if (!isTailCall && !IsPatchPoint && !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. @@ -4679,7 +4684,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. - if (isELFv2ABI) + if (isELFv2ABI && !IsPatchPoint) RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } @@ -4696,7 +4701,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, Ins, InVals); } @@ -4704,7 +4709,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDValue PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -5089,7 +5094,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, Ins, InVals); } @@ -7246,6 +7251,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) + return emitPatchPoint(MI, BB); + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); @@ -9882,6 +9891,19 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } +const MCPhysReg * +PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { + // LR is a callee-save register, but we must treat it as clobbered by any call + // site. Hence we include LR in the scratch registers, which are in turn added + // as implicit-defs for stackmaps and patchpoints. The same reasoning applies + // to CTR, which is used by any indirect call. + static const MCPhysReg ScratchRegs[] = { + PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0 + }; + + return ScratchRegs; +} + bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index b171b165877..1b585d8e509 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -570,6 +570,8 @@ namespace llvm { /// expanded to fmul + fadd. bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; + // Should we expand the build vector with shuffles? bool shouldExpandBuildVectorWithShuffles(EVT VT, @@ -681,7 +683,7 @@ namespace llvm { SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, - bool isVarArg, + bool isVarArg, bool IsPatchPoint, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, @@ -746,7 +748,7 @@ namespace llvm { SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, + bool isVarArg, bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -755,7 +757,7 @@ namespace llvm { SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, + bool isVarArg, bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -763,7 +765,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, + bool isVarArg, bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 1242c3e4015..2492229dc31 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -1596,6 +1597,11 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const MachineFunction *MF = MI->getParent()->getParent(); const char *AsmStr = MI->getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } else if (Opcode == TargetOpcode::STACKMAP) { + return MI->getOperand(1).getImm(); + } else if (Opcode == TargetOpcode::PATCHPOINT) { + PatchPointOpers Opers(MI); + return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); } else { const MCInstrDesc &Desc = get(Opcode); return Desc.getSize(); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 9b9966f6465..aa0da7a97b1 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -99,6 +99,14 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) { + if (Subtarget.hasVSX()) + return CSR_64_AllRegs_VSX_SaveList; + if (Subtarget.hasAltivec()) + return CSR_64_AllRegs_Altivec_SaveList; + return CSR_64_AllRegs_SaveList; + } + if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList : @@ -117,6 +125,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::AnyReg) { + if (Subtarget.hasVSX()) + return CSR_64_AllRegs_VSX_RegMask; + if (Subtarget.hasAltivec()) + return CSR_64_AllRegs_Altivec_RegMask; + return CSR_64_AllRegs_RegMask; + } + if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask : @@ -138,6 +154,14 @@ PPCRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } +void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { + unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM }; + for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) { + unsigned Reg = PseudoRegs[i]; + Mask[Reg / 32] &= ~(1u << (Reg % 32)); + } +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>( @@ -700,7 +724,10 @@ static unsigned getOffsetONFromFION(const MachineInstr &MI, // Take into account whether it's an add or mem instruction unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNum-1; + OffsetOperandNo = FIOperandNum - 1; + else if (MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT) + OffsetOperandNo = FIOperandNum + 1; return OffsetOperandNo; } @@ -772,7 +799,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). - bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC); + bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP && + OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC); // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); @@ -796,8 +824,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. assert(OpC != PPC::DBG_VALUE && - "This should be handle in a target independent way"); - if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + "This should be handled in a target-independent way"); + if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) || + OpC == TargetOpcode::STACKMAP || + OpC == TargetOpcode::PATCHPOINT)) { MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); return; } @@ -1008,6 +1038,8 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, Offset += MI->getOperand(OffsetOperandNo).getImm(); return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT || (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index c182f95ff64..4c2ef9067cb 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -49,6 +49,8 @@ public: const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const; + void adjustStackMapLiveOutMask(uint32_t *Mask) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; /// We require the register scavenger. diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index fc440a56fdb..a7bb2526d9d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -183,6 +183,15 @@ unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue())) return TCC_Free; break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; } return PPCTTI::getIntImmCost(Imm, Ty); } |