diff options
author | Hal Finkel <hfinkel@anl.gov> | 2015-01-14 01:07:51 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2015-01-14 01:07:51 +0000 |
commit | 934361a4b8661410b5c1d8e1f50f3f1c145e8b7e (patch) | |
tree | 15dc2d1ca8dd9d8c7d68fe98f6b3d905eae32c3b /llvm/lib | |
parent | dcdd5ad25254e8811fdea76c39a93ecbb992d230 (diff) | |
download | bcm5719-llvm-934361a4b8661410b5c1d8e1f50f3f1c145e8b7e.tar.gz bcm5719-llvm-934361a4b8661410b5c1d8e1f50f3f1c145e8b7e.zip |
Revert "r225811 - Revert "r225808 - [PowerPC] Add StackMap/PatchPoint support""
This re-applies r225808, fixed to avoid problems with SDAG dependencies along
with the preceding fix to ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs.
These problems caused the original regression tests to assert/segfault on many
(but not all) systems.
Original commit message:
This commit does two things:
1. Refactors PPCFastISel to use more of the common infrastructure for call
lowering (this lets us take advantage of this common code for lowering some
common intrinsics, stackmap/patchpoint among them).
2. Adds support for stackmap/patchpoint lowering. For the most part, this is
very similar to the support in the AArch64 target, with the obvious differences
(different registers, NOP instructions, etc.). The test cases are adapted
from the AArch64 test cases.
One difference of note is that the patchpoint call sequence takes 24 bytes, so
you can't use less than that (on AArch64 you can go down to 16). Also, as noted
in the docs, we take the patchpoint address to be the actual code address
(assuming the call is local in the TOC-sharing sense), which should yield
higher performance than generating the full cross-DSO indirect-call sequence
and is likely just as useful for JITed code (if not, we'll change it).
StackMaps and Patchpoints are still marked as experimental, and so this support
is doubly experimental. So go ahead and experiment!
llvm-svn: 225909
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 90 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCCallingConv.h | 35 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCCallingConv.td | 38 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFastISel.cpp | 123 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 63 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 10 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9 |
11 files changed, 323 insertions, 94 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 564887353dd..decc1b27f77 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -69,10 +70,11 @@ namespace { MapVector<MCSymbol*, MCSymbol*> TOC; const PPCSubtarget &Subtarget; uint64_t TOCLabelID; + StackMaps SM; public: explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), - Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {} + Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0), SM(*this) {} const char *getPassName() const override { return "PowerPC Assembly Printer"; @@ -90,6 +92,13 @@ namespace { bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; + + void EmitEndOfAsmFile(Module &M) override; + + void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux @@ -316,6 +325,80 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { return TOCEntry; } +void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) { + SM.serializeToStackMapSection(); +} + +void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + unsigned NumNOPBytes = MI.getOperand(1).getImm(); + + SM.recordStackMap(MI); + assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + + // Scan ahead to trim the shadow. + const MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::const_iterator MII(MI); + ++MII; + while (NumNOPBytes > 0) { + if (MII == MBB.end() || MII->isCall() || + MII->getOpcode() == PPC::DBG_VALUE || + MII->getOpcode() == TargetOpcode::PATCHPOINT || + MII->getOpcode() == TargetOpcode::STACKMAP) + break; + ++MII; + NumNOPBytes -= 4; + } + + // Emit nops. + for (unsigned i = 0; i < NumNOPBytes; i += 4) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); +} + +// Lower a patchpoint of the form: +// [<def>], <id>, <numBytes>, <target>, <numArgs> +void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + SM.recordPatchPoint(MI); + PatchPointOpers Opers(&MI); + + int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm(); + unsigned EncodedBytes = 0; + if (CallTarget) { + assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && + "High 16 bits of call target should be zero."); + unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); + EncodedBytes = 6*4; + // Materialize the jump address: + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8) + .addReg(ScratchReg) + .addImm((CallTarget >> 32) & 0xFFFF)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(32).addImm(16)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm((CallTarget >> 16) & 0xFFFF)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(CallTarget & 0xFFFF)); + + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8)); + } + + // Emit padding. + unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); + assert(NumBytes >= EncodedBytes && + "Patchpoint can't request size less than the length of a call."); + assert((NumBytes - EncodedBytes) % 4 == 0 && + "Invalid number of NOP bytes requested!"); + for (unsigned i = EncodedBytes; i < NumBytes; i += 4) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); +} /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. @@ -332,6 +415,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { default: break; case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); + case TargetOpcode::STACKMAP: + return LowerSTACKMAP(OutStreamer, SM, *MI); + case TargetOpcode::PATCHPOINT: + return LowerPATCHPOINT(OutStreamer, SM, *MI); + case PPC::MoveGOTtoLR: { // Transform %LR = MoveGOTtoLR // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4 diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.h b/llvm/lib/Target/PowerPC/PPCCallingConv.h new file mode 100644 index 00000000000..eb904a85859 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.h @@ -0,0 +1,35 @@ +//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom routines for the PPC Calling Convention that +// aren't done by tablegen. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H +#define LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" + +namespace llvm { + +inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &, + CCValAssign::LocInfo &, ISD::ArgFlagsTy &, + CCState &) { + llvm_unreachable("The AnyReg calling convention is only supported by the " \ + "stackmap and patchpoint intrinsics."); + // gracefully fallback to PPC C calling convention on Release builds. + return false; +} + +} // End llvm namespace + +#endif + diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index cf8fee49197..56176f14533 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -28,8 +28,21 @@ class CCIfNotSubtarget<string F, CCAction A> // Return Value Calling Convention //===----------------------------------------------------------------------===// +// PPC64 AnyReg return-value convention. No explicit register is specified for +// the return-value. The register allocator is allowed and expected to choose +// any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the PPC C calling convention. +def RetCC_PPC64_AnyReg : CallingConv<[ + CCCustom<"CC_PPC_AnyReg_Error"> +]>; + // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, + // On PPC64, integer return values are always promoted to i64 CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>, @@ -51,6 +64,15 @@ def RetCC_PPC : CallingConv<[ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> ]>; +// No explicit register is specified for the AnyReg calling convention. The +// register allocator may assign the arguments to any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the PPC C calling convention. +def CC_PPC64_AnyReg : CallingConv<[ + CCCustom<"CC_PPC_AnyReg_Error"> +]>; // Note that we don't currently have calling conventions for 64-bit // PowerPC, but handle all the complexities of the ABI in the lowering @@ -61,6 +83,8 @@ def RetCC_PPC : CallingConv<[ // Only handle ints and floats. All ints are promoted to i64. // Vector types and quadword ints are not handled. def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>, + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, @@ -74,6 +98,8 @@ def CC_PPC64_ELF_FIS : CallingConv<[ // and multiple register returns are "supported" to avoid compile // errors, but none are handled by the fast selector. def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, @@ -203,3 +229,15 @@ def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>; def CSR_NoRegs : CalleeSavedRegs<(add)>; +def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10), + (sequence "X%u", 14, 31), + (sequence "F%u", 0, 31), + (sequence "CR%u", 0, 7))>; + +def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs, + (sequence "V%u", 0, 31))>; + +def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, + (sequence "VSL%u", 0, 31), + (sequence "VSH%u", 0, 31))>; + diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index ddf13ff3cd6..13bd0c7be2c 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -15,6 +15,7 @@ #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" @@ -119,6 +120,8 @@ class PPCFastISel final : public FastISel { unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); + bool fastLowerCall(CallLoweringInfo &CLI) override; + // Instruction selection routines. private: bool SelectLoad(const Instruction *I); @@ -130,7 +133,6 @@ class PPCFastISel final : public FastISel { bool SelectIToFP(const Instruction *I, bool IsSigned); bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); - bool SelectCall(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); @@ -174,9 +176,7 @@ class PPCFastISel final : public FastISel { CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg); - void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg); + bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); CCAssignFn *usePPC32CCs(unsigned Flag); private: @@ -1339,9 +1339,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // For a call that we've determined we can fast-select, finish the // call sequence and generate a copy to obtain the return value (if any). -void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg) { +bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) { + CallingConv::ID CC = CLI.CallConv; + // Issue CallSEQ_END. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameDestroyOpcode())) @@ -1352,7 +1352,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // any real difficulties there. if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); CCValAssign &VA = RVLocs[0]; assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); @@ -1397,39 +1397,35 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, } assert(ResultReg && "ResultReg unset!"); - UsedRegs.push_back(SourcePhysReg); - updateValueMap(I, ResultReg); + CLI.InRegs.push_back(SourcePhysReg); + CLI.ResultReg = ResultReg; + CLI.NumResultRegs = 1; } + + return true; } -// Attempt to fast-select a call instruction. -bool PPCFastISel::SelectCall(const Instruction *I) { - const CallInst *CI = cast<CallInst>(I); - const Value *Callee = CI->getCalledValue(); +bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { + CallingConv::ID CC = CLI.CallConv; + bool IsTailCall = CLI.IsTailCall; + bool IsVarArg = CLI.IsVarArg; + const Value *Callee = CLI.Callee; + const char *SymName = CLI.SymName; - // Can't handle inline asm. - if (isa<InlineAsm>(Callee)) + if (!Callee && !SymName) return false; // Allow SelectionDAG isel to handle tail calls. - if (CI->isTailCall()) + if (IsTailCall) return false; - // Obtain calling convention. - ImmutableCallSite CS(CI); - CallingConv::ID CC = CS.getCallingConv(); - - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - bool IsVarArg = FTy->isVarArg(); - - // Not ready for varargs yet. + // Let SDISel handle vararg functions. if (IsVarArg) return false; // Handle simple calls for now, with legal return types and // those that can be extended. - Type *RetTy = I->getType(); + Type *RetTy = CLI.RetTy; MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; @@ -1450,7 +1446,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // Bail early if more than 8 arguments, as we only currently // handle arguments passed in registers. - unsigned NumArgs = CS.arg_size(); + unsigned NumArgs = CLI.OutVals.size(); if (NumArgs > 8) return false; @@ -1465,28 +1461,16 @@ bool PPCFastISel::SelectCall(const Instruction *I) { ArgVTs.reserve(NumArgs); ArgFlags.reserve(NumArgs); - for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); - II != IE; ++II) { - // FIXME: ARM does something for intrinsic calls here, check into that. - - unsigned AttrIdx = II - CS.arg_begin() + 1; - + for (unsigned i = 0, ie = NumArgs; i != ie; ++i) { // Only handle easy calls for now. It would be reasonably easy // to handle <= 8-byte structures passed ByVal in registers, but we // have to ensure they are right-justified in the register. - if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || - CS.paramHasAttr(AttrIdx, Attribute::StructRet) || - CS.paramHasAttr(AttrIdx, Attribute::Nest) || - CS.paramHasAttr(AttrIdx, Attribute::ByVal)) + ISD::ArgFlagsTy Flags = CLI.OutFlags[i]; + if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal()) return false; - ISD::ArgFlagsTy Flags; - if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) - Flags.setSExt(); - if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) - Flags.setZExt(); - - Type *ArgTy = (*II)->getType(); + Value *ArgValue = CLI.OutVals[i]; + Type *ArgTy = ArgValue->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) return false; @@ -1494,14 +1478,11 @@ bool PPCFastISel::SelectCall(const Instruction *I) { if (ArgVT.isVector()) return false; - unsigned Arg = getRegForValue(*II); + unsigned Arg = getRegForValue(ArgValue); if (Arg == 0) return false; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); - - Args.push_back(*II); + Args.push_back(ArgValue); ArgRegs.push_back(Arg); ArgVTs.push_back(ArgVT); ArgFlags.push_back(Flags); @@ -1515,18 +1496,28 @@ bool PPCFastISel::SelectCall(const Instruction *I) { RegArgs, CC, NumBytes, IsVarArg)) return false; + MachineInstrBuilder MIB; // FIXME: No handling for function pointers yet. This requires // implementing the function descriptor (OPD) setup. const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV) - return false; - - // Build direct call with NOP for TOC restore. - // FIXME: We can and should optimize away the NOP for local calls. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(PPC::BL8_NOP)); - // Add callee. - MIB.addGlobalAddress(GV); + if (!GV) { + // patchpoints are a special case; they always dispatch to a pointer value. + // However, we don't actually want to generate the indirect call sequence + // here (that will be generated, as necessary, during asm printing), and + // the call we generate here will be erased by FastISel::selectPatchpoint, + // so don't try very hard... + if (CLI.IsPatchPoint) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP)); + else + return false; + } else { + // Build direct call with NOP for TOC restore. + // FIXME: We can and should optimize away the NOP for local calls. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + } // Add implicit physical register uses to the call. for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) @@ -1540,14 +1531,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); - // Finish off the call including any return values. - SmallVector<unsigned, 4> UsedRegs; - finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); - - // Set all unused physregs defs as dead. - static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + CLI.Call = MIB; - return true; + // Finish off the call including any return values. + return finishCall(RetVT, CLI, NumBytes); } // Attempt to fast-select a return instruction. @@ -1837,9 +1824,7 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) { case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); case Instruction::Call: - if (dyn_cast<IntrinsicInst>(I)) - return false; - return SelectCall(I); + return selectCall(I); case Instruction::Ret: return SelectRet(I); case Instruction::Trunc: diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index e39f75544e1..1b850478c88 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -450,6 +450,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || + MFI->hasStackMap() || MFI->hasPatchPoint() || (MF.getTarget().Options.GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2157003ef3e..5c52bb19b0d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -13,6 +13,7 @@ #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCCallingConv.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" @@ -3590,6 +3591,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) { static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, + bool IsPatchPoint, SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, const PPCSubtarget &Subtarget) { @@ -3658,6 +3660,16 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, needIndirectCall = false; } + if (IsPatchPoint) { + // We'll form an invalid direct call when lowering a patchpoint; the full + // sequence for an indirect call is complicated, and many of the + // instructions introduced might have side effects (and, thus, can't be + // removed later). The call itself will be removed as soon as the + // argument/return lowering is complete, so the fact that it has the wrong + // kind of operands should not really matter. + needIndirectCall = false; + } + if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. @@ -3783,7 +3795,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, RegsToPass[i].second.getValueType())); // Direct calls in the ELFv2 ABI need the TOC register live into the call. - if (Callee.getNode() && isELFv2ABI) + if (Callee.getNode() && isELFv2ABI && !IsPatchPoint) Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); return CallOpc; @@ -3846,7 +3858,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, - bool isTailCall, bool isVarArg, + bool isTailCall, bool isVarArg, bool IsPatchPoint, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, @@ -3860,8 +3872,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, - isTailCall, RegsToPass, Ops, NodeTys, - Subtarget); + isTailCall, IsPatchPoint, RegsToPass, Ops, + NodeTys, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) @@ -3905,7 +3917,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. - if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) { + if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && + !IsPatchPoint) { if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -3963,6 +3976,7 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + bool IsPatchPoint = CLI.IsPatchPoint; if (isTailCall) isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, @@ -3975,23 +3989,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, + isTailCall, IsPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals); else return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, + isTailCall, IsPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals); } return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, + isTailCall, IsPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals); } SDValue PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -4201,7 +4215,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, Ins, InVals); } @@ -4229,7 +4243,7 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, SDValue PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -4665,7 +4679,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Check if this is an indirect call (MTCTR/BCTRL). // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. - if (!isTailCall && + if (!isTailCall && !IsPatchPoint && !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. @@ -4679,7 +4693,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. - if (isELFv2ABI) + if (isELFv2ABI && !IsPatchPoint) RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } @@ -4696,7 +4710,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, Ins, InVals); } @@ -4704,7 +4718,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDValue PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -5089,7 +5103,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, Ins, InVals); } @@ -7246,6 +7260,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) + return emitPatchPoint(MI, BB); + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); @@ -9882,6 +9900,19 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } +const MCPhysReg * +PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { + // LR is a callee-save register, but we must treat it as clobbered by any call + // site. Hence we include LR in the scratch registers, which are in turn added + // as implicit-defs for stackmaps and patchpoints. The same reasoning applies + // to CTR, which is used by any indirect call. + static const MCPhysReg ScratchRegs[] = { + PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0 + }; + + return ScratchRegs; +} + bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index b171b165877..1b585d8e509 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -570,6 +570,8 @@ namespace llvm { /// expanded to fmul + fadd. bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; + // Should we expand the build vector with shuffles? bool shouldExpandBuildVectorWithShuffles(EVT VT, @@ -681,7 +683,7 @@ namespace llvm { SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, - bool isVarArg, + bool isVarArg, bool IsPatchPoint, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, @@ -746,7 +748,7 @@ namespace llvm { SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, + bool isVarArg, bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -755,7 +757,7 @@ namespace llvm { SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, + bool isVarArg, bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -763,7 +765,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, + bool isVarArg, bool isTailCall, bool IsPatchPoint, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 1242c3e4015..2492229dc31 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -1596,6 +1597,11 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const MachineFunction *MF = MI->getParent()->getParent(); const char *AsmStr = MI->getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } else if (Opcode == TargetOpcode::STACKMAP) { + return MI->getOperand(1).getImm(); + } else if (Opcode == TargetOpcode::PATCHPOINT) { + PatchPointOpers Opers(MI); + return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); } else { const MCInstrDesc &Desc = get(Opcode); return Desc.getSize(); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 9b9966f6465..aa0da7a97b1 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -99,6 +99,14 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) { + if (Subtarget.hasVSX()) + return CSR_64_AllRegs_VSX_SaveList; + if (Subtarget.hasAltivec()) + return CSR_64_AllRegs_Altivec_SaveList; + return CSR_64_AllRegs_SaveList; + } + if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList : @@ -117,6 +125,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::AnyReg) { + if (Subtarget.hasVSX()) + return CSR_64_AllRegs_VSX_RegMask; + if (Subtarget.hasAltivec()) + return CSR_64_AllRegs_Altivec_RegMask; + return CSR_64_AllRegs_RegMask; + } + if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask : @@ -138,6 +154,14 @@ PPCRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } +void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { + unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM }; + for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) { + unsigned Reg = PseudoRegs[i]; + Mask[Reg / 32] &= ~(1u << (Reg % 32)); + } +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>( @@ -700,7 +724,10 @@ static unsigned getOffsetONFromFION(const MachineInstr &MI, // Take into account whether it's an add or mem instruction unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNum-1; + OffsetOperandNo = FIOperandNum - 1; + else if (MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT) + OffsetOperandNo = FIOperandNum + 1; return OffsetOperandNo; } @@ -772,7 +799,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). - bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC); + bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP && + OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC); // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); @@ -796,8 +824,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. assert(OpC != PPC::DBG_VALUE && - "This should be handle in a target independent way"); - if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + "This should be handled in a target-independent way"); + if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) || + OpC == TargetOpcode::STACKMAP || + OpC == TargetOpcode::PATCHPOINT)) { MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); return; } @@ -1008,6 +1038,8 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, Offset += MI->getOperand(OffsetOperandNo).getImm(); return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT || (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index c182f95ff64..4c2ef9067cb 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -49,6 +49,8 @@ public: const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const; + void adjustStackMapLiveOutMask(uint32_t *Mask) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; /// We require the register scavenger. diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index fc440a56fdb..a7bb2526d9d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -183,6 +183,15 @@ unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue())) return TCC_Free; break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; } return PPCTTI::getIntImmCost(Imm, Ty); } |