summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
author: Hal Finkel <hfinkel@anl.gov> 2015-01-13 17:48:12 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2015-01-13 17:48:12 +0000
commit: 821befd52b933ca65022f79345407f6b6bf0b836 (patch)
tree: 3229072c33e80b4ae61191d3e8e6faf4a7516ce1 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent: c4ee2c5188872b5a7a4e0d6379141f65cc7356bd (diff)
download: bcm5719-llvm-821befd52b933ca65022f79345407f6b6bf0b836.tar.gz
bcm5719-llvm-821befd52b933ca65022f79345407f6b6bf0b836.zip
[PowerPC] Add StackMap/PatchPoint support
This commit does two things: 1. Refactors PPCFastISel to use more of the common infrastructure for call lowering (this lets us take advantage of this common code for lowering some common intrinsics, stackmap/patchpoint among them). 2. Adds support for stackmap/patchpoint lowering. For the most part, this is very similar to the support in the AArch64 target, with the obvious differences (different registers, NOP instructions, etc.). The test cases are adapted from the AArch64 test cases. One difference of note is that the patchpoint call sequence takes 24 bytes, so you can't use less than that (on AArch64 you can go down to 16). Also, as noted in the docs, we take the patchpoint address to be the actual code address (assuming the call is local in the TOC-sharing sense), which should yield higher performance than generating the full cross-DSO indirect-call sequence and is likely just as useful for JITed code (if not, we'll change it). StackMaps and Patchpoints are still marked as experimental, and so this support is doubly experimental. So go ahead and experiment! llvm-svn: 225808
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp 62
1 files changed, 42 insertions, 20 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2157003ef3e..120e4ed4cb0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13,6 +13,7 @@
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
@@ -3590,6 +3591,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+ bool IsPatchPoint,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &Subtarget) {
@@ -3663,7 +3665,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
- if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@@ -3732,9 +3734,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
MTCTROps[2] = InFlag;
}
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
- makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
- InFlag = Chain.getValue(1);
+ if (!IsPatchPoint) {
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
+ InFlag = Chain.getValue(1);
+ }
NodeTys.clear();
NodeTys.push_back(MVT::Other);
@@ -3743,7 +3747,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64 && !isELFv2ABI)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -3783,7 +3787,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
RegsToPass[i].second.getValueType()));
// Direct calls in the ELFv2 ABI need the TOC register live into the call.
- if (Callee.getNode() && isELFv2ABI)
+ if (Callee.getNode() && isELFv2ABI && !IsPatchPoint)
Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
return CallOpc;
@@ -3846,7 +3850,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
- bool isTailCall, bool isVarArg,
+ bool isTailCall, bool isVarArg, bool IsPatchPoint,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
@@ -3860,8 +3864,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
- isTailCall, RegsToPass, Ops, NodeTys,
- Subtarget);
+ isTailCall, IsPatchPoint, RegsToPass, Ops,
+ NodeTys, Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
@@ -3963,6 +3967,7 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
+ bool IsPatchPoint = CLI.IsPatchPoint;
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
@@ -3975,23 +3980,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Subtarget.isSVR4ABI()) {
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
}
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
}
SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -4201,7 +4206,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
@@ -4229,7 +4234,7 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -4665,7 +4670,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
- if (!isTailCall &&
+ if (!isTailCall && !IsPatchPoint &&
!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
@@ -4679,7 +4684,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
- if (isELFv2ABI)
+ if (isELFv2ABI && !IsPatchPoint)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
@@ -4696,7 +4701,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
@@ -4704,7 +4709,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -5089,7 +5094,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
@@ -7246,6 +7251,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == TargetOpcode::STACKMAP ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT)
+ return emitPatchPoint(MI, BB);
+
if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
return emitEHSjLjSetJmp(MI, BB);
@@ -9882,6 +9891,19 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+const MCPhysReg *
+PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
+ // Scratch registers become implicit-defs of stackmaps and patchpoints; the
+ // calling convention is ignored. LR is callee-saved yet clobbered by any call
+ // site, and CTR is used by any indirect call, so both are listed here.
+ // NOTE(review): the trailing 0 looks like a list terminator -- confirm.
+ static const MCPhysReg ScratchRegs[] = {
+ PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0
+ };
+
+ return ScratchRegs;
+}
+
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
OpenPOWER on IntegriCloud