diff options
| author | Hal Finkel <hfinkel@anl.gov> | 2012-08-28 02:10:27 +0000 |
|---|---|---|
| committer | Hal Finkel <hfinkel@anl.gov> | 2012-08-28 02:10:27 +0000 |
| commit | 5ab378037f2fde6694ef926b26d3df7ea6b8334d (patch) | |
| tree | 4371442d19d761286b37b66c6d8476c702cf7ce1 /llvm/lib/Target | |
| parent | e39526a789754d8459eed63757cd6782dd2db834 (diff) | |
| download | bcm5719-llvm-5ab378037f2fde6694ef926b26d3df7ea6b8334d.tar.gz bcm5719-llvm-5ab378037f2fde6694ef926b26d3df7ea6b8334d.zip | |
Eliminate redundant CR moves on PPC32.
The 32-bit ABI requires CR bit 6 to be set if the call has fp arguments and
unset if it doesn't. The solution up to now was to insert a MachineNode to
set/unset the CR bit, which produces a CR vreg. This vreg was then copied
into CR bit 6. When the register allocator saw a bunch of these in the same
function, it allocated the set/unset CR bit in some random CR register (1
extra instruction) and then emitted CR moves before every vararg function
call, rather than just setting and unsetting CR bit 6 directly before every
vararg function call. This patch instead inserts a PPCcrset/PPCcrunset
instruction which are then matched by a dedicated instruction pattern.
Patch by Tobias von Koch.
llvm-svn: 162725
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 16 |
3 files changed, 34 insertions, 8 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 88078246c1f..9ec100e6904 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -517,6 +517,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; + case PPCISD::CR6SET: return "PPCISD::CR6SET"; + case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; } } @@ -2834,6 +2836,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, isTailCall, RegsToPass, Ops, NodeTys, PPCSubTarget); + // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls + if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); + // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. @@ -3131,14 +3137,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call with floating args passed in - // registers. - if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, - dl, MVT::i32), 0); - RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); - } - // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; @@ -3148,6 +3146,14 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + // Set CR bit 6 to true if this is a vararg call with floating args passed in + // registers. + if (isVarArg) { + Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, + dl, DAG.getVTList(MVT::Other, MVT::Glue), Chain); + InFlag = Chain.getValue(1); + } + if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index b0a013b4b4c..902b188da72 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -174,6 +174,10 @@ namespace llvm { /// operand #3 optional in flag TC_RETURN, + /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls + CR6SET, + CR6UNSET, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 20af16cea30..0230b4e7954 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -155,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to set/unset CR bit 6 for SVR4 vararg calls +def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Instructions to support atomic operations def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, [SDNPHasChain, SDNPMayLoad]>; @@ -1145,6 +1151,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; +let Defs = [CR1EQ], CRD = 6 in { +def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), + "creqv 6, 6, 6", BrCR, + [(PPCcr6set)]>; + +def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), + "crxor 6, 6, 6", BrCR, + [(PPCcr6unset)]>; +} + // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { |

