diff options
author | Tony Jiang <jtony@ca.ibm.com> | 2017-11-20 14:38:30 +0000 |
---|---|---|
committer | Tony Jiang <jtony@ca.ibm.com> | 2017-11-20 14:38:30 +0000 |
commit | 438bf4a66b1573887823fe36c82c45a891107768 (patch) | |
tree | 05e9a21a96890432bd744b8cd764ec78d79e56c8 /llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | |
parent | 0c5a29b6be61151e0e6876014c3c4dfd8aa1b9d9 (diff) | |
download | bcm5719-llvm-438bf4a66b1573887823fe36c82c45a891107768.tar.gz bcm5719-llvm-438bf4a66b1573887823fe36c82c45a891107768.zip |
[PPC] Heuristic to choose between a X-Form VSX ld/st vs a X-Form FP ld/st.
The VSX versions have the advantage of a full 64-register target whereas the FP
ones have the advantage of lower latency and higher throughput. So what we’re
after is using the faster instructions in low register pressure situations and
using the larger register file in high register pressure situations.
The heuristic chooses between the following 7 pairs of instructions.
PPC::LXSSPX vs PPC::LFSX
PPC::LXSDX vs PPC::LFDX
PPC::STXSSPX vs PPC::STFSX
PPC::STXSDX vs PPC::STFDX
PPC::LXSIWAX vs PPC::LFIWAX
PPC::LXSIWZX vs PPC::LFIWZX
PPC::STXSIWX vs PPC::STFIWX
Differential Revision: https://reviews.llvm.org/D38486
llvm-svn: 318651
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 107 |
1 files changed, 84 insertions, 23 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 70920294aea..99a52902a52 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1977,29 +1977,13 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { return makeArrayRef(TargetFlags); } -bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { - auto &MBB = *MI.getParent(); - auto DL = MI.getDebugLoc(); - switch (MI.getOpcode()) { - case TargetOpcode::LOAD_STACK_GUARD: { - assert(Subtarget.isTargetLinux() && - "Only Linux target is expected to contain LOAD_STACK_GUARD"); - const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; - const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; - MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(Offset) - .addReg(Reg); - return true; - } - case PPC::DFLOADf32: - case PPC::DFLOADf64: - case PPC::DFSTOREf32: - case PPC::DFSTOREf64: { - assert(Subtarget.hasP9Vector() && - "Invalid D-Form Pseudo-ops on non-P9 target."); - assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && - "D-form op must have register and immediate operands"); +// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction. +// The VSX versions have the advantage of a full 64-register target whereas +// the FP ones have the advantage of lower latency and higher throughput. So +// what we are after is using the faster instructions in low register pressure +// situations and using the larger register file in high register pressure +// situations. +bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { unsigned UpperOpcode, LowerOpcode; switch (MI.getOpcode()) { case PPC::DFLOADf32: @@ -2018,7 +2002,38 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { UpperOpcode = PPC::STXSD; LowerOpcode = PPC::STFD; break; + case PPC::XFLOADf32: + UpperOpcode = PPC::LXSSPX; + LowerOpcode = PPC::LFSX; + break; + case PPC::XFLOADf64: + UpperOpcode = PPC::LXSDX; + LowerOpcode = PPC::LFDX; + break; + case PPC::XFSTOREf32: + UpperOpcode = PPC::STXSSPX; + LowerOpcode = PPC::STFSX; + break; + case PPC::XFSTOREf64: + UpperOpcode = PPC::STXSDX; + LowerOpcode = PPC::STFDX; + break; + case PPC::LIWAX: + UpperOpcode = PPC::LXSIWAX; + LowerOpcode = PPC::LFIWAX; + break; + case PPC::LIWZX: + UpperOpcode = PPC::LXSIWZX; + LowerOpcode = PPC::LFIWZX; + break; + case PPC::STIWX: + UpperOpcode = PPC::STXSIWX; + LowerOpcode = PPC::STFIWX; + break; + default: + llvm_unreachable("Unknown Operation!"); } + unsigned TargetReg = MI.getOperand(0).getReg(); unsigned Opcode; if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || @@ -2028,6 +2043,52 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { Opcode = UpperOpcode; MI.setDesc(get(Opcode)); return true; +} + +bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + auto &MBB = *MI.getParent(); + auto DL = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + case TargetOpcode::LOAD_STACK_GUARD: { + assert(Subtarget.isTargetLinux() && + "Only Linux target is expected to contain LOAD_STACK_GUARD"); + const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; + const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; + MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Offset) + .addReg(Reg); + return true; + } + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: { + assert(Subtarget.hasP9Vector() && + "Invalid D-Form Pseudo-ops on Pre-P9 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && + "D-form op must have register and immediate operands"); + return expandVSXMemPseudo(MI); + } + case PPC::XFLOADf32: + case PPC::XFSTOREf32: + case PPC::LIWAX: + case PPC::LIWZX: + case PPC::STIWX: { + assert(Subtarget.hasP8Vector() && + "Invalid X-Form Pseudo-ops on Pre-P8 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() && + "X-form op must have register and register operands"); + return expandVSXMemPseudo(MI); + } + case PPC::XFLOADf64: + case PPC::XFSTOREf64: { + assert(Subtarget.hasVSX() && + "Invalid X-Form Pseudo-ops on target that has no VSX."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() && + "X-form op must have register and register operands"); + return expandVSXMemPseudo(MI); } case PPC::SPILLTOVSR_LD: { unsigned TargetReg = MI.getOperand(0).getReg(); |