diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64MacroFusion.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64MacroFusion.cpp | 183 |
1 files changed, 152 insertions, 31 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 7e97b6ccb48..3b71d529db5 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -7,27 +7,37 @@ // //===----------------------------------------------------------------------===// // -/// \file This file contains the AArch64 implementation of the DAG scheduling -/// mutation to pair instructions back to back. +// \file This file contains the AArch64 implementation of the DAG scheduling mutation +// to pair instructions back to back. // //===----------------------------------------------------------------------===// #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" -#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" +#define DEBUG_TYPE "misched" + +STATISTIC(NumFused, "Number of instr pairs fused"); + using namespace llvm; +static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden, + cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + namespace { -/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused -/// together. Given SecondMI, when FirstMI is unspecified, then check if -/// SecondMI may be part of a fused pair at all. +/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused +/// together. Given an anchor instr, when the other instr is unspecified, then +/// check if the anchor instr may be part of a fused pair at all. static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, - const MachineInstr &SecondMI) { + const MachineInstr *SecondMI) { + assert((FirstMI || SecondMI) && "At least one instr must be specified"); + const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII); const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI); @@ -35,7 +45,9 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, unsigned FirstOpcode = FirstMI ? FirstMI->getOpcode() : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); - unsigned SecondOpcode = SecondMI.getOpcode(); + unsigned SecondOpcode = + SecondMI ? SecondMI->getOpcode() + : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); if (ST.hasArithmeticBccFusion()) // Fuse CMN, CMP, TST followed by Bcc. @@ -116,49 +128,158 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, if (ST.hasFuseAES()) // Fuse AES crypto operations. - switch(SecondOpcode) { + switch(FirstOpcode) { // AES encode. - case AArch64::AESMCrr : - return FirstOpcode == AArch64::AESErr || - FirstOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::AESErr: + return SecondOpcode == AArch64::AESMCrr || + SecondOpcode == AArch64::INSTRUCTION_LIST_END; // AES decode. - case AArch64::AESIMCrr: - return FirstOpcode == AArch64::AESDrr || - FirstOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::AESDrr: + return SecondOpcode == AArch64::AESIMCrr || + SecondOpcode == AArch64::INSTRUCTION_LIST_END; } if (ST.hasFuseLiterals()) // Fuse literal generation operations. - switch (SecondOpcode) { + switch (FirstOpcode) { // PC relative address. - case AArch64::ADDXri: - return FirstOpcode == AArch64::ADRP || - FirstOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::ADRP: + return SecondOpcode == AArch64::ADDXri || + SecondOpcode == AArch64::INSTRUCTION_LIST_END; // 32 bit immediate. - case AArch64::MOVKWi: - return (FirstOpcode == AArch64::MOVZWi && - SecondMI.getOperand(3).getImm() == 16) || - FirstOpcode == AArch64::INSTRUCTION_LIST_END; - // Lower and upper half of 64 bit immediate. + case AArch64::MOVZWi: + return (SecondOpcode == AArch64::MOVKWi && + SecondMI->getOperand(3).getImm() == 16) || + SecondOpcode == AArch64::INSTRUCTION_LIST_END; + // Lower half of 64 bit immediate. + case AArch64::MOVZXi: + return (SecondOpcode == AArch64::MOVKXi && + SecondMI->getOperand(3).getImm() == 16) || + SecondOpcode == AArch64::INSTRUCTION_LIST_END; + // Upper half of 64 bit immediate. case AArch64::MOVKXi: - return FirstOpcode == AArch64::INSTRUCTION_LIST_END || - (FirstOpcode == AArch64::MOVZXi && - SecondMI.getOperand(3).getImm() == 16) || - (FirstMI->getOperand(3).getImm() == 32 && - FirstOpcode == AArch64::MOVKXi && - SecondMI.getOperand(3).getImm() == 48); + return FirstMI->getOperand(3).getImm() == 32 && + ((SecondOpcode == AArch64::MOVKXi && + SecondMI->getOperand(3).getImm() == 48) || + SecondOpcode == AArch64::INSTRUCTION_LIST_END); } return false; } +/// \brief Implement the fusion of instr pairs in the scheduling DAG, +/// anchored at the instr in AnchorSU.. +static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { + const MachineInstr *AnchorMI = AnchorSU.getInstr(); + if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient()) + return false; + + // If the anchor instr is the ExitSU, then consider its predecessors; + // otherwise, its successors. + bool Preds = (&AnchorSU == &DAG->ExitSU); + SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs; + + const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI; + const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr; + + // Check if the anchor instr may be fused. + if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), + FirstMI, SecondMI)) + return false; + + // Explorer for fusion candidates among the dependencies of the anchor instr. + for (SDep &Dep : AnchorDeps) { + // Ignore dependencies that don't enforce ordering. + if (Dep.isWeak()) + continue; + + SUnit &DepSU = *Dep.getSUnit(); + // Ignore the ExitSU if the dependents are successors. + if (!Preds && &DepSU == &DAG->ExitSU) + continue; + + const MachineInstr *DepMI = DepSU.getInstr(); + if (!DepMI || DepMI->isPseudo() || DepMI->isTransient()) + continue; + + FirstMI = Preds ? DepMI : AnchorMI; + SecondMI = Preds ? AnchorMI : DepMI; + if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), + FirstMI, SecondMI)) + continue; + + // Create a single weak edge between the adjacent instrs. The only effect is + // to cause bottom-up scheduling to heavily prioritize the clustered instrs. + SUnit &FirstSU = Preds ? DepSU : AnchorSU; + SUnit &SecondSU = Preds ? AnchorSU : DepSU; + DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); + + // Adjust the latency between the anchor instr and its + // predecessors/successors. + for (SDep &IDep : AnchorDeps) + if (IDep.getSUnit() == &DepSU) + IDep.setLatency(0); + + // Adjust the latency between the dependent instr and its + // successors/predecessors. + for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds) + if (IDep.getSUnit() == &AnchorSU) + IDep.setLatency(0); + + DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; + FirstSU.print(dbgs(), DAG); dbgs() << " - "; + SecondSU.print(dbgs(), DAG); dbgs() << " / "; + dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << + DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); + + if (&SecondSU != &DAG->ExitSU) + // Make instructions dependent on FirstSU also dependent on SecondSU to + // prevent them from being scheduled between FirstSU and and SecondSU. + for (SUnit::const_succ_iterator + SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end(); + SI != SE; ++SI) { + if (!SI->getSUnit() || SI->getSUnit() == &SecondSU) + continue; + DEBUG(dbgs() << " Copy Succ "; + SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';); + DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial)); + } + + ++NumFused; + return true; + } + + return false; +} + +/// \brief Post-process the DAG to create cluster edges between instrs that may +/// be fused by the processor into a single operation. +class AArch64MacroFusion : public ScheduleDAGMutation { +public: + AArch64MacroFusion() {} + + void apply(ScheduleDAGInstrs *DAGInstrs) override; +}; + +void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { + ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); + + // For each of the SUnits in the scheduling block, try to fuse the instr in it + // with one in its successors. + for (SUnit &ISU : DAG->SUnits) + scheduleAdjacentImpl(DAG, ISU); + + // Try to fuse the instr in the ExitSU with one in its predecessors. + scheduleAdjacentImpl(DAG, DAG->ExitSU); +} + } // end namespace namespace llvm { std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () { - return createMacroFusionDAGMutation(shouldScheduleAdjacent); + return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr; } } // end namespace llvm |