diff options
| -rw-r--r-- | llvm/include/llvm/CodeGen/MacroFusion.h | 41 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/MacroFusion.cpp | 150 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64MacroFusion.cpp | 183 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64MacroFusion.h | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86MacroFusion.cpp | 101 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86MacroFusion.h | 11 | 
7 files changed, 245 insertions, 253 deletions
| diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h new file mode 100644 index 00000000000..473784bc584 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MacroFusion.h @@ -0,0 +1,41 @@ +//===- MacroFusion.h - Macro Fusion ------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the definition of the DAG scheduling mutation to +/// pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include <functional> +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +typedef std::function<bool(const TargetInstrInfo &TII, +                           const TargetSubtargetInfo &TSI, +                           const MachineInstr *FirstMI, +                           const MachineInstr &SecondMI)>  ShouldSchedulePredTy; + +/// \brief Create a DAG scheduling mutation to pair instructions back to back +/// for instructions that benefit according to the target-specific +/// shouldScheduleAdjacent predicate function. +std::unique_ptr<ScheduleDAGMutation> +createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); + +/// \brief Create a DAG scheduling mutation to pair branch instructions with one +/// of their predecessors back to back for instructions that benefit according +/// to the target-specific shouldScheduleAdjacent predicate function. +std::unique_ptr<ScheduleDAGMutation> +createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); + +} // end namespace llvm diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 55a27e2fb79..7f3c6da9126 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -92,6 +92,7 @@ add_llvm_library(LLVMCodeGen    PatchableFunction.cpp    MIRPrinter.cpp    MIRPrintingPass.cpp +  MacroFusion.cpp    OptimizePHIs.cpp    ParallelCG.cpp    PeepholeOptimizer.cpp diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp new file mode 100644 index 00000000000..d5c901c7645 --- /dev/null +++ b/llvm/lib/CodeGen/MacroFusion.cpp @@ -0,0 +1,150 @@ +//===- MacroFusion.cpp - Macro Fusion ----------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the implementation of the DAG scheduling mutation +/// to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define DEBUG_TYPE "misched" + +STATISTIC(NumFused, "Number of instr pairs fused"); + +using namespace llvm; + +static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, +  cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + +namespace { + +static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, +                                SUnit &SecondSU) { +  // Create a single weak edge between the adjacent instrs. The only effect is +  // to cause bottom-up scheduling to heavily prioritize the clustered instrs. +  DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); + +  // Adjust the latency between the anchor instr and its +  // predecessors. +  for (SDep &IDep : SecondSU.Preds) +    if (IDep.getSUnit() == &FirstSU) +      IDep.setLatency(0); + +  // Adjust the latency between the dependent instr and its +  // predecessors. +  for (SDep &IDep : FirstSU.Succs) +    if (IDep.getSUnit() == &SecondSU) +      IDep.setLatency(0); + +  DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse "; +        FirstSU.print(dbgs(), &DAG); dbgs() << " - "; +        SecondSU.print(dbgs(), &DAG); dbgs() << " /  "; +        dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " << +                  DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; ); + +  if (&SecondSU != &DAG.ExitSU) +    // Make instructions dependent on FirstSU also dependent on SecondSU to +    // prevent them from being scheduled between FirstSU and and SecondSU. +    for (const SDep &SI : FirstSU.Succs) { +      if (SI.getSUnit() == &SecondSU) +        continue; +      DEBUG(dbgs() << "  Copy Succ "; +            SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';); +      DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial)); +    } + +  ++NumFused; +} + + +/// \brief Post-process the DAG to create cluster edges between instrs that may +/// be fused by the processor into a single operation. +class MacroFusion : public ScheduleDAGMutation { +  ShouldSchedulePredTy shouldScheduleAdjacent; +  bool FuseBlock; +  bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU); + +public: +  MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock) +    : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {} + +  void apply(ScheduleDAGInstrs *DAGInstrs) override; +}; + +void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { +  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); + +  if (FuseBlock) +    // For each of the SUnits in the scheduling block, try to fuse the instr in +    // it with one in its predecessors. +    for (SUnit &ISU : DAG->SUnits) +        scheduleAdjacentImpl(*DAG, ISU); + +  if (DAG->ExitSU.getInstr()) +    // Try to fuse the instr in the ExitSU with one in its predecessors. +    scheduleAdjacentImpl(*DAG, DAG->ExitSU); +} + +/// \brief Implement the fusion of instr pairs in the scheduling DAG, +/// anchored at the instr in AnchorSU.. +bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { +  const MachineInstr &AnchorMI = *AnchorSU.getInstr(); +  const TargetInstrInfo &TII = *DAG.TII; +  const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); + +  // Check if the anchor instr may be fused. +  if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI)) +    return false; + +  // Explorer for fusion candidates among the dependencies of the anchor instr. +  for (SDep &Dep : AnchorSU.Preds) { +    // Ignore dependencies that don't enforce ordering. +    if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output || +        Dep.isWeak()) +      continue; + +    SUnit &DepSU = *Dep.getSUnit(); +    if (DepSU.isBoundaryNode()) +      continue; + +    const MachineInstr *DepMI = DepSU.getInstr(); +    if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) +      continue; + +    fuseInstructionPair(DAG, DepSU, AnchorSU); +    return true; +  } + +  return false; +} + +} // end anonymous namespace + + +namespace llvm { + +std::unique_ptr<ScheduleDAGMutation> +createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) { +  if(EnableMacroFusion) +    return make_unique<MacroFusion>(shouldScheduleAdjacent, true); +  return nullptr; +} + +std::unique_ptr<ScheduleDAGMutation> +createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) { +  if(EnableMacroFusion) +    return make_unique<MacroFusion>(shouldScheduleAdjacent, false); +  return nullptr; +} + +} // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 3b71d529db5..7e97b6ccb48 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -7,37 +7,27 @@  //  //===----------------------------------------------------------------------===//  // -// \file This file contains the AArch64 implementation of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 implementation of the DAG scheduling +///  mutation to pair instructions back to back.  //  //===----------------------------------------------------------------------===//  #include "AArch64MacroFusion.h"  #include "AArch64Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/MacroFusion.h"  #include "llvm/Target/TargetInstrInfo.h" -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); -  using namespace llvm; -static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden, -  cl::desc("Enable scheduling for macro fusion."), cl::init(true)); -  namespace { -/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused -/// together.  Given an anchor instr, when the other instr is unspecified, then -/// check if the anchor instr may be part of a fused pair at all. +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all.  static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,                                     const TargetSubtargetInfo &TSI,                                     const MachineInstr *FirstMI, -                                   const MachineInstr *SecondMI) { -  assert((FirstMI || SecondMI) && "At least one instr must be specified"); - +                                   const MachineInstr &SecondMI) {    const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);    const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI); @@ -45,9 +35,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,    unsigned FirstOpcode =      FirstMI ? FirstMI->getOpcode()  	    : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); -  unsigned SecondOpcode = -    SecondMI ? SecondMI->getOpcode() -             : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); +  unsigned SecondOpcode = SecondMI.getOpcode();    if (ST.hasArithmeticBccFusion())      // Fuse CMN, CMP, TST followed by Bcc. @@ -128,158 +116,49 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,    if (ST.hasFuseAES())      // Fuse AES crypto operations. -    switch(FirstOpcode) { +    switch(SecondOpcode) {      // AES encode. -    case AArch64::AESErr: -      return SecondOpcode == AArch64::AESMCrr || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; +    case AArch64::AESMCrr : +      return FirstOpcode == AArch64::AESErr || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END;      // AES decode. -    case AArch64::AESDrr: -      return SecondOpcode == AArch64::AESIMCrr || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; +    case AArch64::AESIMCrr: +      return FirstOpcode == AArch64::AESDrr || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END;      }    if (ST.hasFuseLiterals())      // Fuse literal generation operations. -    switch (FirstOpcode) { +    switch (SecondOpcode) {      // PC relative address. -    case AArch64::ADRP: -      return SecondOpcode == AArch64::ADDXri || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; +    case AArch64::ADDXri: +      return FirstOpcode == AArch64::ADRP || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END;      // 32 bit immediate. -    case AArch64::MOVZWi: -      return (SecondOpcode == AArch64::MOVKWi && -              SecondMI->getOperand(3).getImm() == 16) || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; -    // Lower half of 64 bit immediate. -    case AArch64::MOVZXi: -      return (SecondOpcode == AArch64::MOVKXi && -              SecondMI->getOperand(3).getImm() == 16) || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; -    // Upper half of 64 bit immediate. +    case AArch64::MOVKWi: +      return (FirstOpcode == AArch64::MOVZWi && +              SecondMI.getOperand(3).getImm() == 16) || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END; +    // Lower and upper half of 64 bit immediate.      case AArch64::MOVKXi: -      return FirstMI->getOperand(3).getImm() == 32 && -             ((SecondOpcode == AArch64::MOVKXi && -               SecondMI->getOperand(3).getImm() == 48) || -              SecondOpcode == AArch64::INSTRUCTION_LIST_END); +      return FirstOpcode == AArch64::INSTRUCTION_LIST_END || +             (FirstOpcode == AArch64::MOVZXi && +              SecondMI.getOperand(3).getImm() == 16) || +             (FirstMI->getOperand(3).getImm() == 32 && +              FirstOpcode == AArch64::MOVKXi && +              SecondMI.getOperand(3).getImm() == 48);      }    return false;  } -/// \brief Implement the fusion of instr pairs in the scheduling DAG, -/// anchored at the instr in AnchorSU.. -static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { -  const MachineInstr *AnchorMI = AnchorSU.getInstr(); -  if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient()) -    return false; - -  // If the anchor instr is the ExitSU, then consider its predecessors; -  // otherwise, its successors. -  bool Preds = (&AnchorSU == &DAG->ExitSU); -  SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs; - -  const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI; -  const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr; - -  // Check if the anchor instr may be fused. -  if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), -                              FirstMI, SecondMI)) -    return false; - -  // Explorer for fusion candidates among the dependencies of the anchor instr. -  for (SDep &Dep : AnchorDeps) { -    // Ignore dependencies that don't enforce ordering. -    if (Dep.isWeak()) -      continue; - -    SUnit &DepSU = *Dep.getSUnit(); -    // Ignore the ExitSU if the dependents are successors. -    if (!Preds && &DepSU == &DAG->ExitSU) -      continue; - -    const MachineInstr *DepMI = DepSU.getInstr(); -    if (!DepMI || DepMI->isPseudo() || DepMI->isTransient()) -      continue; - -    FirstMI = Preds ? DepMI : AnchorMI; -    SecondMI = Preds ? AnchorMI : DepMI; -    if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), -                                FirstMI, SecondMI)) -      continue; - -    // Create a single weak edge between the adjacent instrs. The only effect is -    // to cause bottom-up scheduling to heavily prioritize the clustered instrs. -    SUnit &FirstSU = Preds ? DepSU : AnchorSU; -    SUnit &SecondSU = Preds ? AnchorSU : DepSU; -    DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); - -    // Adjust the latency between the anchor instr and its -    // predecessors/successors. -    for (SDep &IDep : AnchorDeps) -      if (IDep.getSUnit() == &DepSU) -        IDep.setLatency(0); - -    // Adjust the latency between the dependent instr and its -    // successors/predecessors. -    for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds) -      if (IDep.getSUnit() == &AnchorSU) -        IDep.setLatency(0); - -    DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; -          FirstSU.print(dbgs(), DAG); dbgs() << " - "; -          SecondSU.print(dbgs(), DAG); dbgs() << " /  "; -          dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << -                    DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); - -    if (&SecondSU != &DAG->ExitSU) -      // Make instructions dependent on FirstSU also dependent on SecondSU to -      // prevent them from being scheduled between FirstSU and and SecondSU. -      for (SUnit::const_succ_iterator -             SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end(); -           SI != SE; ++SI) { -        if (!SI->getSUnit() || SI->getSUnit() == &SecondSU) -          continue; -        DEBUG(dbgs() << "  Copy Succ "; -              SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';); -        DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial)); -      } - -    ++NumFused; -    return true; -  } - -  return false; -} - -/// \brief Post-process the DAG to create cluster edges between instrs that may -/// be fused by the processor into a single operation. -class AArch64MacroFusion : public ScheduleDAGMutation { -public: -  AArch64MacroFusion() {} - -  void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { -  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); - -  // For each of the SUnits in the scheduling block, try to fuse the instr in it -  // with one in its successors. -  for (SUnit &ISU : DAG->SUnits) -    scheduleAdjacentImpl(DAG, ISU); - -  // Try to fuse the instr in the ExitSU with one in its predecessors. -  scheduleAdjacentImpl(DAG, DAG->ExitSU); -} -  } // end namespace  namespace llvm {  std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () { -  return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr; +  return createMacroFusionDAGMutation(shouldScheduleAdjacent);  }  } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.h b/llvm/lib/Target/AArch64/AArch64MacroFusion.h index e5efedd9fbf..32d90d4c40d 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.h +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.h @@ -2,23 +2,18 @@  //  //                     The LLVM Compiler Infrastructure  // -// \fileThis file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===//  // -// This file contains the AArch64 definition of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 definition of the DAG scheduling +/// mutation to pair instructions back to back.  //  //===----------------------------------------------------------------------===// -#include "AArch64InstrInfo.h"  #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops. -//===----------------------------------------------------------------------===// -  namespace llvm {  /// Note that you have to add: diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp index dd21e2b7c4a..8fdf1061705 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -2,39 +2,31 @@  //  //                     The LLVM Compiler Infrastructure  // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===//  // -// This file contains the X86 implementation of the DAG scheduling mutation to -// pair instructions back to back. +/// \file This file contains the X86 implementation of the DAG scheduling +/// mutation to pair instructions back to back.  //  //===----------------------------------------------------------------------===//  #include "X86MacroFusion.h"  #include "X86Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h"  #include "llvm/Target/TargetInstrInfo.h" - -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); +#include "llvm/CodeGen/MacroFusion.h"  using namespace llvm; -static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden, -  cl::desc("Enable scheduling for macro fusion."), cl::init(true)); - -namespace { - -/// \brief Verify that the instruction pair, First and Second, -/// should be scheduled back to back.  If either instruction is unspecified, -/// then verify that the other instruction may be part of a pair at all. -static bool shouldScheduleAdjacent(const X86Subtarget &ST, -                                   const MachineInstr *First, -                                   const MachineInstr *Second) { +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, +                                   const TargetSubtargetInfo &TSI, +                                   const MachineInstr *FirstMI, +                                   const MachineInstr &SecondMI) { +  const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);    // Check if this processor supports macro-fusion. Since this is a minor    // heuristic, we haven't specifically reserved a feature. hasAVX is a decent    // proxy for SandyBridge+. @@ -47,13 +39,10 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST,      FuseInc    } FuseKind; -  assert((First || Second) && "At least one instr must be specified"); -  unsigned FirstOpcode = First -                         ? First->getOpcode() +  unsigned FirstOpcode = FirstMI +                         ? FirstMI->getOpcode()                           : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); -  unsigned SecondOpcode = Second -                          ? Second->getOpcode() -                          : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); +  unsigned SecondOpcode = SecondMI.getOpcode();    switch (SecondOpcode) {    default: @@ -203,69 +192,11 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST,    }  } -/// \brief Post-process the DAG to create cluster edges between instructions -/// that may be fused by the processor into a single operation. -class X86MacroFusion : public ScheduleDAGMutation { -public: -  X86MacroFusion() {} - -  void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { -  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); -  const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>(); - -  // For now, assume targets can only fuse with the branch. -  SUnit &ExitSU = DAG->ExitSU; -  MachineInstr *Branch = ExitSU.getInstr(); -  if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch)) -    return; - -  for (SDep &PredDep : ExitSU.Preds) { -    if (PredDep.isWeak()) -      continue; -    SUnit &SU = *PredDep.getSUnit(); -    MachineInstr &Pred = *SU.getInstr(); -    if (!shouldScheduleAdjacent(ST, &Pred, Branch)) -      continue; - -    // Create a single weak edge from SU to ExitSU. The only effect is to cause -    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no -    // need to copy predecessor edges from ExitSU to SU, since top-down -    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling -    // of SU, we could create an artificial edge from the deepest root, but it -    // hasn't been needed yet. -    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); -    (void)Success; -    assert(Success && "No DAG nodes should be reachable from ExitSU"); - -    // Adjust latency of data deps between the nodes. -    for (SDep &PredDep : ExitSU.Preds) -      if (PredDep.getSUnit() == &SU) -        PredDep.setLatency(0); -    for (SDep &SuccDep : SU.Succs) -      if (SuccDep.getSUnit() == &ExitSU) -        SuccDep.setLatency(0); - -    ++NumFused; -    DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; -          SU.print(dbgs(), DAG); -          dbgs() << " - ExitSU" -                 << " / " << DAG->TII->getName(Pred.getOpcode()) << " - " -                 << DAG->TII->getName(Branch->getOpcode()) << '\n';); - -    break; -  } -} - -} // end namespace -  namespace llvm {  std::unique_ptr<ScheduleDAGMutation>  createX86MacroFusionDAGMutation () { -  return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr; +  return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);  }  } // end namespace llvm diff --git a/llvm/lib/Target/X86/X86MacroFusion.h b/llvm/lib/Target/X86/X86MacroFusion.h index e630f802e8e..13fa2d78a01 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.h +++ b/llvm/lib/Target/X86/X86MacroFusion.h @@ -2,23 +2,18 @@  //  //                     The LLVM Compiler Infrastructure  // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===//  // -// This file contains the X86 definition of the DAG scheduling mutation to pair -// instructions back to back. +/// \file This file contains the X86 definition of the DAG scheduling mutation +///  to pair instructions back to back.  //  //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h"  #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// X86MacroFusion - DAG post-processing to encourage fusion of macro ops. -//===----------------------------------------------------------------------===// -  namespace llvm {  /// Note that you have to add: | 

