diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64MacroFusion.cpp | 183 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64MacroFusion.h | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86MacroFusion.cpp | 101 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86MacroFusion.h | 11 | 
4 files changed, 53 insertions, 253 deletions
| diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 3b71d529db5..7e97b6ccb48 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -7,37 +7,27 @@  //  //===----------------------------------------------------------------------===//  // -// \file This file contains the AArch64 implementation of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 implementation of the DAG scheduling +///  mutation to pair instructions back to back.  //  //===----------------------------------------------------------------------===//  #include "AArch64MacroFusion.h"  #include "AArch64Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/MacroFusion.h"  #include "llvm/Target/TargetInstrInfo.h" -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); -  using namespace llvm; -static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden, -  cl::desc("Enable scheduling for macro fusion."), cl::init(true)); -  namespace { -/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused -/// together.  Given an anchor instr, when the other instr is unspecified, then -/// check if the anchor instr may be part of a fused pair at all. +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all.  static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,                                     const TargetSubtargetInfo &TSI,                                     const MachineInstr *FirstMI, -                                   const MachineInstr *SecondMI) { -  assert((FirstMI || SecondMI) && "At least one instr must be specified"); - +                                   const MachineInstr &SecondMI) {    const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);    const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI); @@ -45,9 +35,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,    unsigned FirstOpcode =      FirstMI ? FirstMI->getOpcode()  	    : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); -  unsigned SecondOpcode = -    SecondMI ? SecondMI->getOpcode() -             : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); +  unsigned SecondOpcode = SecondMI.getOpcode();    if (ST.hasArithmeticBccFusion())      // Fuse CMN, CMP, TST followed by Bcc. @@ -128,158 +116,49 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,    if (ST.hasFuseAES())      // Fuse AES crypto operations. -    switch(FirstOpcode) { +    switch(SecondOpcode) {      // AES encode. -    case AArch64::AESErr: -      return SecondOpcode == AArch64::AESMCrr || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; +    case AArch64::AESMCrr : +      return FirstOpcode == AArch64::AESErr || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END;      // AES decode. -    case AArch64::AESDrr: -      return SecondOpcode == AArch64::AESIMCrr || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; +    case AArch64::AESIMCrr: +      return FirstOpcode == AArch64::AESDrr || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END;      }    if (ST.hasFuseLiterals())      // Fuse literal generation operations. -    switch (FirstOpcode) { +    switch (SecondOpcode) {      // PC relative address. -    case AArch64::ADRP: -      return SecondOpcode == AArch64::ADDXri || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; +    case AArch64::ADDXri: +      return FirstOpcode == AArch64::ADRP || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END;      // 32 bit immediate. -    case AArch64::MOVZWi: -      return (SecondOpcode == AArch64::MOVKWi && -              SecondMI->getOperand(3).getImm() == 16) || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; -    // Lower half of 64 bit immediate. -    case AArch64::MOVZXi: -      return (SecondOpcode == AArch64::MOVKXi && -              SecondMI->getOperand(3).getImm() == 16) || -             SecondOpcode == AArch64::INSTRUCTION_LIST_END; -    // Upper half of 64 bit immediate. +    case AArch64::MOVKWi: +      return (FirstOpcode == AArch64::MOVZWi && +              SecondMI.getOperand(3).getImm() == 16) || +             FirstOpcode == AArch64::INSTRUCTION_LIST_END; +    // Lower and upper half of 64 bit immediate.      case AArch64::MOVKXi: -      return FirstMI->getOperand(3).getImm() == 32 && -             ((SecondOpcode == AArch64::MOVKXi && -               SecondMI->getOperand(3).getImm() == 48) || -              SecondOpcode == AArch64::INSTRUCTION_LIST_END); +      return FirstOpcode == AArch64::INSTRUCTION_LIST_END || +             (FirstOpcode == AArch64::MOVZXi && +              SecondMI.getOperand(3).getImm() == 16) || +             (FirstMI->getOperand(3).getImm() == 32 && +              FirstOpcode == AArch64::MOVKXi && +              SecondMI.getOperand(3).getImm() == 48);      }    return false;  } -/// \brief Implement the fusion of instr pairs in the scheduling DAG, -/// anchored at the instr in AnchorSU.. -static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { -  const MachineInstr *AnchorMI = AnchorSU.getInstr(); -  if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient()) -    return false; - -  // If the anchor instr is the ExitSU, then consider its predecessors; -  // otherwise, its successors. -  bool Preds = (&AnchorSU == &DAG->ExitSU); -  SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs; - -  const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI; -  const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr; - -  // Check if the anchor instr may be fused. -  if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), -                              FirstMI, SecondMI)) -    return false; - -  // Explorer for fusion candidates among the dependencies of the anchor instr. -  for (SDep &Dep : AnchorDeps) { -    // Ignore dependencies that don't enforce ordering. -    if (Dep.isWeak()) -      continue; - -    SUnit &DepSU = *Dep.getSUnit(); -    // Ignore the ExitSU if the dependents are successors. -    if (!Preds && &DepSU == &DAG->ExitSU) -      continue; - -    const MachineInstr *DepMI = DepSU.getInstr(); -    if (!DepMI || DepMI->isPseudo() || DepMI->isTransient()) -      continue; - -    FirstMI = Preds ? DepMI : AnchorMI; -    SecondMI = Preds ? AnchorMI : DepMI; -    if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), -                                FirstMI, SecondMI)) -      continue; - -    // Create a single weak edge between the adjacent instrs. The only effect is -    // to cause bottom-up scheduling to heavily prioritize the clustered instrs. -    SUnit &FirstSU = Preds ? DepSU : AnchorSU; -    SUnit &SecondSU = Preds ? AnchorSU : DepSU; -    DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); - -    // Adjust the latency between the anchor instr and its -    // predecessors/successors. -    for (SDep &IDep : AnchorDeps) -      if (IDep.getSUnit() == &DepSU) -        IDep.setLatency(0); - -    // Adjust the latency between the dependent instr and its -    // successors/predecessors. -    for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds) -      if (IDep.getSUnit() == &AnchorSU) -        IDep.setLatency(0); - -    DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; -          FirstSU.print(dbgs(), DAG); dbgs() << " - "; -          SecondSU.print(dbgs(), DAG); dbgs() << " /  "; -          dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << -                    DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); - -    if (&SecondSU != &DAG->ExitSU) -      // Make instructions dependent on FirstSU also dependent on SecondSU to -      // prevent them from being scheduled between FirstSU and and SecondSU. -      for (SUnit::const_succ_iterator -             SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end(); -           SI != SE; ++SI) { -        if (!SI->getSUnit() || SI->getSUnit() == &SecondSU) -          continue; -        DEBUG(dbgs() << "  Copy Succ "; -              SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';); -        DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial)); -      } - -    ++NumFused; -    return true; -  } - -  return false; -} - -/// \brief Post-process the DAG to create cluster edges between instrs that may -/// be fused by the processor into a single operation. -class AArch64MacroFusion : public ScheduleDAGMutation { -public: -  AArch64MacroFusion() {} - -  void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { -  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); - -  // For each of the SUnits in the scheduling block, try to fuse the instr in it -  // with one in its successors. -  for (SUnit &ISU : DAG->SUnits) -    scheduleAdjacentImpl(DAG, ISU); - -  // Try to fuse the instr in the ExitSU with one in its predecessors. -  scheduleAdjacentImpl(DAG, DAG->ExitSU); -} -  } // end namespace  namespace llvm {  std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () { -  return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr; +  return createMacroFusionDAGMutation(shouldScheduleAdjacent);  }  } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.h b/llvm/lib/Target/AArch64/AArch64MacroFusion.h index e5efedd9fbf..32d90d4c40d 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.h +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.h @@ -2,23 +2,18 @@  //  //                     The LLVM Compiler Infrastructure  // -// \fileThis file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===//  // -// This file contains the AArch64 definition of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 definition of the DAG scheduling +/// mutation to pair instructions back to back.  //  //===----------------------------------------------------------------------===// -#include "AArch64InstrInfo.h"  #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops. -//===----------------------------------------------------------------------===// -  namespace llvm {  /// Note that you have to add: diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp index dd21e2b7c4a..8fdf1061705 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -2,39 +2,31 @@  //  //                     The LLVM Compiler Infrastructure  // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===//  // -// This file contains the X86 implementation of the DAG scheduling mutation to -// pair instructions back to back. +/// \file This file contains the X86 implementation of the DAG scheduling +/// mutation to pair instructions back to back.  //  //===----------------------------------------------------------------------===//  #include "X86MacroFusion.h"  #include "X86Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h"  #include "llvm/Target/TargetInstrInfo.h" - -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); +#include "llvm/CodeGen/MacroFusion.h"  using namespace llvm; -static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden, -  cl::desc("Enable scheduling for macro fusion."), cl::init(true)); - -namespace { - -/// \brief Verify that the instruction pair, First and Second, -/// should be scheduled back to back.  If either instruction is unspecified, -/// then verify that the other instruction may be part of a pair at all. -static bool shouldScheduleAdjacent(const X86Subtarget &ST, -                                   const MachineInstr *First, -                                   const MachineInstr *Second) { +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, +                                   const TargetSubtargetInfo &TSI, +                                   const MachineInstr *FirstMI, +                                   const MachineInstr &SecondMI) { +  const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);    // Check if this processor supports macro-fusion. Since this is a minor    // heuristic, we haven't specifically reserved a feature. hasAVX is a decent    // proxy for SandyBridge+. @@ -47,13 +39,10 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST,      FuseInc    } FuseKind; -  assert((First || Second) && "At least one instr must be specified"); -  unsigned FirstOpcode = First -                         ? First->getOpcode() +  unsigned FirstOpcode = FirstMI +                         ? FirstMI->getOpcode()                           : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); -  unsigned SecondOpcode = Second -                          ? Second->getOpcode() -                          : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); +  unsigned SecondOpcode = SecondMI.getOpcode();    switch (SecondOpcode) {    default: @@ -203,69 +192,11 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST,    }  } -/// \brief Post-process the DAG to create cluster edges between instructions -/// that may be fused by the processor into a single operation. -class X86MacroFusion : public ScheduleDAGMutation { -public: -  X86MacroFusion() {} - -  void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { -  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); -  const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>(); - -  // For now, assume targets can only fuse with the branch. -  SUnit &ExitSU = DAG->ExitSU; -  MachineInstr *Branch = ExitSU.getInstr(); -  if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch)) -    return; - -  for (SDep &PredDep : ExitSU.Preds) { -    if (PredDep.isWeak()) -      continue; -    SUnit &SU = *PredDep.getSUnit(); -    MachineInstr &Pred = *SU.getInstr(); -    if (!shouldScheduleAdjacent(ST, &Pred, Branch)) -      continue; - -    // Create a single weak edge from SU to ExitSU. The only effect is to cause -    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no -    // need to copy predecessor edges from ExitSU to SU, since top-down -    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling -    // of SU, we could create an artificial edge from the deepest root, but it -    // hasn't been needed yet. -    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); -    (void)Success; -    assert(Success && "No DAG nodes should be reachable from ExitSU"); - -    // Adjust latency of data deps between the nodes. -    for (SDep &PredDep : ExitSU.Preds) -      if (PredDep.getSUnit() == &SU) -        PredDep.setLatency(0); -    for (SDep &SuccDep : SU.Succs) -      if (SuccDep.getSUnit() == &ExitSU) -        SuccDep.setLatency(0); - -    ++NumFused; -    DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; -          SU.print(dbgs(), DAG); -          dbgs() << " - ExitSU" -                 << " / " << DAG->TII->getName(Pred.getOpcode()) << " - " -                 << DAG->TII->getName(Branch->getOpcode()) << '\n';); - -    break; -  } -} - -} // end namespace -  namespace llvm {  std::unique_ptr<ScheduleDAGMutation>  createX86MacroFusionDAGMutation () { -  return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr; +  return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);  }  } // end namespace llvm diff --git a/llvm/lib/Target/X86/X86MacroFusion.h b/llvm/lib/Target/X86/X86MacroFusion.h index e630f802e8e..13fa2d78a01 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.h +++ b/llvm/lib/Target/X86/X86MacroFusion.h @@ -2,23 +2,18 @@  //  //                     The LLVM Compiler Infrastructure  // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===//  // -// This file contains the X86 definition of the DAG scheduling mutation to pair -// instructions back to back. +/// \file This file contains the X86 definition of the DAG scheduling mutation +///  to pair instructions back to back.  //  //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h"  #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// X86MacroFusion - DAG post-processing to encourage fusion of macro ops. -//===----------------------------------------------------------------------===// -  namespace llvm {  /// Note that you have to add: | 

