diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/MCA/HardwareUnits/Scheduler.cpp | 3 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp | 216 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/Views/BottleneckAnalysis.h | 65 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/llvm-mca.cpp | 2 | 
4 files changed, 174 insertions, 112 deletions
diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp index c7091203595..0f0f2ffb832 100644 --- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp @@ -198,7 +198,8 @@ InstRef Scheduler::select() {          Strategy->compare(IR, ReadySet[QueueIndex])) {        Instruction &IS = *IR.getInstruction();        uint64_t BusyResourceMask = Resources->checkAvailability(IS.getDesc()); -      IS.setCriticalResourceMask(BusyResourceMask); +      if (BusyResourceMask) +        IS.setCriticalResourceMask(BusyResourceMask);        BusyResourceUnits |= BusyResourceMask;        if (!BusyResourceMask)          QueueIndex = I; diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp index cde896fbb5f..1c38cb90831 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -13,6 +13,7 @@  //===----------------------------------------------------------------------===//  #include "Views/BottleneckAnalysis.h" +#include "llvm/MC/MCInst.h"  #include "llvm/MCA/Support.h"  #include "llvm/Support/Format.h" @@ -40,43 +41,38 @@ PressureTracker::PressureTracker(const MCSchedModel &Model)    }    ResourceUsers.resize(NextResourceUsersIdx); -  std::fill(ResourceUsers.begin(), ResourceUsers.end(), ~0U); +  std::fill(ResourceUsers.begin(), ResourceUsers.end(), +            std::make_pair<unsigned, unsigned>(~0U, 0U));  } -void PressureTracker::getUniqueUsers( -    uint64_t ResourceMask, SmallVectorImpl<unsigned> &UniqueUsers) const { +void PressureTracker::getResourceUsers(uint64_t ResourceMask, +                                       SmallVectorImpl<User> &Users) const {    unsigned Index = getResourceStateIndex(ResourceMask);    unsigned ProcResID = ResIdx2ProcResID[Index];    const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);    for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { -    unsigned From = getResourceUser(ProcResID, I); -    if (find(UniqueUsers, From) == UniqueUsers.end()) -      UniqueUsers.emplace_back(From); +    const User U = getResourceUser(ProcResID, I); +    if (U.second && IPI.find(U.first) != IPI.end()) +      Users.emplace_back(U);    }  } -void PressureTracker::handleInstructionEvent(const HWInstructionEvent &Event) { +void PressureTracker::onInstructionDispatched(unsigned IID) { +  IPI.insert(std::make_pair(IID, InstructionPressureInfo())); +} + +void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } + +void PressureTracker::handleInstructionIssuedEvent( +    const HWInstructionIssuedEvent &Event) {    unsigned IID = Event.IR.getSourceIndex(); -  switch (Event.Type) { -  default: -    break; -  case HWInstructionEvent::Dispatched: -    IPI.insert(std::make_pair(IID, InstructionPressureInfo())); -    break; -  case HWInstructionEvent::Executed: -    IPI.erase(IID); -    break; -  case HWInstructionEvent::Issued: { -    const auto &IIE = static_cast<const HWInstructionIssuedEvent &>(Event); -    using ResourceRef = HWInstructionIssuedEvent::ResourceRef; -    using ResourceUse = std::pair<ResourceRef, ResourceCycles>; -    for (const ResourceUse &Use : IIE.UsedResources) { -      const ResourceRef &RR = Use.first; -      unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; -      Index += countTrailingZeros(RR.second); -      ResourceUsers[Index] = IID; -    } -  } +  using ResourceRef = HWInstructionIssuedEvent::ResourceRef; +  using ResourceUse = std::pair<ResourceRef, ResourceCycles>; +  for (const ResourceUse &Use : Event.UsedResources) { +    const ResourceRef &RR = Use.first; +    unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; +    Index += countTrailingZeros(RR.second); +    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());    }  } @@ -125,7 +121,8 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {        if (!BusyResources)          continue; -      IPI[IR.getSourceIndex()].ResourcePressureCycles++; +      unsigned IID = IR.getSourceIndex(); +      IPI[IID].ResourcePressureCycles++;      }      break;    } @@ -146,51 +143,59 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {  }  #ifndef NDEBUG -void DependencyGraph::dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const { +void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, +                                         const DependencyEdge &DE, +                                         MCInstPrinter &MCIP) const { +  bool LoopCarried = FromIID >= DE.IID; +  OS << " FROM: " << FromIID << " TO: " << DE.IID +     << (LoopCarried ? " (loop carried)" : "             "); +  if (DE.Type == DT_REGISTER) { +    OS << " - REGISTER: "; +    MCIP.printRegName(OS, DE.ResourceOrRegID); +  } else if (DE.Type == DT_MEMORY) { +    OS << " - MEMORY"; +  } else { +    assert(DE.Type == DT_RESOURCE && "Unexpected unsupported dependency type!"); +    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; +  } +  OS << " - CYCLES: " << DE.Cycles << '\n'; +} + +void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {    OS << "\nREG DEPS\n";    for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {      const DGNode &Node = Nodes[I]; -    for (const DependencyEdge &DE : Node.RegDeps) { -      bool LoopCarried = I >= DE.IID; -      OS << " FROM: " << I << " TO: " << DE.IID -         << (LoopCarried ? " (loop carried)" : "             ") -         << " - REGISTER: "; -      MCIP.printRegName(OS, DE.ResourceOrRegID); -      OS << " - CYCLES: " << DE.Cycles << '\n'; +    for (const DependencyEdge &DE : Node.OutgoingEdges) { +      if (DE.Type == DT_REGISTER) +        dumpDependencyEdge(OS, I, DE, MCIP);      }    } -} -void DependencyGraph::dumpMemDeps(raw_ostream &OS) const {    OS << "\nMEM DEPS\n";    for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {      const DGNode &Node = Nodes[I]; -    for (const DependencyEdge &DE : Node.MemDeps) { -      bool LoopCarried = I >= DE.IID; -      OS << " FROM: " << I << " TO: " << DE.IID -         << (LoopCarried ? " (loop carried)" : "             ") -         << " - MEMORY - CYCLES: " << DE.Cycles << '\n'; +    for (const DependencyEdge &DE : Node.OutgoingEdges) { +      if (DE.Type == DT_MEMORY) +        dumpDependencyEdge(OS, I, DE, MCIP);      }    } -} -void DependencyGraph::dumpResDeps(raw_ostream &OS) const {    OS << "\nRESOURCE DEPS\n";    for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {      const DGNode &Node = Nodes[I]; -    for (const DependencyEdge &DE : Node.ResDeps) { -      bool LoopCarried = I >= DE.IID; -      OS << " FROM: " << I << " TO: " << DE.IID -         << (LoopCarried ? "(loop carried)" : "             ") -         << " - RESOURCE MASK: " << DE.ResourceOrRegID; -      OS << " - CYCLES: " << DE.Cycles << '\n'; +    for (const DependencyEdge &DE : Node.OutgoingEdges) { +      if (DE.Type == DT_RESOURCE) +        dumpDependencyEdge(OS, I, DE, MCIP);      }    }  }  #endif // NDEBUG -void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec, -                                 DependencyEdge &&Dep) { +void DependencyGraph::addDependency(unsigned From, DependencyEdge &&Dep) { +  DGNode &NodeFrom = Nodes[From]; +  DGNode &NodeTo = Nodes[Dep.IID]; +  SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; +    auto It = find_if(Vec, [Dep](DependencyEdge &DE) {      return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID;    }); @@ -201,38 +206,102 @@ void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec,    }    Vec.emplace_back(Dep); -  Nodes[Dep.IID].NumPredecessors++; +  NodeTo.NumPredecessors++;  }  BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, -                                       ArrayRef<MCInst> Sequence) -    : STI(sti), Tracker(STI.getSchedModel()), DG(Sequence.size()), -      Source(Sequence), TotalCycles(0), -      PressureIncreasedBecauseOfResources(false), +                                       MCInstPrinter &Printer, +                                       ArrayRef<MCInst> S) +    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3), +      Source(S), TotalCycles(0), PressureIncreasedBecauseOfResources(false),        PressureIncreasedBecauseOfRegisterDependencies(false),        PressureIncreasedBecauseOfMemoryDependencies(false),        SeenStallCycles(false), BPI() {} +void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, +                                        unsigned RegID, unsigned Cy) { +  bool IsLoopCarried = From >= To; +  unsigned SourceSize = Source.size(); +  if (IsLoopCarried) { +    DG.addRegisterDep(From, To + SourceSize, RegID, Cy); +    DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cy); +    return; +  } +  DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cy); +} + +void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, unsigned Cy) { +  bool IsLoopCarried = From >= To; +  unsigned SourceSize = Source.size(); +  if (IsLoopCarried) { +    DG.addMemoryDep(From, To + SourceSize, Cy); +    DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cy); +    return; +  } +  DG.addMemoryDep(From + SourceSize, To + SourceSize, Cy); +} + +void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, +                                        uint64_t Mask, unsigned Cy) { +  bool IsLoopCarried = From >= To; +  unsigned SourceSize = Source.size(); +  if (IsLoopCarried) { +    DG.addResourceDep(From, To + SourceSize, Mask, Cy); +    DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cy); +    return; +  } +  DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cy); +} +  void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { -  Tracker.handleInstructionEvent(Event); +  const unsigned IID = Event.IR.getSourceIndex(); +  if (Event.Type == HWInstructionEvent::Dispatched) { +    Tracker.onInstructionDispatched(IID); +    return; +  } +  if (Event.Type == HWInstructionEvent::Executed) { +    Tracker.onInstructionExecuted(IID); +    return; +  } +    if (Event.Type != HWInstructionEvent::Issued)      return; -  const unsigned IID = Event.IR.getSourceIndex();    const Instruction &IS = *Event.IR.getInstruction(); -  unsigned Cycles = Tracker.getRegisterPressureCycles(IID);    unsigned To = IID % Source.size(); + +  unsigned Cycles = Tracker.getResourcePressureCycles(IID); +  if (Cycles) { +    uint64_t ResourceMask = IS.getCriticalResourceMask(); +    SmallVector<std::pair<unsigned, unsigned>, 4> Users; +    while (ResourceMask) { +      uint64_t Current = ResourceMask & (-ResourceMask); +      Tracker.getResourceUsers(Current, Users); +      for (const std::pair<unsigned, unsigned> &U : Users) { +        unsigned Cost = std::min(U.second, Cycles); +        addResourceDep(U.first % Source.size(), To, Current, Cost); +      } +      Users.clear(); +      ResourceMask ^= Current; +    } +  } + +  Cycles = Tracker.getRegisterPressureCycles(IID);    if (Cycles) {      const CriticalDependency &RegDep = IS.getCriticalRegDep();      unsigned From = RegDep.IID % Source.size(); -    DG.addRegDep(From, To, RegDep.RegID, Cycles); +    addRegisterDep(From, To, RegDep.RegID, Cycles);    } +    Cycles = Tracker.getMemoryPressureCycles(IID);    if (Cycles) {      const CriticalDependency &MemDep = IS.getCriticalMemDep();      unsigned From = MemDep.IID % Source.size(); -    DG.addMemDep(From, To, Cycles); +    addMemoryDep(From, To, Cycles);    } + +  Tracker.handleInstructionIssuedEvent( +      static_cast<const HWInstructionIssuedEvent &>(Event));  }  void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { @@ -245,28 +314,9 @@ void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {    default:      break; -  case HWPressureEvent::RESOURCES: { +  case HWPressureEvent::RESOURCES:      PressureIncreasedBecauseOfResources = true; - -    SmallVector<unsigned, 4> UniqueUsers; -    for (const InstRef &IR : Event.AffectedInstructions) { -      const Instruction &IS = *IR.getInstruction(); -      unsigned To = IR.getSourceIndex() % Source.size(); -      unsigned BusyResources = -          IS.getCriticalResourceMask() & Event.ResourceMask; -      while (BusyResources) { -        uint64_t Current = BusyResources & (-BusyResources); -        Tracker.getUniqueUsers(Current, UniqueUsers); -        for (unsigned User : UniqueUsers) -          DG.addResourceDep(User % Source.size(), To, Current, 1); -        BusyResources ^= Current; -      } -      UniqueUsers.clear(); -    } -      break; -  } -    case HWPressureEvent::REGISTER_DEPS:      PressureIncreasedBecauseOfRegisterDependencies = true;      break; diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h index 4c4dc193e13..c208847fe9f 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -63,7 +63,8 @@ class PressureTracker {    // There is one entry for every processor resource unit declared by the    // processor model. An all_ones value is treated like an invalid instruction    // identifier. -  SmallVector<unsigned, 4> ResourceUsers; +  using User = std::pair<unsigned, unsigned>; +  SmallVector<User, 4> ResourceUsers;    struct InstructionPressureInfo {      unsigned RegisterPressureCycles; @@ -74,7 +75,7 @@ class PressureTracker {    void updateResourcePressureDistribution(uint64_t CumulativeMask); -  unsigned getResourceUser(unsigned ProcResID, unsigned UnitID) const { +  User getResourceUser(unsigned ProcResID, unsigned UnitID) const {      unsigned Index = ProcResID2ResourceUsersIndex[ProcResID];      return ResourceUsers[Index + UnitID];    } @@ -86,8 +87,8 @@ public:      return ResourcePressureDistribution;    } -  void getUniqueUsers(uint64_t ResourceMask, -                      SmallVectorImpl<unsigned> &Users) const; +  void getResourceUsers(uint64_t ResourceMask, +                        SmallVectorImpl<User> &Users) const;    unsigned getRegisterPressureCycles(unsigned IID) const {      assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); @@ -107,12 +108,18 @@ public:      return Info.ResourcePressureCycles;    } +  void onInstructionDispatched(unsigned IID); +  void onInstructionExecuted(unsigned IID); +    void handlePressureEvent(const HWPressureEvent &Event); -  void handleInstructionEvent(const HWInstructionEvent &Event); +  void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);  };  class DependencyGraph { +  enum DependencyType { DT_REGISTER, DT_MEMORY, DT_RESOURCE }; +    struct DependencyEdge { +    DependencyType Type;      unsigned IID;      uint64_t ResourceOrRegID;      uint64_t Cycles; @@ -120,46 +127,44 @@ class DependencyGraph {    struct DGNode {      unsigned NumPredecessors; -    SmallVector<DependencyEdge, 8> RegDeps; -    SmallVector<DependencyEdge, 8> MemDeps; -    SmallVector<DependencyEdge, 8> ResDeps; +    SmallVector<DependencyEdge, 8> OutgoingEdges;    };    SmallVector<DGNode, 16> Nodes; -  void addDepImpl(SmallVectorImpl<DependencyEdge> &Vec, DependencyEdge &&DE); -    DependencyGraph(const DependencyGraph &) = delete;    DependencyGraph &operator=(const DependencyGraph &) = delete; +  void addDependency(unsigned From, DependencyEdge &&DE); + +#ifndef NDEBUG +  void dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, +                          const DependencyEdge &DE, MCInstPrinter &MCIP) const; +#endif +  public: -  DependencyGraph(unsigned NumNodes) : Nodes(NumNodes, DGNode()) {} +  DependencyGraph(unsigned Size) : Nodes(Size) {} -  void addRegDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) { -    addDepImpl(Nodes[From].RegDeps, {To, RegID, Cy}); +  void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) { +    addDependency(From, {DT_REGISTER, To, RegID, Cy});    } -  void addMemDep(unsigned From, unsigned To, unsigned Cy) { -    addDepImpl(Nodes[From].MemDeps, {To, /* unused */ 0, Cy}); + +  void addMemoryDep(unsigned From, unsigned To, unsigned Cy) { +    addDependency(From, {DT_MEMORY, To, /* unused */ 0, Cy});    } +    void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy) { -    addDepImpl(Nodes[From].ResDeps, {To, Mask, Cy}); +    addDependency(From, {DT_RESOURCE, To, Mask, Cy});    }  #ifndef NDEBUG -  void dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const; -  void dumpMemDeps(raw_ostream &OS) const; -  void dumpResDeps(raw_ostream &OS) const; - -  void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { -    dumpRegDeps(OS, MCIP); -    dumpMemDeps(OS); -    dumpResDeps(OS); -  } +  void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;  #endif  };  /// A view that collects and prints a few performance numbers.  class BottleneckAnalysis : public View {    const MCSubtargetInfo &STI; +  MCInstPrinter &MCIP;    PressureTracker Tracker;    DependencyGraph DG; @@ -189,8 +194,14 @@ class BottleneckAnalysis : public View {    // Prints a bottleneck message to OS.    void printBottleneckHints(raw_ostream &OS) const; +  // Used to populate the dependency graph DG. +  void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy); +  void addMemoryDep(unsigned From, unsigned To, unsigned Cy); +  void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy); +  public: -  BottleneckAnalysis(const MCSubtargetInfo &STI, ArrayRef<MCInst> Sequence); +  BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP, +                     ArrayRef<MCInst> Sequence);    void onCycleEnd() override;    void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; } @@ -200,7 +211,7 @@ public:    void printView(raw_ostream &OS) const override;  #ifndef NDEBUG -  void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); } +  void dump(raw_ostream &OS) const { DG.dump(OS, MCIP); }  #endif  }; diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index a875c70e17f..6e6d23ff6f1 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -487,7 +487,7 @@ int main(int argc, char **argv) {            llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));      if (EnableBottleneckAnalysis) -      Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(*STI, Insts)); +      Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(*STI, *IP, Insts));      if (PrintInstructionInfoView)        Printer.addView(  | 

