| author | Clement Courbet <courbet@google.com> | 2018-12-17 08:08:31 +0000 |
|---|---|---|
| committer | Clement Courbet <courbet@google.com> | 2018-12-17 08:08:31 +0000 |
| commit | cc5e6a72de590bff4f1e68b7271d9f82bc66b995 (patch) | |
| tree | 38daf2528d9f190e3ba1743d0cb592fa782698b4 /llvm/lib/MCA | |
| parent | 792d4f130d6fcfebc14f0e4e20d3111a24852da2 (diff) | |
[llvm-mca] Move llvm-mca library to llvm/lib/MCA.
Summary: See PR38731.
Reviewers: andreadb
Subscribers: mgorny, javed.absar, tschuett, gbedwell, andreadb, RKSimon, llvm-commits
Differential Revision: https://reviews.llvm.org/D55557
llvm-svn: 349332
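The patch only relocates sources: users of the library pick up the new `llvm/MCA` header prefix and the `LLVMMCA` target added by the new CMakeLists.txt. For reference, a few of the relocated headers as they appear in the `#include` directives of the diff below (illustrative, not part of the patch itself):

```cpp
// Illustrative only: MCA headers now live under llvm/MCA/, mirroring the
// lib/MCA layout introduced by this patch.
#include "llvm/MCA/Context.h"
#include "llvm/MCA/HWEventListener.h"
#include "llvm/MCA/HardwareUnits/Scheduler.h"
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/MCA/Stages/ExecuteStage.h"
```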
Diffstat (limited to 'llvm/lib/MCA')
| -rw-r--r-- | llvm/lib/MCA/CMakeLists.txt | 23 |
| -rw-r--r-- | llvm/lib/MCA/Context.cpp | 65 |
| -rw-r--r-- | llvm/lib/MCA/HWEventListener.cpp | 23 |
| -rw-r--r-- | llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp | 25 |
| -rw-r--r-- | llvm/lib/MCA/HardwareUnits/LSUnit.cpp | 190 |
| -rw-r--r-- | llvm/lib/MCA/HardwareUnits/RegisterFile.cpp | 491 |
| -rw-r--r-- | llvm/lib/MCA/HardwareUnits/ResourceManager.cpp | 326 |
| -rw-r--r-- | llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp | 88 |
| -rw-r--r-- | llvm/lib/MCA/HardwareUnits/Scheduler.cpp | 245 |
| -rw-r--r-- | llvm/lib/MCA/InstrBuilder.cpp | 675 |
| -rw-r--r-- | llvm/lib/MCA/Instruction.cpp | 205 |
| -rw-r--r-- | llvm/lib/MCA/LLVMBuild.txt | 22 |
| -rw-r--r-- | llvm/lib/MCA/Pipeline.cpp | 97 |
| -rw-r--r-- | llvm/lib/MCA/Stages/DispatchStage.cpp | 185 |
| -rw-r--r-- | llvm/lib/MCA/Stages/EntryStage.cpp | 76 |
| -rw-r--r-- | llvm/lib/MCA/Stages/ExecuteStage.cpp | 219 |
| -rw-r--r-- | llvm/lib/MCA/Stages/InstructionTables.cpp | 69 |
| -rw-r--r-- | llvm/lib/MCA/Stages/RetireStage.cpp | 62 |
| -rw-r--r-- | llvm/lib/MCA/Stages/Stage.cpp | 29 |
| -rw-r--r-- | llvm/lib/MCA/Support.cpp | 79 |
20 files changed, 3194 insertions, 0 deletions
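Context.cpp below shows how `createDefaultPipeline` assembles the out-of-order pipeline from the simulated hardware units and stages. As a quick orientation before the diff, here is a minimal sketch of how a driver such as llvm-mca might call into the relocated library; the `PipelineOptions` constructor and the `Pipeline.h`/`SourceMgr.h` header paths are assumptions based on names used in this patch, not something the patch adds.

```cpp
#include "llvm/MCA/Context.h"
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/MCA/Pipeline.h"   // assumed to mirror the lib/MCA layout
#include "llvm/MCA/SourceMgr.h"  // assumed location of mca::SourceMgr
#include <memory>

using namespace llvm;

// Sketch: ask the Context for the default fetch/dispatch/execute/retire
// pipeline. The Context keeps ownership of the simulated hardware units
// (see Context.cpp below), so it must outlive the returned Pipeline.
static std::unique_ptr<mca::Pipeline>
buildDefaultPipeline(mca::Context &Ctx, mca::InstrBuilder &IB,
                     mca::SourceMgr &SrcMgr) {
  // Assumed argument order: dispatch width, register-file size, load-queue
  // size, store-queue size, assume-no-alias. The values are placeholders.
  mca::PipelineOptions Opts(/*DispatchWidth=*/4, /*RegisterFileSize=*/0,
                            /*LoadQueueSize=*/0, /*StoreQueueSize=*/0,
                            /*AssumeNoAlias=*/false);
  return Ctx.createDefaultPipeline(Opts, IB, SrcMgr);
}
```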
diff --git a/llvm/lib/MCA/CMakeLists.txt b/llvm/lib/MCA/CMakeLists.txt new file mode 100644 index 00000000000..bfd0782d1f7 --- /dev/null +++ b/llvm/lib/MCA/CMakeLists.txt @@ -0,0 +1,23 @@ +add_llvm_library(LLVMMCA + Context.cpp + HWEventListener.cpp + HardwareUnits/HardwareUnit.cpp + HardwareUnits/LSUnit.cpp + HardwareUnits/RegisterFile.cpp + HardwareUnits/ResourceManager.cpp + HardwareUnits/RetireControlUnit.cpp + HardwareUnits/Scheduler.cpp + InstrBuilder.cpp + Instruction.cpp + Pipeline.cpp + Stages/DispatchStage.cpp + Stages/EntryStage.cpp + Stages/ExecuteStage.cpp + Stages/InstructionTables.cpp + Stages/RetireStage.cpp + Stages/Stage.cpp + Support.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/MCA + ) diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp new file mode 100644 index 00000000000..c1b197dfe2e --- /dev/null +++ b/llvm/lib/MCA/Context.cpp @@ -0,0 +1,65 @@ +//===---------------------------- Context.cpp -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Context.h" +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/MCA/Stages/DispatchStage.h" +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Stages/ExecuteStage.h" +#include "llvm/MCA/Stages/RetireStage.h" + +namespace llvm { +namespace mca { + +std::unique_ptr<Pipeline> +Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, + SourceMgr &SrcMgr) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Create the hardware units defining the backend. + auto RCU = llvm::make_unique<RetireControlUnit>(SM); + auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize); + auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize, + Opts.StoreQueueSize, Opts.AssumeNoAlias); + auto HWS = llvm::make_unique<Scheduler>(SM, *LSU); + + // Create the pipeline stages. + auto Fetch = llvm::make_unique<EntryStage>(SrcMgr); + auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth, + *RCU, *PRF); + auto Execute = llvm::make_unique<ExecuteStage>(*HWS); + auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF); + + // Pass the ownership of all the hardware units to this Context. + addHardwareUnit(std::move(RCU)); + addHardwareUnit(std::move(PRF)); + addHardwareUnit(std::move(LSU)); + addHardwareUnit(std::move(HWS)); + + // Build the pipeline. 
+ auto StagePipeline = llvm::make_unique<Pipeline>(); + StagePipeline->appendStage(std::move(Fetch)); + StagePipeline->appendStage(std::move(Dispatch)); + StagePipeline->appendStage(std::move(Execute)); + StagePipeline->appendStage(std::move(Retire)); + return StagePipeline; +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/HWEventListener.cpp b/llvm/lib/MCA/HWEventListener.cpp new file mode 100644 index 00000000000..4a0e5b1754d --- /dev/null +++ b/llvm/lib/MCA/HWEventListener.cpp @@ -0,0 +1,23 @@ +//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a vtable anchor for class HWEventListener. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HWEventListener.h" + +namespace llvm { +namespace mca { + +// Anchor the vtable here. +void HWEventListener::anchor() {} +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp b/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp new file mode 100644 index 00000000000..edd32b9c0c1 --- /dev/null +++ b/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp @@ -0,0 +1,25 @@ +//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the anchor for the base class that describes +/// simulated hardware units. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" + +namespace llvm { +namespace mca { + +// Pin the vtable with this method. +HardwareUnit::~HardwareUnit() = default; + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp new file mode 100644 index 00000000000..8895eb392b6 --- /dev/null +++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -0,0 +1,190 @@ +//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load-Store Unit for the llvm-mca tool. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/LSUnit.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, + bool AssumeNoAlias) + : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) { + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (!LQ_Size && EPI.LoadQueueID) { + const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); + LQ_Size = LdQDesc.BufferSize; + } + + if (!SQ_Size && EPI.StoreQueueID) { + const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); + SQ_Size = StQDesc.BufferSize; + } + } +} + +#ifndef NDEBUG +void LSUnit::dump() const { + dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; + dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; + dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n'; + dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; +} +#endif + +void LSUnit::assignLQSlot(unsigned Index) { + assert(!isLQFull()); + assert(LoadQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index + << ",slot=" << LoadQueue.size() << ">\n"); + LoadQueue.insert(Index); +} + +void LSUnit::assignSQSlot(unsigned Index) { + assert(!isSQFull()); + assert(StoreQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index + << ",slot=" << StoreQueue.size() << ">\n"); + StoreQueue.insert(Index); +} + +void LSUnit::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned IsMemBarrier = Desc.HasSideEffects; + assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); + + const unsigned Index = IR.getSourceIndex(); + if (Desc.MayLoad) { + if (IsMemBarrier) + LoadBarriers.insert(Index); + assignLQSlot(Index); + } + + if (Desc.MayStore) { + if (IsMemBarrier) + StoreBarriers.insert(Index); + assignSQSlot(Index); + } +} + +LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.MayLoad && isLQFull()) + return LSUnit::LSU_LQUEUE_FULL; + if (Desc.MayStore && isSQFull()) + return LSUnit::LSU_SQUEUE_FULL; + return LSUnit::LSU_AVAILABLE; +} + +bool LSUnit::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + const unsigned Index = IR.getSourceIndex(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + assert((IsALoad || IsAStore) && "Not a memory operation!"); + assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!"); + assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!"); + + if (IsALoad && !LoadBarriers.empty()) { + unsigned LoadBarrierIndex = *LoadBarriers.begin(); + // A younger load cannot pass a older load barrier. + if (Index > LoadBarrierIndex) + return false; + // A load barrier cannot pass a older load. + if (Index == LoadBarrierIndex && Index != *LoadQueue.begin()) + return false; + } + + if (IsAStore && !StoreBarriers.empty()) { + unsigned StoreBarrierIndex = *StoreBarriers.begin(); + // A younger store cannot pass a older store barrier. + if (Index > StoreBarrierIndex) + return false; + // A store barrier cannot pass a older store. 
+ if (Index == StoreBarrierIndex && Index != *StoreQueue.begin()) + return false; + } + + // A load may not pass a previous store unless flag 'NoAlias' is set. + // A load may pass a previous load. + if (NoAlias && IsALoad) + return true; + + if (StoreQueue.size()) { + // A load may not pass a previous store. + // A store may not pass a previous store. + if (Index > *StoreQueue.begin()) + return false; + } + + // Okay, we are older than the oldest store in the queue. + // If there are no pending loads, then we can say for sure that this + // instruction is ready. + if (isLQEmpty()) + return true; + + // Check if there are no older loads. + if (Index <= *LoadQueue.begin()) + return true; + + // There is at least one younger load. + // + // A store may not pass a previous load. + // A load may pass a previous load. + return !IsAStore; +} + +void LSUnit::onInstructionExecuted(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + const unsigned Index = IR.getSourceIndex(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + + if (IsALoad) { + if (LoadQueue.erase(Index)) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the load queue.\n"); + } + if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { + LLVM_DEBUG( + dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of load barriers.\n"); + LoadBarriers.erase(Index); + } + } + + if (IsAStore) { + if (StoreQueue.erase(Index)) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the store queue.\n"); + } + + if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { + LLVM_DEBUG( + dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of store barriers.\n"); + StoreBarriers.erase(Index); + } + } +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp new file mode 100644 index 00000000000..22977e5ded6 --- /dev/null +++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -0,0 +1,491 @@ +//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/RegisterFile.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri, + unsigned NumRegs) + : MRI(mri), + RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}), + ZeroRegisters(mri.getNumRegs(), false) { + initialize(SM, NumRegs); +} + +void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { + // Create a default register file that "sees" all the machine registers + // declared by the target. The number of physical registers in the default + // register file is set equal to `NumRegs`. 
A value of zero for `NumRegs` + // means: this register file has an unbounded number of physical registers. + RegisterFiles.emplace_back(NumRegs); + if (!SM.hasExtraProcessorInfo()) + return; + + // For each user defined register file, allocate a RegisterMappingTracker + // object. The size of every register file, as well as the mapping between + // register files and register classes is specified via tablegen. + const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); + + // Skip invalid register file at index 0. + for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) { + const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; + assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!"); + + // The cost of a register definition is equivalent to the number of + // physical registers that are allocated at register renaming stage. + unsigned Length = RF.NumRegisterCostEntries; + const MCRegisterCostEntry *FirstElt = + &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; + addRegisterFile(RF, ArrayRef<MCRegisterCostEntry>(FirstElt, Length)); + } +} + +void RegisterFile::cycleStart() { + for (RegisterMappingTracker &RMT : RegisterFiles) + RMT.NumMoveEliminated = 0; +} + +void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF, + ArrayRef<MCRegisterCostEntry> Entries) { + // A default register file is always allocated at index #0. That register file + // is mainly used to count the total number of mappings created by all + // register files at runtime. Users can limit the number of available physical + // registers in register file #0 through the command line flag + // `-register-file-size`. + unsigned RegisterFileIndex = RegisterFiles.size(); + RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle, + RF.AllowZeroMoveEliminationOnly); + + // Special case where there is no register class identifier in the set. + // An empty set of register classes means: this register file contains all + // the physical registers specified by the target. + // We optimistically assume that a register can be renamed at the cost of a + // single physical register. The constructor of RegisterFile ensures that + // a RegisterMapping exists for each logical register defined by the Target. + if (Entries.empty()) + return; + + // Now update the cost of individual registers. + for (const MCRegisterCostEntry &RCE : Entries) { + const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); + for (const MCPhysReg Reg : RC) { + RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; + IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; + if (IPC.first && IPC.first != RegisterFileIndex) { + // The only register file that is allowed to overlap is the default + // register file at index #0. The analysis is inaccurate if register + // files overlap. + errs() << "warning: register " << MRI.getName(Reg) + << " defined in multiple register files."; + } + IPC = std::make_pair(RegisterFileIndex, RCE.Cost); + Entry.RenameAs = Reg; + Entry.AllowMoveElimination = RCE.AllowMoveElimination; + + // Assume the same cost for each sub-register. 
+ for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { + RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; + if (!OtherEntry.IndexPlusCost.first && + (!OtherEntry.RenameAs || + MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { + OtherEntry.IndexPlusCost = IPC; + OtherEntry.RenameAs = Reg; + } + } + } + } +} + +void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef<unsigned> UsedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs += Cost; + UsedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs += Cost; + UsedPhysRegs[0] += Cost; +} + +void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef<unsigned> FreedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs -= Cost; + FreedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs -= Cost; + FreedPhysRegs[0] += Cost; +} + +void RegisterFile::addRegisterWrite(WriteRef Write, + MutableArrayRef<unsigned> UsedPhysRegs) { + WriteState &WS = *Write.getWriteState(); + unsigned RegID = WS.getRegisterID(); + assert(RegID && "Adding an invalid register definition?"); + + LLVM_DEBUG({ + dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() + << ", " << MRI.getName(RegID) << "]\n"; + }); + + // If RenameAs is equal to RegID, then RegID is subject to register renaming + // and false dependencies on RegID are all eliminated. + + // If RenameAs references the invalid register, then we optimistically assume + // that it can be renamed. In the absence of tablegen descriptors for register + // files, RenameAs is always set to the invalid register ID. In all other + // cases, RenameAs must be either equal to RegID, or it must reference a + // super-register of RegID. + + // If RenameAs is a super-register of RegID, then a write to RegID has always + // a false dependency on RenameAs. The only exception is for when the write + // implicitly clears the upper portion of the underlying register. + // If a write clears its super-registers, then it is renamed as `RenameAs`. + bool IsWriteZero = WS.isWriteZero(); + bool IsEliminated = WS.isEliminated(); + bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated; + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + WS.setPRF(RRI.IndexPlusCost.first); + + if (RRI.RenameAs && RRI.RenameAs != RegID) { + RegID = RRI.RenameAs; + WriteRef &OtherWrite = RegisterMappings[RegID].first; + + if (!WS.clearsSuperRegisters()) { + // The processor keeps the definition of `RegID` together with register + // `RenameAs`. Since this partial write is not renamed, no physical + // register is allocated. + ShouldAllocatePhysRegs = false; + + WriteState *OtherWS = OtherWrite.getWriteState(); + if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { + // This partial write has a false dependency on RenameAs. + assert(!IsEliminated && "Unexpected partial update!"); + OtherWS->addUser(&WS); + } + } + } + + // Update zero registers. + unsigned ZeroRegisterID = + WS.clearsSuperRegisters() ? 
RegID : WS.getRegisterID(); + if (IsWriteZero) { + ZeroRegisters.setBit(ZeroRegisterID); + for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) + ZeroRegisters.setBit(*I); + } else { + ZeroRegisters.clearBit(ZeroRegisterID); + for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) + ZeroRegisters.clearBit(*I); + } + + // If this is move has been eliminated, then the call to tryEliminateMove + // should have already updated all the register mappings. + if (!IsEliminated) { + // Update the mapping for register RegID including its sub-registers. + RegisterMappings[RegID].first = Write; + RegisterMappings[RegID].second.AliasRegID = 0U; + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + RegisterMappings[*I].first = Write; + RegisterMappings[*I].second.AliasRegID = 0U; + } + + // No physical registers are allocated for instructions that are optimized + // in hardware. For example, zero-latency data-dependency breaking + // instructions don't consume physical registers. + if (ShouldAllocatePhysRegs) + allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); + } + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { + if (!IsEliminated) { + RegisterMappings[*I].first = Write; + RegisterMappings[*I].second.AliasRegID = 0U; + } + + if (IsWriteZero) + ZeroRegisters.setBit(*I); + else + ZeroRegisters.clearBit(*I); + } +} + +void RegisterFile::removeRegisterWrite( + const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) { + // Early exit if this write was eliminated. A write eliminated at register + // renaming stage generates an alias, and it is not added to the PRF. + if (WS.isEliminated()) + return; + + unsigned RegID = WS.getRegisterID(); + + assert(RegID != 0 && "Invalidating an already invalid register?"); + assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && + "Invalidating a write of unknown cycles!"); + assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); + + bool ShouldFreePhysRegs = !WS.isWriteZero(); + unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + if (RenameAs && RenameAs != RegID) { + RegID = RenameAs; + + if (!WS.clearsSuperRegisters()) { + // Keep the definition of `RegID` together with register `RenameAs`. + ShouldFreePhysRegs = false; + } + } + + if (ShouldFreePhysRegs) + freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); + + WriteRef &WR = RegisterMappings[RegID].first; + if (WR.getWriteState() == &WS) + WR.invalidate(); + + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } +} + +bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { + const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()]; + const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()]; + + // From and To must be owned by the same PRF. + const RegisterRenamingInfo &RRIFrom = RMFrom.second; + const RegisterRenamingInfo &RRITo = RMTo.second; + unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first; + if (RegisterFileIndex != RRITo.IndexPlusCost.first) + return false; + + // We only allow move elimination for writes that update a full physical + // register. 
On X86, move elimination is possible with 32-bit general purpose + // registers because writes to those registers are not partial writes. If a + // register move is a partial write, then we conservatively assume that move + // elimination fails, since it would either trigger a partial update, or the + // issue of a merge opcode. + // + // Note that this constraint may be lifted in future. For example, we could + // make this model more flexible, and let users customize the set of registers + // (i.e. register classes) that allow move elimination. + // + // For now, we assume that there is a strong correlation between registers + // that allow move elimination, and how those same registers are renamed in + // hardware. + if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) { + // Early exit if the PRF doesn't support move elimination for this register. + if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination) + return false; + if (!WS.clearsSuperRegisters()) + return false; + } + + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + if (RMT.MaxMoveEliminatedPerCycle && + RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle) + return false; + + bool IsZeroMove = ZeroRegisters[RS.getRegisterID()]; + if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove) + return false; + + MCPhysReg FromReg = RS.getRegisterID(); + MCPhysReg ToReg = WS.getRegisterID(); + + // Construct an alias. + MCPhysReg AliasReg = FromReg; + if (RRIFrom.RenameAs) + AliasReg = RRIFrom.RenameAs; + + const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second; + if (RMAlias.AliasRegID) + AliasReg = RMAlias.AliasRegID; + + if (AliasReg != ToReg) { + RegisterMappings[ToReg].second.AliasRegID = AliasReg; + for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I) + RegisterMappings[*I].second.AliasRegID = AliasReg; + } + + RMT.NumMoveEliminated++; + if (IsZeroMove) { + WS.setWriteZero(); + RS.setReadZero(); + } + WS.setEliminated(); + + return true; +} + +void RegisterFile::collectWrites(const ReadState &RS, + SmallVectorImpl<WriteRef> &Writes) const { + unsigned RegID = RS.getRegisterID(); + assert(RegID && RegID < RegisterMappings.size()); + LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " + << MRI.getName(RegID) << '\n'); + + // Check if this is an alias. + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + if (RRI.AliasRegID) + RegID = RRI.AliasRegID; + + const WriteRef &WR = RegisterMappings[RegID].first; + if (WR.isValid()) + Writes.push_back(WR); + + // Handle potential partial register updates. + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + const WriteRef &WR = RegisterMappings[*I].first; + if (WR.isValid()) + Writes.push_back(WR); + } + + // Remove duplicate entries and resize the input vector. 
+ if (Writes.size() > 1) { + sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { + return Lhs.getWriteState() < Rhs.getWriteState(); + }); + auto It = std::unique(Writes.begin(), Writes.end()); + Writes.resize(std::distance(Writes.begin(), It)); + } + + LLVM_DEBUG({ + for (const WriteRef &WR : Writes) { + const WriteState &WS = *WR.getWriteState(); + dbgs() << "[PRF] Found a dependent use of Register " + << MRI.getName(WS.getRegisterID()) << " (defined by instruction #" + << WR.getSourceIndex() << ")\n"; + } + }); +} + +void RegisterFile::addRegisterRead(ReadState &RS, + SmallVectorImpl<WriteRef> &Defs) const { + unsigned RegID = RS.getRegisterID(); + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + RS.setPRF(RRI.IndexPlusCost.first); + if (RS.isIndependentFromDef()) + return; + + if (ZeroRegisters[RS.getRegisterID()]) + RS.setReadZero(); + collectWrites(RS, Defs); + RS.setDependentWrites(Defs.size()); +} + +unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const { + SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles()); + + // Find how many new mappings must be created for each register file. + for (const unsigned RegID : Regs) { + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; + if (Entry.first) + NumPhysRegs[Entry.first] += Entry.second; + NumPhysRegs[0] += Entry.second; + } + + unsigned Response = 0; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + unsigned NumRegs = NumPhysRegs[I]; + if (!NumRegs) + continue; + + const RegisterMappingTracker &RMT = RegisterFiles[I]; + if (!RMT.NumPhysRegs) { + // The register file has an unbounded number of microarchitectural + // registers. + continue; + } + + if (RMT.NumPhysRegs < NumRegs) { + // The current register file is too small. This may occur if the number of + // microarchitectural registers in register file #0 was changed by the + // users via flag -reg-file-size. Alternatively, the scheduling model + // specified a too small number of registers for this register file. + LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); + + // FIXME: Normalize the instruction register count to match the + // NumPhysRegs value. This is a highly unusual case, and is not expected + // to occur. This normalization is hiding an inconsistency in either the + // scheduling model or in the value that the user might have specified + // for NumPhysRegs. 
+ NumRegs = RMT.NumPhysRegs; + } + + if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) + Response |= (1U << I); + } + + return Response; +} + +#ifndef NDEBUG +void RegisterFile::dump() const { + for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { + const RegisterMapping &RM = RegisterMappings[I]; + const RegisterRenamingInfo &RRI = RM.second; + if (ZeroRegisters[I]) { + dbgs() << MRI.getName(I) << ", " << I + << ", PRF=" << RRI.IndexPlusCost.first + << ", Cost=" << RRI.IndexPlusCost.second + << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I] + << ","; + RM.first.dump(); + dbgs() << '\n'; + } + } + + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + dbgs() << "Register File #" << I; + const RegisterMappingTracker &RMT = RegisterFiles[I]; + dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs + << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; + } +} +#endif + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp new file mode 100644 index 00000000000..b62fccdfd28 --- /dev/null +++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -0,0 +1,326 @@ +//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/ResourceManager.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" +ResourceStrategy::~ResourceStrategy() = default; + +uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { + // This method assumes that ReadyMask cannot be zero. 
+ uint64_t CandidateMask = ReadyMask & NextInSequenceMask; + if (CandidateMask) { + CandidateMask = PowerOf2Floor(CandidateMask); + NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); + return CandidateMask; + } + + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; + CandidateMask = ReadyMask & NextInSequenceMask; + + if (CandidateMask) { + CandidateMask = PowerOf2Floor(CandidateMask); + NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); + return CandidateMask; + } + + NextInSequenceMask = ResourceUnitMask; + CandidateMask = PowerOf2Floor(ReadyMask & NextInSequenceMask); + NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); + return CandidateMask; +} + +void DefaultResourceStrategy::used(uint64_t Mask) { + if (Mask > NextInSequenceMask) { + RemovedFromNextInSequence |= Mask; + return; + } + + NextInSequenceMask &= (~Mask); + if (NextInSequenceMask) + return; + + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; +} + +ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, + uint64_t Mask) + : ProcResourceDescIndex(Index), ResourceMask(Mask), + BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask)>1) { + if (IsAGroup) + ResourceSizeMask = ResourceMask ^ PowerOf2Floor(ResourceMask); + else + ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; + ReadyMask = ResourceSizeMask; + AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize); + Unavailable = false; +} + +bool ResourceState::isReady(unsigned NumUnits) const { + return (!isReserved() || isADispatchHazard()) && + countPopulation(ReadyMask) >= NumUnits; +} + +ResourceStateEvent ResourceState::isBufferAvailable() const { + if (isADispatchHazard() && isReserved()) + return RS_RESERVED; + if (!isBuffered() || AvailableSlots) + return RS_BUFFER_AVAILABLE; + return RS_BUFFER_UNAVAILABLE; +} + +#ifndef NDEBUG +void ResourceState::dump() const { + dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask + << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize + << ", AvailableSlots=" << AvailableSlots + << ", Reserved=" << Unavailable << '\n'; +} +#endif + +static unsigned getResourceStateIndex(uint64_t Mask) { + return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask); +} + +static std::unique_ptr<ResourceStrategy> +getStrategyFor(const ResourceState &RS) { + if (RS.isAResourceGroup() || RS.getNumUnits() > 1) + return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask()); + return std::unique_ptr<ResourceStrategy>(nullptr); +} + +ResourceManager::ResourceManager(const MCSchedModel &SM) { + computeProcResourceMasks(SM, ProcResID2Mask); + Resources.resize(SM.getNumProcResourceKinds()); + Strategies.resize(SM.getNumProcResourceKinds()); + + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + uint64_t Mask = ProcResID2Mask[I]; + unsigned Index = getResourceStateIndex(Mask); + Resources[Index] = + llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask); + Strategies[Index] = getStrategyFor(*Resources[Index]); + } +} + +void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S, + uint64_t ResourceMask) { + unsigned Index = getResourceStateIndex(ResourceMask); + assert(Index < Resources.size() && "Invalid processor resource index!"); + assert(S && "Unexpected null strategy in input!"); + Strategies[Index] = std::move(S); +} + +unsigned ResourceManager::resolveResourceMask(uint64_t 
Mask) const { + return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); +} + +unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { + return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); +} + +// Returns the actual resource consumed by this Use. +// First, is the primary resource ID. +// Second, is the specific sub-resource ID. +ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { + unsigned Index = getResourceStateIndex(ResourceID); + ResourceState &RS = *Resources[Index]; + assert(RS.isReady() && "No available units to select!"); + + // Special case where RS is not a group, and it only declares a single + // resource unit. + if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) + return std::make_pair(ResourceID, RS.getReadyMask()); + + uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); + if (RS.isAResourceGroup()) + return selectPipe(SubResourceID); + return std::make_pair(ResourceID, SubResourceID); +} + +void ResourceManager::use(const ResourceRef &RR) { + // Mark the sub-resource referenced by RR as used. + unsigned RSID = getResourceStateIndex(RR.first); + ResourceState &RS = *Resources[RSID]; + RS.markSubResourceAsUsed(RR.second); + // Remember to update the resource strategy for non-group resources with + // multiple units. + if (RS.getNumUnits() > 1) + Strategies[RSID]->used(RR.second); + + // If there are still available units in RR.first, + // then we are done. + if (RS.isReady()) + return; + + // Notify to other resources that RR.first is no longer available. + for (std::unique_ptr<ResourceState> &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) { + unsigned Index = getResourceStateIndex(Current.getResourceMask()); + Current.markSubResourceAsUsed(RR.first); + Strategies[Index]->used(RR.first); + } + } +} + +void ResourceManager::release(const ResourceRef &RR) { + ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; + bool WasFullyUsed = !RS.isReady(); + RS.releaseSubResource(RR.second); + if (!WasFullyUsed) + return; + + for (std::unique_ptr<ResourceState> &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) + Current.releaseSubResource(RR.first); + } +} + +ResourceStateEvent +ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const { + ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; + for (uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + Result = RS.isBufferAvailable(); + if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) + break; + } + return Result; +} + +void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) { + for (const uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); + RS.reserveBuffer(); + + if (RS.isADispatchHazard()) { + assert(!RS.isReserved()); + RS.setReserved(); + } + } +} + +void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) { + for (const uint64_t R : Buffers) + Resources[getResourceStateIndex(R)]->releaseBuffer(); +} + +bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { + return all_of( + Desc.Resources, [&](const std::pair<uint64_t, const ResourceUsage> &E) { + unsigned NumUnits 
= E.second.isReserved() ? 0U : E.second.NumUnits; + unsigned Index = getResourceStateIndex(E.first); + return Resources[Index]->isReady(NumUnits); + }); +} + +// Returns true if all resources are in-order, and there is at least one +// resource which is a dispatch hazard (BufferSize = 0). +bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { + if (!canBeIssued(Desc)) + return false; + bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { + unsigned Index = getResourceStateIndex(BufferMask); + const ResourceState &Resource = *Resources[Index]; + return Resource.isInOrder() || Resource.isADispatchHazard(); + }); + if (!AllInOrderResources) + return false; + + return any_of(Desc.Buffers, [&](uint64_t BufferMask) { + return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); + }); +} + +void ResourceManager::issueInstruction( + const InstrDesc &Desc, + SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) { + for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) { + const CycleSegment &CS = R.second.CS; + if (!CS.size()) { + releaseResource(R.first); + continue; + } + + assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); + if (!R.second.isReserved()) { + ResourceRef Pipe = selectPipe(R.first); + use(Pipe); + BusyResources[Pipe] += CS.size(); + // Replace the resource mask with a valid processor resource index. + const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; + Pipe.first = RS.getProcResourceID(); + Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>( + Pipe, ResourceCycles(CS.size()))); + } else { + assert((countPopulation(R.first) > 1) && "Expected a group!"); + // Mark this group as reserved. + assert(R.second.isReserved()); + reserveResource(R.first); + BusyResources[ResourceRef(R.first, R.first)] += CS.size(); + } + } +} + +void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) { + for (std::pair<ResourceRef, unsigned> &BR : BusyResources) { + if (BR.second) + BR.second--; + if (!BR.second) { + // Release this resource. + const ResourceRef &RR = BR.first; + + if (countPopulation(RR.first) == 1) + release(RR); + + releaseResource(RR.first); + ResourcesFreed.push_back(RR); + } + } + + for (const ResourceRef &RF : ResourcesFreed) + BusyResources.erase(RF); +} + +void ResourceManager::reserveResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + assert(!Resource.isReserved()); + Resource.setReserved(); +} + +void ResourceManager::releaseResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + Resource.clearReserved(); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp new file mode 100644 index 00000000000..de9f24552c3 --- /dev/null +++ b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -0,0 +1,88 @@ +//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), + AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + // Check if the scheduling model provides extra information about the machine + // processor. If so, then use that information to set the reorder buffer size + // and the maximum number of instructions retired per cycle. + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + AvailableSlots = EPI.ReorderBufferSize; + MaxRetirePerCycle = EPI.MaxRetirePerCycle; + } + + assert(AvailableSlots && "Invalid reorder buffer size!"); + Queue.resize(AvailableSlots); +} + +// Reserves a number of slots, and returns a new token. +unsigned RetireControlUnit::reserveSlot(const InstRef &IR, + unsigned NumMicroOps) { + assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); + unsigned NormalizedQuantity = + std::min(NumMicroOps, static_cast<unsigned>(Queue.size())); + // Zero latency instructions may have zero uOps. Artificially bump this + // value to 1. Although zero latency instructions don't consume scheduler + // resources, they still consume one slot in the retire queue. + NormalizedQuantity = std::max(NormalizedQuantity, 1U); + unsigned TokenID = NextAvailableSlotIdx; + Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; + NextAvailableSlotIdx += NormalizedQuantity; + NextAvailableSlotIdx %= Queue.size(); + AvailableSlots -= NormalizedQuantity; + return TokenID; +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { + return Queue[CurrentInstructionSlotIdx]; +} + +void RetireControlUnit::consumeCurrentToken() { + RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; + assert(Current.NumSlots && "Reserved zero slots?"); + assert(Current.IR && "Invalid RUToken in the RCU queue."); + Current.IR.getInstruction()->retire(); + + // Update the slot index to be the next item in the circular queue. + CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx %= Queue.size(); + AvailableSlots += Current.NumSlots; +} + +void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { + assert(Queue.size() > TokenID); + assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); + Queue[TokenID].Executed = true; +} + +#ifndef NDEBUG +void RetireControlUnit::dump() const { + dbgs() << "Retire Unit: { Total Slots=" << Queue.size() + << ", Available Slots=" << AvailableSlots << " }\n"; +} +#endif + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp new file mode 100644 index 00000000000..3924ac59910 --- /dev/null +++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp @@ -0,0 +1,245 @@ +//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A scheduler for processor resource units and processor resource groups. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) { + // Ensure we have a valid (non-null) strategy object. + Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>(); +} + +// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. +SchedulerStrategy::~SchedulerStrategy() = default; +DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; + +#ifndef NDEBUG +void Scheduler::dump() const { + dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; + dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; + dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; + Resources->dump(); +} +#endif + +Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + + switch (Resources->canBeDispatched(Desc.Buffers)) { + case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: + return Scheduler::SC_BUFFERS_FULL; + case ResourceStateEvent::RS_RESERVED: + return Scheduler::SC_DISPATCH_GROUP_STALL; + case ResourceStateEvent::RS_BUFFER_AVAILABLE: + break; + } + + // Give lower priority to LSUnit stall events. + switch (LSU.isAvailable(IR)) { + case LSUnit::LSU_LQUEUE_FULL: + return Scheduler::SC_LOAD_QUEUE_FULL; + case LSUnit::LSU_SQUEUE_FULL: + return Scheduler::SC_STORE_QUEUE_FULL; + case LSUnit::LSU_AVAILABLE: + return Scheduler::SC_AVAILABLE; + } + + llvm_unreachable("Don't know how to process this LSU state result!"); +} + +void Scheduler::issueInstructionImpl( + InstRef &IR, + SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) { + Instruction *IS = IR.getInstruction(); + const InstrDesc &D = IS->getDesc(); + + // Issue the instruction and collect all the consumed resources + // into a vector. That vector is then used to notify the listener. + Resources->issueInstruction(D, UsedResources); + + // Notify the instruction that it started executing. + // This updates the internal state of each write. + IS->execute(); + + if (IS->isExecuting()) + IssuedSet.emplace_back(IR); + else if (IS->isExecuted()) + LSU.onInstructionExecuted(IR); +} + +// Release the buffered resources and issue the instruction. +void Scheduler::issueInstruction( + InstRef &IR, + SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources, + SmallVectorImpl<InstRef> &ReadyInstructions) { + const Instruction &Inst = *IR.getInstruction(); + bool HasDependentUsers = Inst.hasDependentUsers(); + + Resources->releaseBuffers(Inst.getDesc().Buffers); + issueInstructionImpl(IR, UsedResources); + // Instructions that have been issued during this cycle might have unblocked + // other dependent instructions. Dependent instructions may be issued during + // this same cycle if operands have ReadAdvance entries. Promote those + // instructions to the ReadySet and notify the caller that those are ready. + if (HasDependentUsers) + promoteToReadySet(ReadyInstructions); +} + +void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) { + // Scan the set of waiting instructions and promote them to the + // ready queue if operands are all ready. 
+ unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + + // Check if this instruction is now ready. In case, force + // a transition in state using method 'update()'. + Instruction &IS = *IR.getInstruction(); + if (!IS.isReady()) + IS.update(); + + // Check if there are still unsolved data dependencies. + if (!isReady(IR)) { + ++I; + continue; + } + + Ready.emplace_back(IR); + ReadySet.emplace_back(IR); + + IR.invalidate(); + ++RemovedElements; + std::iter_swap(I, E - RemovedElements); + } + + WaitSet.resize(WaitSet.size() - RemovedElements); +} + +InstRef Scheduler::select() { + unsigned QueueIndex = ReadySet.size(); + for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { + const InstRef &IR = ReadySet[I]; + if (QueueIndex == ReadySet.size() || + Strategy->compare(IR, ReadySet[QueueIndex])) { + const InstrDesc &D = IR.getInstruction()->getDesc(); + if (Resources->canBeIssued(D)) + QueueIndex = I; + } + } + + if (QueueIndex == ReadySet.size()) + return InstRef(); + + // We found an instruction to issue. + InstRef IR = ReadySet[QueueIndex]; + std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); + ReadySet.pop_back(); + return IR; +} + +void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) { + unsigned RemovedElements = 0; + for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR) + break; + Instruction &IS = *IR.getInstruction(); + if (!IS.isExecuted()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " is still executing.\n"); + ++I; + continue; + } + + // Instruction IR has completed execution. + LSU.onInstructionExecuted(IR); + Executed.emplace_back(IR); + ++RemovedElements; + IR.invalidate(); + std::iter_swap(I, E - RemovedElements); + } + + IssuedSet.resize(IssuedSet.size() - RemovedElements); +} + +void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed, + SmallVectorImpl<InstRef> &Executed, + SmallVectorImpl<InstRef> &Ready) { + // Release consumed resources. + Resources->cycleEvent(Freed); + + // Propagate the cycle event to the 'Issued' and 'Wait' sets. + for (InstRef &IR : IssuedSet) + IR.getInstruction()->cycleEvent(); + + updateIssuedSet(Executed); + + for (InstRef &IR : WaitSet) + IR.getInstruction()->cycleEvent(); + + promoteToReadySet(Ready); +} + +bool Scheduler::mustIssueImmediately(const InstRef &IR) const { + // Instructions that use an in-order dispatch/issue processor resource must be + // issued immediately to the pipeline(s). Any other in-order buffered + // resources (i.e. BufferSize=1) is consumed. + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); +} + +void Scheduler::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + Resources->reserveBuffers(Desc.Buffers); + + // If necessary, reserve queue entries in the load-store unit (LSU). + bool IsMemOp = Desc.MayLoad || Desc.MayStore; + if (IsMemOp) + LSU.dispatch(IR); + + if (!isReady(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); + WaitSet.push_back(IR); + return; + } + + // Don't add a zero-latency instruction to the Ready queue. + // A zero-latency instruction doesn't consume any scheduler resources. That is + // because it doesn't need to be executed, and it is often removed at register + // renaming stage. 
For example, register-register moves are often optimized at + // register renaming stage by simply updating register aliases. On some + // targets, zero-idiom instructions (for example: a xor that clears the value + // of a register) are treated specially, and are often eliminated at register + // renaming stage. + if (!mustIssueImmediately(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); + ReadySet.push_back(IR); + } +} + +bool Scheduler::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsMemOp = Desc.MayLoad || Desc.MayStore; + return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR)); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp new file mode 100644 index 00000000000..5e228a255f1 --- /dev/null +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -0,0 +1,675 @@ +//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstrBuilder interface. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/InstrBuilder.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, + const llvm::MCInstrInfo &mcii, + const llvm::MCRegisterInfo &mri, + const llvm::MCInstrAnalysis &mcia) + : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), + FirstReturnInst(true) { + computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); +} + +static void initializeUsedResources(InstrDesc &ID, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI, + ArrayRef<uint64_t> ProcResourceMasks) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Populate resources consumed. + using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>; + std::vector<ResourcePlusCycles> Worklist; + + // Track cycles contributed by resources that are in a "Super" relationship. + // This is required if we want to correctly match the behavior of method + // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set + // of "consumed" processor resources and resource cycles, the logic in + // ExpandProcResource() doesn't update the number of resource cycles + // contributed by a "Super" resource to a group. + // We need to take this into account when we find that a processor resource is + // part of a group, and it is also used as the "Super" of other resources. + // This map stores the number of cycles contributed by sub-resources that are + // part of a "Super" resource. The key value is the "Super" resource mask ID. 
+ DenseMap<uint64_t, unsigned> SuperResources; + + unsigned NumProcResources = SM.getNumProcResourceKinds(); + APInt Buffers(NumProcResources, 0); + + for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { + const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; + const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); + uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; + if (PR.BufferSize != -1) + Buffers.setBit(PRE->ProcResourceIdx); + CycleSegment RCy(0, PRE->Cycles, false); + Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); + if (PR.SuperIdx) { + uint64_t Super = ProcResourceMasks[PR.SuperIdx]; + SuperResources[Super] += PRE->Cycles; + } + } + + // Sort elements by mask popcount, so that we prioritize resource units over + // resource groups, and smaller groups over larger groups. + sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { + unsigned popcntA = countPopulation(A.first); + unsigned popcntB = countPopulation(B.first); + if (popcntA < popcntB) + return true; + if (popcntA > popcntB) + return false; + return A.first < B.first; + }); + + uint64_t UsedResourceUnits = 0; + + // Remove cycles contributed by smaller resources. + for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { + ResourcePlusCycles &A = Worklist[I]; + if (!A.second.size()) { + A.second.NumUnits = 0; + A.second.setReserved(); + ID.Resources.emplace_back(A); + continue; + } + + ID.Resources.emplace_back(A); + uint64_t NormalizedMask = A.first; + if (countPopulation(A.first) == 1) { + UsedResourceUnits |= A.first; + } else { + // Remove the leading 1 from the resource group mask. + NormalizedMask ^= PowerOf2Floor(NormalizedMask); + } + + for (unsigned J = I + 1; J < E; ++J) { + ResourcePlusCycles &B = Worklist[J]; + if ((NormalizedMask & B.first) == NormalizedMask) { + B.second.CS.subtract(A.second.size() - SuperResources[A.first]); + if (countPopulation(B.first) > 1) + B.second.NumUnits++; + } + } + } + + // A SchedWrite may specify a number of cycles in which a resource group + // is reserved. For example (on target x86; cpu Haswell): + // + // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { + // let ResourceCycles = [2, 2, 3]; + // } + // + // This means: + // Resource units HWPort0 and HWPort1 are both used for 2cy. + // Resource group HWPort01 is the union of HWPort0 and HWPort1. + // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 + // will not be usable for 2 entire cycles from instruction issue. + // + // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency + // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an + // extra delay on top of the 2 cycles latency. + // During those extra cycles, HWPort01 is not usable by other instructions. + for (ResourcePlusCycles &RPC : ID.Resources) { + if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { + // Remove the leading 1 from the resource group mask. + uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); + if ((Mask & UsedResourceUnits) == Mask) + RPC.second.setReserved(); + } + } + + // Identify extra buffers that are consumed through super resources. 
+ for (const std::pair<uint64_t, unsigned> &SR : SuperResources) { + for (unsigned I = 1, E = NumProcResources; I < E; ++I) { + const MCProcResourceDesc &PR = *SM.getProcResource(I); + if (PR.BufferSize == -1) + continue; + + uint64_t Mask = ProcResourceMasks[I]; + if (Mask != SR.first && ((Mask & SR.first) == SR.first)) + Buffers.setBit(I); + } + } + + // Now set the buffers. + if (unsigned NumBuffers = Buffers.countPopulation()) { + ID.Buffers.resize(NumBuffers); + for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { + if (Buffers[I]) { + --NumBuffers; + ID.Buffers[NumBuffers] = ProcResourceMasks[I]; + } + } + } + + LLVM_DEBUG({ + for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources) + dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; + for (const uint64_t R : ID.Buffers) + dbgs() << "\t\tBuffer Mask=" << R << '\n'; + }); +} + +static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI) { + if (MCDesc.isCall()) { + // We cannot estimate how long this call will take. + // Artificially set an arbitrarily high latency (100cy). + ID.MaxLatency = 100U; + return; + } + + int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // If latency is unknown, then conservatively assume a MaxLatency of 100cy. + ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); +} + +static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { + // Count register definitions, and skip non register operands in the process. + unsigned I, E; + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { + const MCOperand &Op = MCI.getOperand(I); + if (Op.isReg()) + --NumExplicitDefs; + } + + if (NumExplicitDefs) { + return make_error<InstructionError<MCInst>>( + "Expected more register operand definitions.", MCI); + } + + if (MCDesc.hasOptionalDef()) { + // Always assume that the optional definition is the last operand. + const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); + if (I == MCI.getNumOperands() || !Op.isReg()) { + std::string Message = + "expected a register operand for an optional definition. Instruction " + "has not been correctly analyzed."; + return make_error<InstructionError<MCInst>>(Message, MCI); + } + } + + return ErrorSuccess(); +} + +void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + + // Assumptions made by this algorithm: + // 1. The number of explicit and implicit register definitions in a MCInst + // matches the number of explicit and implicit definitions according to + // the opcode descriptor (MCInstrDesc). + // 2. Uses start at index #(MCDesc.getNumDefs()). + // 3. There can only be a single optional register definition, an it is + // always the last operand of the sequence (excluding extra operands + // contributed by variadic opcodes). + // + // These assumptions work quite well for most out-of-order in-tree targets + // like x86. This is mainly because the vast majority of instructions is + // expanded to MCInst using a straightforward lowering logic that preserves + // the ordering of the operands. + // + // About assumption 1. + // The algorithm allows non-register operands between register operand + // definitions. 
This helps to handle some special ARM instructions with + // implicit operand increment (-mtriple=armv7): + // + // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed + // @ <MCOperand Reg:59> + // @ <MCOperand Imm:0> (!!) + // @ <MCOperand Reg:67> + // @ <MCOperand Imm:0> + // @ <MCOperand Imm:14> + // @ <MCOperand Reg:0>> + // + // MCDesc reports: + // 6 explicit operands. + // 1 optional definition + // 2 explicit definitions (!!) + // + // The presence of an 'Imm' operand between the two register definitions + // breaks the assumption that "register definitions are always at the + // beginning of the operand sequence". + // + // To workaround this issue, this algorithm ignores (i.e. skips) any + // non-register operands between register definitions. The optional + // definition is still at index #(NumOperands-1). + // + // According to assumption 2. register reads start at #(NumExplicitDefs-1). + // That means, register R1 from the example is both read and written. + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); + unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; + unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; + if (MCDesc.hasOptionalDef()) + TotalDefs++; + + unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); + ID.Writes.resize(TotalDefs + NumVariadicOps); + // Iterate over the operands list, and skip non-register operands. + // The first NumExplictDefs register operands are expected to be register + // definitions. + unsigned CurrentDef = 0; + unsigned i = 0; + for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { + const MCOperand &Op = MCI.getOperand(i); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = i; + if (CurrentDef < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + Write.IsOptionalDef = false; + LLVM_DEBUG({ + dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + CurrentDef++; + } + + assert(CurrentDef == NumExplicitDefs && + "Expected more register operand definitions."); + for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { + unsigned Index = NumExplicitDefs + CurrentDef; + WriteDescriptor &Write = ID.Writes[Index]; + Write.OpIndex = ~CurrentDef; + Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; + if (Index < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, Index); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. 
+ Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + + Write.IsOptionalDef = false; + assert(Write.RegisterID != 0 && "Expected a valid phys register!"); + LLVM_DEBUG({ + dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex + << ", PhysReg=" << MRI.getName(Write.RegisterID) + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (MCDesc.hasOptionalDef()) { + WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; + Write.OpIndex = MCDesc.getNumOperands() - 1; + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = true; + LLVM_DEBUG({ + dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (!NumVariadicOps) + return; + + // FIXME: if an instruction opcode is flagged 'mayStore', and it has no + // "unmodeledSideEffects', then this logic optimistically assumes that any + // extra register operands in the variadic sequence is not a register + // definition. + // + // Otherwise, we conservatively assume that any register operand from the + // variadic sequence is both a register read and a register write. + bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() && + !MCDesc.hasUnmodeledSideEffects(); + CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); + for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); + I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = OpIndex; + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = false; + ++CurrentDef; + LLVM_DEBUG({ + dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + ID.Writes.resize(CurrentDef); +} + +void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); + unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); + // Remove the optional definition. + if (MCDesc.hasOptionalDef()) + --NumExplicitUses; + unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); + unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; + ID.Reads.resize(TotalUses); + unsigned CurrentUse = 0; + for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; + ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = OpIndex; + Read.UseIndex = I; + Read.SchedClassID = SchedClassID; + ++CurrentUse; + LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + // For the purpose of ReadAdvance, implicit uses come directly after explicit + // uses. The "UseIndex" must be updated according to that implicit layout. 
+ for (unsigned I = 0; I < NumImplicitUses; ++I) { + ReadDescriptor &Read = ID.Reads[CurrentUse + I]; + Read.OpIndex = ~I; + Read.UseIndex = NumExplicitUses + I; + Read.RegisterID = MCDesc.getImplicitUses()[I]; + Read.SchedClassID = SchedClassID; + LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex + << ", UseIndex=" << Read.UseIndex << ", RegisterID=" + << MRI.getName(Read.RegisterID) << '\n'); + } + + CurrentUse += NumImplicitUses; + + // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no + // "unmodeledSideEffects", then this logic optimistically assumes that any + // extra register operands in the variadic sequence are not register + // definition. + + bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && + !MCDesc.hasUnmodeledSideEffects(); + for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); + I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { + const MCOperand &Op = MCI.getOperand(OpIndex); + if (!Op.isReg()) + continue; + + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = OpIndex; + Read.UseIndex = NumExplicitUses + NumImplicitUses + I; + Read.SchedClassID = SchedClassID; + ++CurrentUse; + LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + ID.Reads.resize(CurrentUse); +} + +Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, + const MCInst &MCI) const { + if (ID.NumMicroOps != 0) + return ErrorSuccess(); + + bool UsesMemory = ID.MayLoad || ID.MayStore; + bool UsesBuffers = !ID.Buffers.empty(); + bool UsesResources = !ID.Resources.empty(); + if (!UsesMemory && !UsesBuffers && !UsesResources) + return ErrorSuccess(); + + StringRef Message; + if (UsesMemory) { + Message = "found an inconsistent instruction that decodes " + "into zero opcodes and that consumes load/store " + "unit resources."; + } else { + Message = "found an inconsistent instruction that decodes " + "to zero opcodes and that consumes scheduler " + "resources."; + } + + return make_error<InstructionError<MCInst>>(Message, MCI); +} + +Expected<const InstrDesc &> +InstrBuilder::createInstrDescImpl(const MCInst &MCI) { + assert(STI.getSchedModel().hasInstrSchedModel() && + "Itineraries are not yet supported!"); + + // Obtain the instruction descriptor from the opcode. + unsigned short Opcode = MCI.getOpcode(); + const MCInstrDesc &MCDesc = MCII.get(Opcode); + const MCSchedModel &SM = STI.getSchedModel(); + + // Then obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); + + // Try to solve variant scheduling classes. + if (IsVariant) { + unsigned CPUID = SM.getProcessorID(); + while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); + + if (!SchedClassID) { + return make_error<InstructionError<MCInst>>( + "unable to resolve scheduling class for write variant.", MCI); + } + } + + // Check if this instruction is supported. Otherwise, report an error. + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { + return make_error<InstructionError<MCInst>>( + "found an unsupported instruction in the input assembly sequence.", + MCI); + } + + // Create a new empty descriptor. 
+ std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>(); + ID->NumMicroOps = SCDesc.NumMicroOps; + + if (MCDesc.isCall() && FirstCallInst) { + // We don't correctly model calls. + WithColor::warning() << "found a call in the input assembly sequence.\n"; + WithColor::note() << "call instructions are not correctly modeled. " + << "Assume a latency of 100cy.\n"; + FirstCallInst = false; + } + + if (MCDesc.isReturn() && FirstReturnInst) { + WithColor::warning() << "found a return instruction in the input" + << " assembly sequence.\n"; + WithColor::note() << "program counter updates are ignored.\n"; + FirstReturnInst = false; + } + + ID->MayLoad = MCDesc.mayLoad(); + ID->MayStore = MCDesc.mayStore(); + ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); + + initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); + computeMaxLatency(*ID, MCDesc, SCDesc, STI); + + if (Error Err = verifyOperands(MCDesc, MCI)) + return std::move(Err); + + populateWrites(*ID, MCI, SchedClassID); + populateReads(*ID, MCI, SchedClassID); + + LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); + LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); + + // Sanity check on the instruction descriptor. + if (Error Err = verifyInstrDesc(*ID, MCI)) + return std::move(Err); + + // Now add the new descriptor. + SchedClassID = MCDesc.getSchedClass(); + bool IsVariadic = MCDesc.isVariadic(); + if (!IsVariadic && !IsVariant) { + Descriptors[MCI.getOpcode()] = std::move(ID); + return *Descriptors[MCI.getOpcode()]; + } + + VariantDescriptors[&MCI] = std::move(ID); + return *VariantDescriptors[&MCI]; +} + +Expected<const InstrDesc &> +InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { + if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) + return *Descriptors[MCI.getOpcode()]; + + if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) + return *VariantDescriptors[&MCI]; + + return createInstrDescImpl(MCI); +} + +Expected<std::unique_ptr<Instruction>> +InstrBuilder::createInstruction(const MCInst &MCI) { + Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI); + if (!DescOrErr) + return DescOrErr.takeError(); + const InstrDesc &D = *DescOrErr; + std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D); + + // Check if this is a dependency breaking instruction. + APInt Mask; + + unsigned ProcID = STI.getSchedModel().getProcessorID(); + bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID); + bool IsDepBreaking = + IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID); + if (MCIA.isOptimizableRegisterMove(MCI, ProcID)) + NewIS->setOptimizableMove(); + + // Initialize Reads first. + for (const ReadDescriptor &RD : D.Reads) { + int RegID = -1; + if (!RD.isImplicitRead()) { + // explicit read. + const MCOperand &Op = MCI.getOperand(RD.OpIndex); + // Skip non-register operands. + if (!Op.isReg()) + continue; + RegID = Op.getReg(); + } else { + // Implicit read. + RegID = RD.RegisterID; + } + + // Skip invalid register operands. + if (!RegID) + continue; + + // Okay, this is a register operand. Create a ReadState for it. + assert(RegID > 0 && "Invalid register ID found!"); + NewIS->getUses().emplace_back(RD, RegID); + ReadState &RS = NewIS->getUses().back(); + + if (IsDepBreaking) { + // A mask of all zeroes means: explicit input operands are not + // independent. + if (Mask.isNullValue()) { + if (!RD.isImplicitRead()) + RS.setIndependentFromDef(); + } else { + // Check if this register operand is independent according to `Mask`. 
+ // Note that Mask may not have enough bits to describe all explicit and + // implicit input operands. If this register operand doesn't have a + // corresponding bit in Mask, then conservatively assume that it is + // dependent. + if (Mask.getBitWidth() > RD.UseIndex) { + // Okay. This map describe register use `RD.UseIndex`. + if (Mask[RD.UseIndex]) + RS.setIndependentFromDef(); + } + } + } + } + + // Early exit if there are no writes. + if (D.Writes.empty()) + return std::move(NewIS); + + // Track register writes that implicitly clear the upper portion of the + // underlying super-registers using an APInt. + APInt WriteMask(D.Writes.size(), 0); + + // Now query the MCInstrAnalysis object to obtain information about which + // register writes implicitly clear the upper portion of a super-register. + MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); + + // Initialize writes. + unsigned WriteIndex = 0; + for (const WriteDescriptor &WD : D.Writes) { + unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); + // Check if this is a optional definition that references NoReg. + if (WD.IsOptionalDef && !RegID) { + ++WriteIndex; + continue; + } + + assert(RegID && "Expected a valid register ID!"); + NewIS->getDefs().emplace_back(WD, RegID, + /* ClearsSuperRegs */ WriteMask[WriteIndex], + /* WritesZero */ IsZeroIdiom); + ++WriteIndex; + } + + return std::move(NewIS); +} +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Instruction.cpp b/llvm/lib/MCA/Instruction.cpp new file mode 100644 index 00000000000..057e95ca999 --- /dev/null +++ b/llvm/lib/MCA/Instruction.cpp @@ -0,0 +1,205 @@ +//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines abstractions used by the Pipeline to model register reads, +// register writes and instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +void ReadState::writeStartEvent(unsigned Cycles) { + assert(DependentWrites); + assert(CyclesLeft == UNKNOWN_CYCLES); + + // This read may be dependent on more than one write. This typically occurs + // when a definition is the result of multiple writes where at least one + // write does a partial register update. + // The HW is forced to do some extra bookkeeping to track of all the + // dependent writes, and implement a merging scheme for the partial writes. + --DependentWrites; + TotalCycles = std::max(TotalCycles, Cycles); + + if (!DependentWrites) { + CyclesLeft = TotalCycles; + IsReady = !CyclesLeft; + } +} + +void WriteState::onInstructionIssued() { + assert(CyclesLeft == UNKNOWN_CYCLES); + // Update the number of cycles left based on the WriteDescriptor info. + CyclesLeft = getLatency(); + + // Now that the time left before write-back is known, notify + // all the users. + for (const std::pair<ReadState *, int> &User : Users) { + ReadState *RS = User.first; + unsigned ReadCycles = std::max(0, CyclesLeft - User.second); + RS->writeStartEvent(ReadCycles); + } + + // Notify any writes that are in a false dependency with this write. 
+ if (PartialWrite) + PartialWrite->writeStartEvent(CyclesLeft); +} + +void WriteState::addUser(ReadState *User, int ReadAdvance) { + // If CyclesLeft is different than -1, then we don't need to + // update the list of users. We can just notify the user with + // the actual number of cycles left (which may be zero). + if (CyclesLeft != UNKNOWN_CYCLES) { + unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); + User->writeStartEvent(ReadCycles); + return; + } + + if (llvm::find_if(Users, [&User](const std::pair<ReadState *, int> &Use) { + return Use.first == User; + }) == Users.end()) { + Users.emplace_back(User, ReadAdvance); + } +} + +void WriteState::addUser(WriteState *User) { + if (CyclesLeft != UNKNOWN_CYCLES) { + User->writeStartEvent(std::max(0, CyclesLeft)); + return; + } + + assert(!PartialWrite && "PartialWrite already set!"); + PartialWrite = User; + User->setDependentWrite(this); +} + +void WriteState::cycleEvent() { + // Note: CyclesLeft can be a negative number. It is an error to + // make it an unsigned quantity because users of this write may + // specify a negative ReadAdvance. + if (CyclesLeft != UNKNOWN_CYCLES) + CyclesLeft--; + + if (DependentWriteCyclesLeft) + DependentWriteCyclesLeft--; +} + +void ReadState::cycleEvent() { + // Update the total number of cycles. + if (DependentWrites && TotalCycles) { + --TotalCycles; + return; + } + + // Bail out immediately if we don't know how many cycles are left. + if (CyclesLeft == UNKNOWN_CYCLES) + return; + + if (CyclesLeft) { + --CyclesLeft; + IsReady = !CyclesLeft; + } +} + +#ifndef NDEBUG +void WriteState::dump() const { + dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID " + << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; +} + +void WriteRef::dump() const { + dbgs() << "IID=" << getSourceIndex() << ' '; + if (isValid()) + getWriteState()->dump(); + else + dbgs() << "(null)"; +} +#endif + +void Instruction::dispatch(unsigned RCUToken) { + assert(Stage == IS_INVALID); + Stage = IS_AVAILABLE; + RCUTokenID = RCUToken; + + // Check if input operands are already available. + update(); +} + +void Instruction::execute() { + assert(Stage == IS_READY); + Stage = IS_EXECUTING; + + // Set the cycles left before the write-back stage. + CyclesLeft = getLatency(); + + for (WriteState &WS : getDefs()) + WS.onInstructionIssued(); + + // Transition to the "executed" stage if this is a zero-latency instruction. + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +void Instruction::forceExecuted() { + assert(Stage == IS_READY && "Invalid internal state!"); + CyclesLeft = 0; + Stage = IS_EXECUTED; +} + +void Instruction::update() { + assert(isDispatched() && "Unexpected instruction stage found!"); + + if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); })) + return; + + // A partial register write cannot complete before a dependent write. 
+ auto IsDefReady = [&](const WriteState &Def) { + if (!Def.getDependentWrite()) { + unsigned CyclesLeft = Def.getDependentWriteCyclesLeft(); + return !CyclesLeft || CyclesLeft < getLatency(); + } + return false; + }; + + if (all_of(getDefs(), IsDefReady)) + Stage = IS_READY; +} + +void Instruction::cycleEvent() { + if (isReady()) + return; + + if (isDispatched()) { + for (ReadState &Use : getUses()) + Use.cycleEvent(); + + for (WriteState &Def : getDefs()) + Def.cycleEvent(); + + update(); + return; + } + + assert(isExecuting() && "Instruction not in-flight?"); + assert(CyclesLeft && "Instruction already executed?"); + for (WriteState &Def : getDefs()) + Def.cycleEvent(); + CyclesLeft--; + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max(); + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/LLVMBuild.txt b/llvm/lib/MCA/LLVMBuild.txt new file mode 100644 index 00000000000..75f3a9e2229 --- /dev/null +++ b/llvm/lib/MCA/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-mca/lib/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MCA +parent = Libraries +required_libraries = MC Support diff --git a/llvm/lib/MCA/Pipeline.cpp b/llvm/lib/MCA/Pipeline.cpp new file mode 100644 index 00000000000..fd97ea624b8 --- /dev/null +++ b/llvm/lib/MCA/Pipeline.cpp @@ -0,0 +1,97 @@ +//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Pipeline.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/Debug.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +void Pipeline::addEventListener(HWEventListener *Listener) { + if (Listener) + Listeners.insert(Listener); + for (auto &S : Stages) + S->addListener(Listener); +} + +bool Pipeline::hasWorkToProcess() { + return any_of(Stages, [](const std::unique_ptr<Stage> &S) { + return S->hasWorkToComplete(); + }); +} + +Expected<unsigned> Pipeline::run() { + assert(!Stages.empty() && "Unexpected empty pipeline found!"); + + do { + notifyCycleBegin(); + if (Error Err = runCycle()) + return std::move(Err); + notifyCycleEnd(); + ++Cycles; + } while (hasWorkToProcess()); + + return Cycles; +} + +Error Pipeline::runCycle() { + Error Err = ErrorSuccess(); + // Update stages before we start processing new instructions. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr<Stage> &S = *I; + Err = S->cycleStart(); + } + + // Now fetch and execute new instructions. 
+ InstRef IR; + Stage &FirstStage = *Stages[0]; + while (!Err && FirstStage.isAvailable(IR)) + Err = FirstStage.execute(IR); + + // Update stages in preparation for a new cycle. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr<Stage> &S = *I; + Err = S->cycleEnd(); + } + + return Err; +} + +void Pipeline::appendStage(std::unique_ptr<Stage> S) { + assert(S && "Invalid null stage in input!"); + if (!Stages.empty()) { + Stage *Last = Stages.back().get(); + Last->setNextInSequence(S.get()); + } + + Stages.push_back(std::move(S)); +} + +void Pipeline::notifyCycleBegin() { + LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); + for (HWEventListener *Listener : Listeners) + Listener->onCycleBegin(); +} + +void Pipeline::notifyCycleEnd() { + LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); + for (HWEventListener *Listener : Listeners) + Listener->onCycleEnd(); +} +} // namespace mca. +} // namespace llvm diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp new file mode 100644 index 00000000000..52e04fad58e --- /dev/null +++ b/llvm/lib/MCA/Stages/DispatchStage.cpp @@ -0,0 +1,185 @@ +//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. +/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/DispatchStage.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +void DispatchStage::notifyInstructionDispatched(const InstRef &IR, + ArrayRef<unsigned> UsedRegs, + unsigned UOps) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); + notifyEvent<HWInstructionEvent>( + HWInstructionDispatchedEvent(IR, UsedRegs, UOps)); +} + +bool DispatchStage::checkPRF(const InstRef &IR) const { + SmallVector<unsigned, 4> RegDefs; + for (const WriteState &RegDef : IR.getInstruction()->getDefs()) + RegDefs.emplace_back(RegDef.getRegisterID()); + + const unsigned RegisterMask = PRF.isAvailable(RegDefs); + // A mask with all zeroes means: register files are available. + if (RegisterMask) { + notifyEvent<HWStallEvent>( + HWStallEvent(HWStallEvent::RegisterFileStall, IR)); + return false; + } + + return true; +} + +bool DispatchStage::checkRCU(const InstRef &IR) const { + const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + if (RCU.isAvailable(NumMicroOps)) + return true; + notifyEvent<HWStallEvent>( + HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); + return false; +} + +bool DispatchStage::canDispatch(const InstRef &IR) const { + return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); +} + +void DispatchStage::updateRAWDependencies(ReadState &RS, + const MCSubtargetInfo &STI) { + SmallVector<WriteRef, 4> DependentWrites; + + // Collect all the dependent writes, and update RS internal state. 
+ PRF.addRegisterRead(RS, DependentWrites); + + // We know that this read depends on all the writes in DependentWrites. + // For each write, check if we have ReadAdvance information, and use it + // to figure out in how many cycles this read becomes available. + const ReadDescriptor &RD = RS.getDescriptor(); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); + for (WriteRef &WR : DependentWrites) { + WriteState &WS = *WR.getWriteState(); + unsigned WriteResID = WS.getWriteResourceID(); + int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); + WS.addUser(&RS, ReadAdvance); + } +} + +Error DispatchStage::dispatch(InstRef IR) { + assert(!CarryOver && "Cannot dispatch another instruction!"); + Instruction &IS = *IR.getInstruction(); + const InstrDesc &Desc = IS.getDesc(); + const unsigned NumMicroOps = Desc.NumMicroOps; + if (NumMicroOps > DispatchWidth) { + assert(AvailableEntries == DispatchWidth); + AvailableEntries = 0; + CarryOver = NumMicroOps - DispatchWidth; + CarriedOver = IR; + } else { + assert(AvailableEntries >= NumMicroOps); + AvailableEntries -= NumMicroOps; + } + + // Check if this is an optimizable reg-reg move. + bool IsEliminated = false; + if (IS.isOptimizableMove()) { + assert(IS.getDefs().size() == 1 && "Expected a single input!"); + assert(IS.getUses().size() == 1 && "Expected a single output!"); + IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]); + } + + // A dependency-breaking instruction doesn't have to wait on the register + // input operands, and it is often optimized at register renaming stage. + // Update RAW dependencies if this instruction is not a dependency-breaking + // instruction. A dependency-breaking instruction is a zero-latency + // instruction that doesn't consume hardware resources. + // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. + // + // We also don't update data dependencies for instructions that have been + // eliminated at register renaming stage. + if (!IsEliminated) { + for (ReadState &RS : IS.getUses()) + updateRAWDependencies(RS, STI); + } + + // By default, a dependency-breaking zero-idiom is expected to be optimized + // at register renaming stage. That means, no physical register is allocated + // to the instruction. + SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles()); + for (WriteState &WS : IS.getDefs()) + PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); + + // Reserve slots in the RCU, and notify the instruction that it has been + // dispatched to the schedulers for execution. + IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + + // Notify listeners of the "instruction dispatched" event, + // and move IR to the next stage. + notifyInstructionDispatched(IR, RegisterFiles, + std::min(DispatchWidth, NumMicroOps)); + return moveToTheNextStage(IR); +} + +Error DispatchStage::cycleStart() { + PRF.cycleStart(); + + if (!CarryOver) { + AvailableEntries = DispatchWidth; + return ErrorSuccess(); + } + + AvailableEntries = CarryOver >= DispatchWidth ? 
0 : DispatchWidth - CarryOver; + unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries; + CarryOver -= DispatchedOpcodes; + assert(CarriedOver && "Invalid dispatched instruction"); + + SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U); + notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes); + if (!CarryOver) + CarriedOver = InstRef(); + return ErrorSuccess(); +} + +bool DispatchStage::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + if (Required > AvailableEntries) + return false; + // The dispatch logic doesn't internally buffer instructions. It only accepts + // instructions that can be successfully moved to the next stage during this + // same cycle. + return canDispatch(IR); +} + +Error DispatchStage::execute(InstRef &IR) { + assert(canDispatch(IR) && "Cannot dispatch another instruction!"); + return dispatch(IR); +} + +#ifndef NDEBUG +void DispatchStage::dump() const { + PRF.dump(); + RCU.dump(); +} +#endif +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Stages/EntryStage.cpp b/llvm/lib/MCA/Stages/EntryStage.cpp new file mode 100644 index 00000000000..3325bb36f5a --- /dev/null +++ b/llvm/lib/MCA/Stages/EntryStage.cpp @@ -0,0 +1,76 @@ +//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Fetch stage of an instruction pipeline. Its sole +/// purpose in life is to produce instructions for the rest of the pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/EntryStage.h" +#include "llvm/MCA/Instruction.h" + +namespace llvm { +namespace mca { + +bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; } + +bool EntryStage::isAvailable(const InstRef & /* unused */) const { + if (CurrentInstruction) + return checkNextStage(CurrentInstruction); + return false; +} + +void EntryStage::getNextInstruction() { + assert(!CurrentInstruction && "There is already an instruction to process!"); + if (!SM.hasNext()) + return; + SourceRef SR = SM.peekNext(); + std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second); + CurrentInstruction = InstRef(SR.first, Inst.get()); + Instructions.emplace_back(std::move(Inst)); + SM.updateNext(); +} + +llvm::Error EntryStage::execute(InstRef & /*unused */) { + assert(CurrentInstruction && "There is no instruction to process!"); + if (llvm::Error Val = moveToTheNextStage(CurrentInstruction)) + return Val; + + // Move the program counter. + CurrentInstruction.invalidate(); + getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error EntryStage::cycleStart() { + if (!CurrentInstruction) + getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error EntryStage::cycleEnd() { + // Find the first instruction which hasn't been retired. + auto Range = make_range(&Instructions[NumRetired], Instructions.end()); + auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) { + return !I->isRetired(); + }); + + NumRetired = std::distance(Instructions.begin(), It); + // Erase instructions up to the first that hasn't been retired. 
+ if ((NumRetired * 2) >= Instructions.size()) { + Instructions.erase(Instructions.begin(), It); + NumRetired = 0; + } + + return llvm::ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp new file mode 100644 index 00000000000..17f7ff7259a --- /dev/null +++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp @@ -0,0 +1,219 @@ +//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of an instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/ExecuteStage.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { + switch (Status) { + case Scheduler::SC_LOAD_QUEUE_FULL: + return HWStallEvent::LoadQueueFull; + case Scheduler::SC_STORE_QUEUE_FULL: + return HWStallEvent::StoreQueueFull; + case Scheduler::SC_BUFFERS_FULL: + return HWStallEvent::SchedulerQueueFull; + case Scheduler::SC_DISPATCH_GROUP_STALL: + return HWStallEvent::DispatchGroupStall; + case Scheduler::SC_AVAILABLE: + return HWStallEvent::Invalid; + } + + llvm_unreachable("Don't know how to process this StallKind!"); +} + +bool ExecuteStage::isAvailable(const InstRef &IR) const { + if (Scheduler::Status S = HWS.isAvailable(IR)) { + HWStallEvent::GenericEventType ET = toHWStallEventType(S); + notifyEvent<HWStallEvent>(HWStallEvent(ET, IR)); + return false; + } + + return true; +} + +Error ExecuteStage::issueInstruction(InstRef &IR) { + SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used; + SmallVector<InstRef, 4> Ready; + HWS.issueInstruction(IR, Used, Ready); + + notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); + notifyInstructionIssued(IR, Used); + if (IR.getInstruction()->isExecuted()) { + notifyInstructionExecuted(IR); + // FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &I : Ready) + notifyInstructionReady(I); + return ErrorSuccess(); +} + +Error ExecuteStage::issueReadyInstructions() { + InstRef IR = HWS.select(); + while (IR) { + if (Error Err = issueInstruction(IR)) + return Err; + + // Select the next instruction to issue. + IR = HWS.select(); + } + + return ErrorSuccess(); +} + +Error ExecuteStage::cycleStart() { + SmallVector<ResourceRef, 8> Freed; + SmallVector<InstRef, 4> Executed; + SmallVector<InstRef, 4> Ready; + + HWS.cycleEvent(Freed, Executed, Ready); + + for (const ResourceRef &RR : Freed) + notifyResourceAvailable(RR); + + for (InstRef &IR : Executed) { + notifyInstructionExecuted(IR); + // FIXME: add a buffer of executed instructions. 
+ if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &IR : Ready) + notifyInstructionReady(IR); + + return issueReadyInstructions(); +} + +#ifndef NDEBUG +static void verifyInstructionEliminated(const InstRef &IR) { + const Instruction &Inst = *IR.getInstruction(); + assert(Inst.isEliminated() && "Instruction was not eliminated!"); + assert(Inst.isReady() && "Instruction in an inconsistent state!"); + + // Ensure that instructions eliminated at register renaming stage are in a + // consistent state. + const InstrDesc &Desc = Inst.getDesc(); + assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!"); +} +#endif + +Error ExecuteStage::handleInstructionEliminated(InstRef &IR) { +#ifndef NDEBUG + verifyInstructionEliminated(IR); +#endif + notifyInstructionReady(IR); + notifyInstructionIssued(IR, {}); + IR.getInstruction()->forceExecuted(); + notifyInstructionExecuted(IR); + return moveToTheNextStage(IR); +} + +// Schedule the instruction for execution on the hardware. +Error ExecuteStage::execute(InstRef &IR) { + assert(isAvailable(IR) && "Scheduler is not available!"); + +#ifndef NDEBUG + // Ensure that the HWS has not stored this instruction in its queues. + HWS.sanityCheck(IR); +#endif + + if (IR.getInstruction()->isEliminated()) + return handleInstructionEliminated(IR); + + // Reserve a slot in each buffered resource. Also, mark units with + // BufferSize=0 as reserved. Resources with a buffer size of zero will only + // be released after MCIS is issued, and all the ResourceCycles for those + // units have been consumed. + HWS.dispatch(IR); + notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); + if (!HWS.isReady(IR)) + return ErrorSuccess(); + + // If we did not return early, then the scheduler is ready for execution. + notifyInstructionReady(IR); + + // If we cannot issue immediately, the HWS will add IR to its ready queue for + // execution later, so we must return early here. + if (!HWS.mustIssueImmediately(IR)) + return ErrorSuccess(); + + // Issue IR to the underlying pipelines. + return issueInstruction(IR); +} + +void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); + notifyEvent<HWInstructionEvent>( + HWInstructionEvent(HWInstructionEvent::Executed, IR)); +} + +void ExecuteStage::notifyInstructionReady(const InstRef &IR) const { + LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); + notifyEvent<HWInstructionEvent>( + HWInstructionEvent(HWInstructionEvent::Ready, IR)); +} + +void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const { + LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' + << RR.second << "]\n"); + for (HWEventListener *Listener : getListeners()) + Listener->onResourceAvailable(RR); +} + +void ExecuteStage::notifyInstructionIssued( + const InstRef &IR, + ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const { + LLVM_DEBUG({ + dbgs() << "[E] Instruction Issued: #" << IR << '\n'; + for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) { + dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' 
+ << Resource.first.second << "], "; + dbgs() << "cycles: " << Resource.second << '\n'; + } + }); + notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used)); +} + +void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, + bool Reserved) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.Buffers.empty()) + return; + + SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); + std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), + [&](uint64_t Op) { return HWS.getResourceID(Op); }); + if (Reserved) { + for (HWEventListener *Listener : getListeners()) + Listener->onReservedBuffers(IR, BufferIDs); + return; + } + + for (HWEventListener *Listener : getListeners()) + Listener->onReleasedBuffers(IR, BufferIDs); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Stages/InstructionTables.cpp b/llvm/lib/MCA/Stages/InstructionTables.cpp new file mode 100644 index 00000000000..f918c183aa5 --- /dev/null +++ b/llvm/lib/MCA/Stages/InstructionTables.cpp @@ -0,0 +1,69 @@ +//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the method InstructionTables::execute(). +/// Method execute() prints a theoretical resource pressure distribution based +/// on the information available in the scheduling model, and without running +/// the pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/InstructionTables.h" + +namespace llvm { +namespace mca { + +Error InstructionTables::execute(InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + UsedResources.clear(); + + // Identify the resources consumed by this instruction. + for (const std::pair<uint64_t, ResourceUsage> Resource : Desc.Resources) { + // Skip zero-cycle resources (i.e., unused resources). + if (!Resource.second.size()) + continue; + unsigned Cycles = Resource.second.size(); + unsigned Index = std::distance( + Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first)); + const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index); + unsigned NumUnits = ProcResource.NumUnits; + if (!ProcResource.SubUnitsIdxBegin) { + // The number of cycles consumed by each unit. + for (unsigned I = 0, E = NumUnits; I < E; ++I) { + ResourceRef ResourceUnit = std::make_pair(Index, 1U << I); + UsedResources.emplace_back( + std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits))); + } + continue; + } + + // This is a group. Obtain the set of resources contained in this + // group. Some of these resources may implement multiple units. + // Uniformly distribute Cycles across all of the units. + for (unsigned I1 = 0; I1 < NumUnits; ++I1) { + unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1]; + const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx); + // Compute the number of cycles consumed by each resource unit. 
+ for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) { + ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2); + UsedResources.emplace_back(std::make_pair( + ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits))); + } + } + } + + // Send a fake instruction issued event to all the views. + HWInstructionIssuedEvent Event(IR, UsedResources); + notifyEvent<HWInstructionIssuedEvent>(Event); + return ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp new file mode 100644 index 00000000000..d6bcc518662 --- /dev/null +++ b/llvm/lib/MCA/Stages/RetireStage.cpp @@ -0,0 +1,62 @@ +//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the retire stage of an instruction pipeline. +/// The RetireStage represents the process logic that interacts with the +/// simulated RetireControlUnit hardware. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/RetireStage.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +llvm::Error RetireStage::cycleStart() { + if (RCU.isEmpty()) + return llvm::ErrorSuccess(); + + const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); + unsigned NumRetired = 0; + while (!RCU.isEmpty()) { + if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) + break; + const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); + if (!Current.Executed) + break; + RCU.consumeCurrentToken(); + notifyInstructionRetired(Current.IR); + NumRetired++; + } + + return llvm::ErrorSuccess(); +} + +llvm::Error RetireStage::execute(InstRef &IR) { + RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); + return llvm::ErrorSuccess(); +} + +void RetireStage::notifyInstructionRetired(const InstRef &IR) const { + LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n'); + llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles()); + const Instruction &Inst = *IR.getInstruction(); + + for (const WriteState &WS : Inst.getDefs()) + PRF.removeRegisterWrite(WS, FreedRegs); + notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs)); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Stages/Stage.cpp b/llvm/lib/MCA/Stages/Stage.cpp new file mode 100644 index 00000000000..38191645e73 --- /dev/null +++ b/llvm/lib/MCA/Stages/Stage.cpp @@ -0,0 +1,29 @@ +//===---------------------- Stage.cpp ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage. +/// A chain of stages compose an instruction pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/Stage.h" + +namespace llvm { +namespace mca { + +// Pin the vtable here in the implementation file. 
+Stage::~Stage() = default; + +void Stage::addListener(HWEventListener *Listener) { + Listeners.insert(Listener); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Support.cpp b/llvm/lib/MCA/Support.cpp new file mode 100644 index 00000000000..3271bc6bf5b --- /dev/null +++ b/llvm/lib/MCA/Support.cpp @@ -0,0 +1,79 @@ +//===--------------------- Support.cpp --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a few helper functions used by various pipeline +/// components. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Support.h" +#include "llvm/MC/MCSchedule.h" + +namespace llvm { +namespace mca { + +void computeProcResourceMasks(const MCSchedModel &SM, + SmallVectorImpl<uint64_t> &Masks) { + unsigned ProcResourceID = 0; + + // Create a unique bitmask for every processor resource unit. + // Skip resource at index 0, since it always references 'InvalidUnit'. + Masks.resize(SM.getNumProcResourceKinds()); + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + ProcResourceID++; + } + + // Create a unique bitmask for every processor resource group. + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + if (!Desc.SubUnitsIdxBegin) + continue; + Masks[I] = 1ULL << ProcResourceID; + for (unsigned U = 0; U < Desc.NumUnits; ++U) { + uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]]; + Masks[I] |= OtherMask; + } + ProcResourceID++; + } +} + +double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, + unsigned NumMicroOps, + ArrayRef<unsigned> ProcResourceUsage) { + // The block throughput is bounded from above by the hardware dispatch + // throughput. That is because the DispatchWidth is an upper bound on the + // number of opcodes that can be part of a single dispatch group. + double Max = static_cast<double>(NumMicroOps) / DispatchWidth; + + // The block throughput is also limited by the amount of hardware parallelism. + // The number of available resource units affects the resource pressure + // distribution, as well as how many blocks can be executed every cycle. + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + unsigned ResourceCycles = ProcResourceUsage[I]; + if (!ResourceCycles) + continue; + + const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); + double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits; + Max = std::max(Max, Throughput); + } + + // The block reciprocal throughput is computed as the MAX of: + // - (NumMicroOps / DispatchWidth) + // - (NumUnits / ResourceCycles) for every consumed processor resource. + return Max; +} + +} // namespace mca +} // namespace llvm |
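
Note: the RAW-dependency update in DispatchStage::updateRAWDependencies() and WriteState::addUser() above boils down to max(0, CyclesLeft - ReadAdvance): a positive ReadAdvance makes a value visible to the consumer before the producer's full write-back latency has elapsed, while a negative ReadAdvance delays the read. A minimal standalone sketch of that arithmetic follows; the latency and ReadAdvance values are made up, and the helper function is not part of the library.

// Minimal sketch of how a ReadAdvance shortens (or lengthens) a RAW
// dependency, following the max(0, CyclesLeft - ReadAdvance) logic in
// WriteState::addUser(). All numbers below are hypothetical.
#include <algorithm>
#include <cstdio>

// Cycles a dependent read still has to wait once the producer issues.
int readCyclesLeft(int WriteLatency, int ReadAdvance) {
  return std::max(0, WriteLatency - ReadAdvance);
}

int main() {
  // A 5-cycle producer with a ReadAdvance of 3: the consumer only waits 2.
  std::printf("wait = %d\n", readCyclesLeft(5, 3));  // 2
  // A negative ReadAdvance delays the read instead.
  std::printf("wait = %d\n", readCyclesLeft(5, -1)); // 6
  return 0;
}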
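DispatchStage::dispatch() and DispatchStage::cycleStart() spread an instruction across several dispatch cycles when it decodes into more micro-opcodes than the DispatchWidth: the excess is recorded in CarryOver and drained on the following cycles. A small sketch of that bookkeeping, assuming a hypothetical width of 4 and a 10-uOp instruction (not the library code itself):

// Sketch of the dispatch carry-over handling in DispatchStage. Widths and
// uOp counts are hypothetical.
#include <algorithm>
#include <cstdio>

int main() {
  unsigned DispatchWidth = 4;
  unsigned NumMicroOps = 10; // needs three dispatch cycles at width 4
  unsigned CarryOver =
      NumMicroOps > DispatchWidth ? NumMicroOps - DispatchWidth : 0;

  unsigned Cycle = 1;
  std::printf("cycle %u: dispatched %u uOps\n", Cycle,
              std::min(DispatchWidth, NumMicroOps));
  while (CarryOver) {
    ++Cycle;
    unsigned Dispatched = std::min(DispatchWidth, CarryOver);
    CarryOver -= Dispatched;
    std::printf("cycle %u: dispatched %u uOps\n", Cycle, Dispatched);
  }
  return 0;
}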
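Support.cpp encodes processor resources as bitmasks in computeProcResourceMasks(): each resource unit gets a dedicated bit, and a resource group gets its own bit plus the union of the bits of its sub-units. This is why initializeUsedResources() can strip a group's leading bit with PowerOf2Floor() and test unit membership with a simple AND. A toy illustration with made-up port names (not the LLVM API, no MCSchedModel involved):

// Simplified sketch of the mask scheme built by computeProcResourceMasks().
// The port names and layout below are hypothetical.
#include <cstdint>
#include <cstdio>

int main() {
  // Two hypothetical units and one group containing both of them.
  uint64_t Port0Mask = 1ULL << 0;                    // unit
  uint64_t Port1Mask = 1ULL << 1;                    // unit
  uint64_t Port01Mask = (1ULL << 2)                  // the group's own bit...
                        | Port0Mask | Port1Mask;     // ...plus its sub-units

  // With this encoding, popcount(Mask) == 1 identifies a unit, and a unit
  // belongs to a group when (GroupMask & UnitMask) == UnitMask.
  std::printf("Port0=0x%llx Port1=0x%llx Port01=0x%llx\n",
              (unsigned long long)Port0Mask, (unsigned long long)Port1Mask,
              (unsigned long long)Port01Mask);
  return 0;
}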
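computeBlockRThroughput() bounds the block reciprocal throughput by the larger of the dispatch term (NumMicroOps / DispatchWidth) and the worst per-resource term (consumed cycles / number of units). A self-contained sketch of the same formula over plain containers, using hypothetical numbers; it is an illustration of the computation, not the library routine:

// Standalone sketch of the reciprocal-throughput bound computed by
// computeBlockRThroughput(). The scheduling model is replaced by a plain
// vector of (cycles, units) pairs for illustration.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Each entry models one processor resource: {consumed cycles, number of units}.
double blockRThroughput(unsigned DispatchWidth, unsigned NumMicroOps,
                        const std::vector<std::pair<unsigned, unsigned>> &Usage) {
  // Dispatch bound: at most DispatchWidth uOps can be dispatched per cycle.
  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
  // Resource bound: cycles consumed on a resource divided by its unit count.
  for (const auto &RU : Usage) {
    if (!RU.first)
      continue;
    Max = std::max(Max, static_cast<double>(RU.first) / RU.second);
  }
  return Max;
}

int main() {
  // Hypothetical block: 6 uOps at dispatch width 4, 4 cycles on a 2-unit
  // resource, and 3 cycles on a single-unit resource.
  double RT = blockRThroughput(4, 6, {{4, 2}, {3, 1}});
  std::printf("Block RThroughput = %.2f\n", RT); // max(1.50, 2.00, 3.00) = 3.00
  return 0;
}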

