[ModuloSchedule] Peel out prologs and epilogs, generate actual code

Summary: This extends the PeelingModuloScheduleExpander to generate prolog and epilog code, and correctly stitch uses through the prolog, kernel, epilog DAG. The key concept in this patch is to ensure that all transforms are *local*; only a function of a block and its immediate predecessor and successor. By defining the problem in this way we can inductively rewrite the entire DAG using only local knowledge that is easy to reason about. For example, we assume that all prologs and epilogs are near-perfect clones of the steady-state kernel. This means that if a block has an instruction that is predicated out, we can redirect all users of that instruction to that equivalent instruction in our immediate predecessor. As all blocks are clones, every instruction must have an equivalent in every other block. Similarly we can make the assumption by construction that if a value defined in a block is used outside that block, the only possible user is its immediate successors. We maintain this even for values that are used outside the loop by creating a limited form of LCSSA. This code isn't small, but it isn't complex. Enabled a bunch of testing from Hexagon. There are a couple of tests not enabled yet; I'm about 80% sure there isn't buggy codegen but the tests are checking for patterns that we don't produce. Those still need a bit more investigation. In the meantime we (Google) are happy with the code produced by this on our downstream SMS implementation, and believe it generates correct code. Subscribers: mgorny, hiraditya, jsji, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68205 llvm-svn: 373462
author: James Molloy <jmolloy@google.com> 2019-10-02 12:46:44 +0000
committer: James Molloy <jmolloy@google.com> 2019-10-02 12:46:44 +0000
commit: 9026518e7398eeced1f07f5f6003ba047209c033 (patch)
tree: 003cd7a5581a310d3f737ef01cf107b5d92ef5a3 /llvm
parent: 671fb3435862899db32ed20e680ded2ee665effd (diff)
download: bcm5719-llvm-9026518e7398eeced1f07f5f6003ba047209c033.tar.gz
bcm5719-llvm-9026518e7398eeced1f07f5f6003ba047209c033.zip
61 files changed, 554 insertions, 71 deletions
diff --git a/llvm/include/llvm/CodeGen/MachineLoopUtils.h b/llvm/include/llvm/CodeGen/MachineLoopUtils.h
new file mode 100644
index 00000000000..41379b75d00
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineLoopUtils.h
@@ -0,0 +1,41 @@
+//=- MachineLoopUtils.h - Helper functions for manipulating loops -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
+#define LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
+
+namespace llvm {
+class MachineBasicBlock;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+enum LoopPeelDirection {
+  LPD_Front, ///< Peel the first iteration of the loop.
+  LPD_Back   ///< Peel the last iteration of the loop.
+};
+
+/// Peels a single block loop. Loop must have two successors, one of which
+/// must be itself. Similarly it must have two predecessors, one of which must
+/// be itself.
+///
+/// The loop block is copied and inserted into the CFG such that two copies of
+/// the loop follow on from each other. The copy is inserted either before or
+/// after the loop based on Direction.
+///
+/// Phis are updated and an unconditional branch inserted at the end of the
+/// clone so as to execute a single iteration.
+///
+/// The trip count of Loop is not updated.
+MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction,
+                                       MachineBasicBlock *Loop,
+                                       MachineRegisterInfo &MRI,
+                                       const TargetInstrInfo *TII);
+
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index 36cc843c884..81a9b63b64c 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -62,8 +62,10 @@
 
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include <deque>
 #include <vector>
 
 namespace llvm {
@@ -142,9 +144,7 @@ public:
   /// Return the rescheduled instructions in order.
   ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
 
-  void dump() {
-    print(dbgs());
-  }
+  void dump() { print(dbgs()); }
   void print(raw_ostream &OS);
 };
 
@@ -270,9 +270,6 @@ public:
 
 /// A reimplementation of ModuloScheduleExpander. It works by generating a
 /// standalone kernel loop and peeling out the prologs and epilogs.
-///
-/// FIXME: This implementation cannot yet generate valid code. It can generate
-/// a correct kernel but cannot peel out prologs and epilogs.
 class PeelingModuloScheduleExpander {
   ModuloSchedule &Schedule;
   MachineFunction &MF;
@@ -281,17 +278,70 @@ class PeelingModuloScheduleExpander {
   const TargetInstrInfo *TII;
   LiveIntervals *LIS;
 
+  /// The original loop block that gets rewritten in-place.
   MachineBasicBlock *BB;
+  /// The original loop preheader.
   MachineBasicBlock *Preheader;
+  /// All prolog and epilog blocks.
+  SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
+  /// For every block, the stages that are produced.
+  DenseMap<MachineBasicBlock *, BitVector> LiveStages;
+  /// For every block, the stages that are available. A stage can be available
+  /// but not produced (in the epilog) or produced but not available (in the
+  /// prolog).
+  DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
+
+  /// CanonicalMIs and BlockMIs form a bidirectional map between any of the
+  /// loop kernel clones.
+  DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs;
+  DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *>
+      BlockMIs;
+
+  /// State passed from peelKernel to peelPrologAndEpilogs().
+  std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
+
 public:
   PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
                                 LiveIntervals *LIS)
       : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
         TII(ST.getInstrInfo()), LIS(LIS) {}
 
+  void expand();
+
   /// Runs ModuloScheduleExpander and treats it as a golden input to validate
   /// aspects of the code generated by PeelingModuloScheduleExpander.
   void validateAgainstModuloScheduleExpander();
+
+protected:
+  /// Converts BB from the original loop body to the rewritten, pipelined
+  /// steady-state.
+  void rewriteKernel();
+
+private:
+  /// Peels one iteration of the rewritten kernel (BB) in the specified
+  /// direction.
+  MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
+  /// Peel the kernel forwards and backwards to produce prologs and epilogs,
+  /// and stitch them together.
+  void peelPrologAndEpilogs();
+  /// All prolog and epilog blocks are clones of the kernel, so any produced
+  /// register in one block has an corollary in all other blocks.
+  Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
+  /// Change all users of MI, if MI is predicated out
+  /// (LiveStages[MI->getParent()] == false).
+  void rewriteUsesOf(MachineInstr *MI);
+  /// Insert branches between prologs, kernel and epilogs.
+  void fixupBranches();
+  /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
+  /// to a block dominated by all prologs and epilogs. This allows us to treat
+  /// the loop exiting block as any other kernel clone.
+  MachineBasicBlock *CreateLCSSAExitingBlock();
+  /// Helper to get the stage of an instruction in the schedule.
+  unsigned getStage(MachineInstr *MI) {
+    if (CanonicalMIs.count(MI))
+      MI = CanonicalMIs[MI];
+    return Schedule.getStage(MI);
+  }
 };
 
 /// Expander that simply annotates each scheduled instruction with a post-instr
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 3cf0c60108e..50b469d6d93 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -80,6 +80,7 @@ add_llvm_library(LLVMCodeGen
   MachineInstr.cpp
   MachineLICM.cpp
   MachineLoopInfo.cpp
+  MachineLoopUtils.cpp
   MachineModuleInfo.cpp
   MachineModuleInfoImpls.cpp
   MachineOperand.cpp
diff --git a/llvm/lib/CodeGen/MachineLoopUtils.cpp b/llvm/lib/CodeGen/MachineLoopUtils.cpp
new file mode 100644
index 00000000000..e074b76082f
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -0,0 +1,132 @@
+//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+// MI's parent and BB are clones of each other. Find the equivalent copy of MI
+// in BB.
+MachineInstr &findEquivalentInstruction(MachineInstr &MI,
+                                        MachineBasicBlock *BB) {
+  MachineBasicBlock *PB = MI.getParent();
+  unsigned Offset = std::distance(PB->instr_begin(), MachineBasicBlock::instr_iterator(MI));
+  return *std::next(BB->instr_begin(), Offset);
+}
+} // namespace
+
+MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
+                                             MachineBasicBlock *Loop,
+                                             MachineRegisterInfo &MRI,
+                                             const TargetInstrInfo *TII) {
+  MachineFunction &MF = *Loop->getParent();
+  MachineBasicBlock *Preheader = *Loop->pred_begin();
+  if (Preheader == Loop)
+    Preheader = *std::next(Loop->pred_begin());
+  MachineBasicBlock *Exit = *Loop->succ_begin();
+  if (Exit == Loop)
+    Exit = *std::next(Loop->succ_begin());
+
+  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(Loop->getBasicBlock());
+  if (Direction == LPD_Front)
+    MF.insert(Loop->getIterator(), NewBB);
+  else
+    MF.insert(std::next(Loop->getIterator()), NewBB);
+
+  // FIXME: Add DenseMapInfo trait for Register so we can use it as a key.
+  DenseMap<unsigned, Register> Remaps;
+  auto InsertPt = NewBB->end();
+  for (MachineInstr &MI : *Loop) {
+    MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
+    NewBB->insert(InsertPt, NewMI);
+    for (MachineOperand &MO : NewMI->defs()) {
+      Register OrigR = MO.getReg();
+      if (OrigR.isPhysical())
+        continue;
+      Register &R = Remaps[OrigR];
+      R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
+      MO.setReg(R);
+
+      if (Direction == LPD_Back) {
+        // Replace all uses outside the original loop with the new register.
+        // FIXME: is the use_iterator stable enough to mutate register uses
+        // while iterating?
+        SmallVector<MachineOperand *, 4> Uses;
+        for (auto &Use : MRI.use_operands(OrigR))
+          if (Use.getParent()->getParent() != Loop)
+            Uses.push_back(&Use);
+        for (auto *Use : Uses) {
+          MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+          Use->setReg(R);
+        }
+      }
+    }
+  }
+
+  for (auto I = NewBB->getFirstNonPHI(); I != NewBB->end(); ++I)
+    for (MachineOperand &MO : I->uses())
+      if (MO.isReg() && Remaps.count(MO.getReg()))
+        MO.setReg(Remaps[MO.getReg()]);
+
+  for (auto I = NewBB->begin(); I->isPHI(); ++I) {
+    MachineInstr &MI = *I;
+    unsigned LoopRegIdx = 3, InitRegIdx = 1;
+    if (MI.getOperand(2).getMBB() != Preheader)
+      std::swap(LoopRegIdx, InitRegIdx);
+    MachineInstr &OrigPhi = findEquivalentInstruction(MI, Loop);
+    assert(OrigPhi.isPHI());
+    if (Direction == LPD_Front) {
+      // When peeling front, we are only left with the initial value from the
+      // preheader.
+      Register R = MI.getOperand(LoopRegIdx).getReg();
+      if (Remaps.count(R))
+        R = Remaps[R];
+      OrigPhi.getOperand(InitRegIdx).setReg(R);
+      MI.RemoveOperand(LoopRegIdx + 1);
+      MI.RemoveOperand(LoopRegIdx + 0);
+    } else {
+      // When peeling back, the initial value is the loop-carried value from
+      // the original loop.
+      Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg();
+      MI.getOperand(LoopRegIdx).setReg(LoopReg);
+      MI.RemoveOperand(InitRegIdx + 1);
+      MI.RemoveOperand(InitRegIdx + 0);
+    }
+  }
+
+  DebugLoc DL;
+  if (Direction == LPD_Front) {
+    Preheader->replaceSuccessor(Loop, NewBB);
+    NewBB->addSuccessor(Loop);
+    Loop->replacePhiUsesWith(Preheader, NewBB);
+    if (TII->removeBranch(*Preheader) > 0)
+      TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL);
+    TII->removeBranch(*NewBB);
+    TII->insertBranch(*NewBB, Loop, nullptr, {}, DL);
+  } else {
+    Loop->replaceSuccessor(Exit, NewBB);
+    Exit->replacePhiUsesWith(Loop, NewBB);
+    NewBB->addSuccessor(Exit);
+
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    SmallVector<MachineOperand, 4> Cond;
+    bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond);
+    (void)CanAnalyzeBr;
+    assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+    TII->removeBranch(*Loop);
+    TII->insertBranch(*Loop, TBB == Exit ? NewBB : TBB,
+                      FBB == Exit ? NewBB : FBB, Cond, DL);
+    if (TII->removeBranch(*NewBB) > 0)
+      TII->insertBranch(*NewBB, Exit, nullptr, {}, DL);
+  }
+
+  return NewBB;
+}
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 9591211fd9e..89c9f6093a9 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -557,10 +557,7 @@ void SwingSchedulerDAG::schedule() {
   // The experimental code generator can't work if there are InstChanges.
   if (ExperimentalCodeGen && NewInstrChanges.empty()) {
     PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
-    // Experimental code generation isn't complete yet, but it can partially
-    // validate the code it generates against the original
-    // ModuloScheduleExpander.
-    MSE.validateAgainstModuloScheduleExpander();
+    MSE.expand();
   } else {
     ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
     MSE.expand();
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index a68153cf3b6..30aa81487c8 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -1564,6 +1565,266 @@ private:
 };
 } // namespace
 
+MachineBasicBlock *
+PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
+  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
+  if (LPD == LPD_Front)
+    PeeledFront.push_back(NewBB);
+  else
+    PeeledBack.push_front(NewBB);
+  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
+       ++I, ++NI) {
+    CanonicalMIs[&*I] = &*I;
+    CanonicalMIs[&*NI] = &*I;
+    BlockMIs[{NewBB, &*I}] = &*NI;
+    BlockMIs[{BB, &*I}] = &*I;
+  }
+  return NewBB;
+}
+
+void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
+  BitVector LS(Schedule.getNumStages(), true);
+  BitVector AS(Schedule.getNumStages(), true);
+  LiveStages[BB] = LS;
+  AvailableStages[BB] = AS;
+
+  // Peel out the prologs.
+  LS.reset();
+  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
+    LS[I] = 1;
+    Prologs.push_back(peelKernel(LPD_Front));
+    LiveStages[Prologs.back()] = LS;
+    AvailableStages[Prologs.back()] = LS;
+  }
+
+  // Create a block that will end up as the new loop exiting block (dominated by
+  // all prologs and epilogs). It will only contain PHIs, in the same order as
+  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
+  // that the exiting block is a (sub) clone of BB. This in turn gives us the
+  // property that any value deffed in BB but used outside of BB is used by a
+  // PHI in the exiting block.
+  MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
+
+  // Push out the epilogs, again in reverse order.
+  // We can't assume anything about the minumum loop trip count at this point,
+  // so emit a fairly complex epilog:
+  //  K[0, 1, 2]     // Kernel runs stages 0, 1, 2
+  //  E0[2] <- P1    // Epilog runs stage 2 only, so the state after is [0].
+  //  E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
+  //
+  // This creates a single-successor single-predecessor sequence of blocks for
+  // each epilog, which are kept this way for simplicity at this stage and
+  // cleaned up by the optimizer later.
+  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
+    Epilogs.push_back(nullptr);
+    for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
+      LS.reset();
+      LS[J] = 1;
+      Epilogs.back() = peelKernel(LPD_Back);
+      LiveStages[Epilogs.back()] = LS;
+      AvailableStages[Epilogs.back()] = AS;
+    }
+  }
+
+  // Now we've defined all the prolog and epilog blocks as a fallthrough
+  // sequence, add the edges that will be followed if the loop trip count is
+  // lower than the number of stages (connecting prologs directly with epilogs).
+  auto PI = Prologs.begin();
+  auto EI = Epilogs.begin();
+  assert(Prologs.size() == Epilogs.size());
+  for (; PI != Prologs.end(); ++PI, ++EI) {
+    MachineBasicBlock *Pred = *(*EI)->pred_begin();
+    (*PI)->addSuccessor(*EI);
+    for (MachineInstr &MI : (*EI)->phis()) {
+      Register Reg = MI.getOperand(1).getReg();
+      MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
+      if (Use && Use->getParent() == Pred)
+        Reg = getEquivalentRegisterIn(Reg, *PI);
+      MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+      MI.addOperand(MachineOperand::CreateMBB(*PI));
+    }
+  }
+
+  // Create a list of all blocks in order.
+  SmallVector<MachineBasicBlock *, 8> Blocks;
+  llvm::copy(PeeledFront, std::back_inserter(Blocks));
+  Blocks.push_back(BB);
+  llvm::copy(PeeledBack, std::back_inserter(Blocks));
+
+  // Iterate in reverse order over all instructions, remapping as we go.
+  for (MachineBasicBlock *B : reverse(Blocks)) {
+    for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
+         I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
+      MachineInstr *MI = &*I++;
+      rewriteUsesOf(MI);
+    }
+  }
+  // Now all remapping has been done, we're free to optimize the generated code.
+  for (MachineBasicBlock *B : reverse(Blocks))
+    EliminateDeadPhis(B, MRI, LIS);
+  EliminateDeadPhis(ExitingBB, MRI, LIS);
+}
+
+MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
+  MachineFunction &MF = *BB->getParent();
+  MachineBasicBlock *Exit = *BB->succ_begin();
+  if (Exit == BB)
+    Exit = *std::next(BB->succ_begin());
+
+  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+  MF.insert(std::next(BB->getIterator()), NewBB);
+
+  // Clone all phis in BB into NewBB and rewrite.
+  for (MachineInstr &MI : BB->phis()) {
+    auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
+    Register OldR = MI.getOperand(3).getReg();
+    Register R = MRI.createVirtualRegister(RC);
+    SmallVector<MachineInstr *, 4> Uses;
+    for (MachineInstr &Use : MRI.use_instructions(OldR))
+      if (Use.getParent() != BB)
+        Uses.push_back(&Use);
+    for (MachineInstr *Use : Uses)
+      Use->substituteRegister(OldR, R, /*SubIdx=*/0,
+                              *MRI.getTargetRegisterInfo());
+    MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+        .addReg(OldR)
+        .addMBB(BB);
+    BlockMIs[{NewBB, &MI}] = NI;
+    CanonicalMIs[NI] = &MI;
+  }
+  BB->replaceSuccessor(Exit, NewBB);
+  Exit->replacePhiUsesWith(BB, NewBB);
+  NewBB->addSuccessor(Exit);
+
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
+  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
+  (void)CanAnalyzeBr;
+  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+  TII->removeBranch(*BB);
+  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
+                    Cond, DebugLoc());
+  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
+  return NewBB;
+}
+
+Register
+PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
+                                                       MachineBasicBlock *BB) {
+  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
+  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
+}
+
+void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
+  if (MI->isPHI()) {
+    // This is an illegal PHI. The loop-carried (desired) value is operand 3,
+    // and it is produced by this block.
+    Register PhiR = MI->getOperand(0).getReg();
+    Register R = MI->getOperand(3).getReg();
+    int RMIStage = getStage(MRI.getUniqueVRegDef(R));
+    if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
+      R = MI->getOperand(1).getReg();
+    MRI.setRegClass(R, MRI.getRegClass(PhiR));
+    MRI.replaceRegWith(PhiR, R);
+    if (LIS)
+      LIS->RemoveMachineInstrFromMaps(*MI);
+    MI->eraseFromParent();
+    return;
+  }
+
+  int Stage = getStage(MI);
+  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
+      LiveStages[MI->getParent()].test(Stage))
+    // Instruction is live, no rewriting to do.
+    return;
+
+  for (MachineOperand &DefMO : MI->defs()) {
+    SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+    for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+      // Only PHIs can use values from this block by construction.
+      // Match with the equivalent PHI in B.
+      assert(UseMI.isPHI());
+      Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+                                             MI->getParent());
+      Subs.emplace_back(&UseMI, Reg);
+    }
+    for (auto &Sub : Subs)
+      Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+                                    *MRI.getTargetRegisterInfo());
+  }
+  if (LIS)
+    LIS->RemoveMachineInstrFromMaps(*MI);
+  MI->eraseFromParent();
+}
+
+void PeelingModuloScheduleExpander::fixupBranches() {
+  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
+      TII->analyzeLoopForPipelining(BB);
+  assert(Info);
+
+  // Work outwards from the kernel.
+  bool KernelDisposed = false;
+  int TC = Schedule.getNumStages() - 1;
+  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
+       ++PI, ++EI, --TC) {
+    MachineBasicBlock *Prolog = *PI;
+    MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
+    MachineBasicBlock *Epilog = *EI;
+    SmallVector<MachineOperand, 4> Cond;
+    Optional<bool> StaticallyGreater =
+        Info->createTripCountGreaterCondition(TC, *Prolog, Cond);
+    if (!StaticallyGreater.hasValue()) {
+      LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
+      // Dynamically branch based on Cond.
+      TII->removeBranch(*Prolog);
+      TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
+    } else if (*StaticallyGreater == false) {
+      LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
+      // Prolog never falls through; branch to epilog and orphan interior
+      // blocks. Leave it to unreachable-block-elim to clean up.
+      Prolog->removeSuccessor(Fallthrough);
+      for (MachineInstr &P : Fallthrough->phis()) {
+        P.RemoveOperand(2);
+        P.RemoveOperand(1);
+      }
+      TII->removeBranch(*Prolog);
+      TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
+      KernelDisposed = true;
+    } else {
+      LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
+      // Prolog always falls through; remove incoming values in epilog.
+      Prolog->removeSuccessor(Epilog);
+      for (MachineInstr &P : Epilog->phis()) {
+        P.RemoveOperand(4);
+        P.RemoveOperand(3);
+      }
+    }
+  }
+
+  if (!KernelDisposed) {
+    Info->adjustTripCount(-(Schedule.getNumStages() - 1));
+    Info->setPreheader(Prologs.back());
+  } else {
+    Info->disposed();
+  }
+}
+
+void PeelingModuloScheduleExpander::rewriteKernel() {
+  KernelRewriter KR(*Schedule.getLoop(), Schedule);
+  KR.rewrite();
+}
+
+void PeelingModuloScheduleExpander::expand() {
+  BB = Schedule.getLoop()->getTopBlock();
+  Preheader = Schedule.getLoop()->getLoopPreheader();
+  LLVM_DEBUG(Schedule.dump());
+
+  rewriteKernel();
+  peelPrologAndEpilogs();
+  fixupBranches();
+}
+
 void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
   BB = Schedule.getLoop()->getTopBlock();
   Preheader = Schedule.getLoop()->getLoopPreheader();
@@ -1593,6 +1854,7 @@ void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
   // Now run the new expansion algorithm.
   KernelRewriter KR(*Schedule.getLoop(), Schedule);
   KR.rewrite();
+  peelPrologAndEpilogs();
 
   // Collect all illegal phis that the new algorithm created. We'll give these
   // to KernelOperandInfo.
diff --git a/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir b/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir
index 5b953f13b1b..aa553dccc7d 100644
--- a/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir
+++ b/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir
@@ -1,4 +1,4 @@
-# RUN: llc < %s -x mir -march=hexagon -run-pass=modulo-schedule-test | FileCheck %s
+# RUN: llc < %s -x mir -march=hexagon -run-pass=modulo-schedule-test -pipeliner-experimental-cg=true | FileCheck %s
 
 # Simple check for this sanity test; ensure all instructions are in stage 0 in
 # the prolog and stage 3 in the epilog.
diff --git a/llvm/test/CodeGen/Hexagon/swp-art-deps-rec.ll b/llvm/test/CodeGen/Hexagon/swp-art-deps-rec.ll
index 5272faf8f9b..f89df6e5573 100644
--- a/llvm/test/CodeGen/Hexagon/swp-art-deps-rec.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-art-deps-rec.ll
@@ -1,7 +1,7 @@
 ; REQUIRES: asserts
 
 ; RUN: llc -march=hexagon -mcpu=hexagonv65 -O3 -debug-only=pipeliner \
-; RUN: < %s 2>&1 | FileCheck %s
+; RUN: < %s 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the artificial dependences are ignored while computing the
 ; circuits.
diff --git a/llvm/test/CodeGen/Hexagon/swp-bad-sched.ll b/llvm/test/CodeGen/Hexagon/swp-bad-sched.ll
index 74f0647a72a..ee93e8b3468 100644
--- a/llvm/test/CodeGen/Hexagon/swp-bad-sched.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-bad-sched.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc -march=hexagon -enable-pipeliner -enable-aa-sched-mi < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -enable-aa-sched-mi < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; CHECK: loop0(
 ; CHECK: loop0(.LBB0_[[LOOP:.]],
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-1.ll b/llvm/test/CodeGen/Hexagon/swp-carried-1.ll
index b33cf522115..e5b5be4d430 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-1.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -rdf-opt=0 -disable-hexagon-misched -hexagon-initial-cfg-cleanup=0 -lsr-setupcost-depth-limit=1 < %s | FileCheck %s
+; RUN: llc -march=hexagon -rdf-opt=0 -disable-hexagon-misched -hexagon-initial-cfg-cleanup=0 -lsr-setupcost-depth-limit=1 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we generate the correct code when a loop carried value
 ; is scheduled one stage earlier than it's use. The code in
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
index 8271e8b1b54..d9db8ec6194 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
 # REQUIRES: asserts
 
 # Test that the loop carried dependence check correctly identifies a recurrence.
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
index 126e6aa462b..5271a2db775 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
 # REQUIRES: asserts
 
 # Test that the loop carried dependence check correctly identifies a recurrence
diff --git a/llvm/test/CodeGen/Hexagon/swp-chain-refs.ll b/llvm/test/CodeGen/Hexagon/swp-chain-refs.ll
index d0e72be778a..5695f3d61b7 100644
--- a/llvm/test/CodeGen/Hexagon/swp-chain-refs.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-chain-refs.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=hexagon -enable-pipeliner=true -stats -o /dev/null < %s \
-; RUN:      2>&1 | FileCheck %s --check-prefix=STATS
+; RUN:      2>&1 -pipeliner-experimental-cg=true | FileCheck %s --check-prefix=STATS
 ; REQUIRES: asserts
 
 ; Test that we do not schedule chained references too far apart,
diff --git a/llvm/test/CodeGen/Hexagon/swp-change-dep1.ll b/llvm/test/CodeGen/Hexagon/swp-change-dep1.ll
index 855f43e5012..157bdd069f9 100644
--- a/llvm/test/CodeGen/Hexagon/swp-change-dep1.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-change-dep1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=1 < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=1 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we update the offset correctly for loads that are
 ; moved past stores. In these cases, we change the dependences
diff --git a/llvm/test/CodeGen/Hexagon/swp-change-deps.ll b/llvm/test/CodeGen/Hexagon/swp-change-deps.ll
index e2ca071f5f5..1b35c633c52 100644
--- a/llvm/test/CodeGen/Hexagon/swp-change-deps.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-change-deps.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we generate the correct offsets for loads in the prolog
 ; after removing dependences on a post-increment instructions of the
diff --git a/llvm/test/CodeGen/Hexagon/swp-check-offset.ll b/llvm/test/CodeGen/Hexagon/swp-check-offset.ll
index 220ebde0f86..6a7211df12d 100644
--- a/llvm/test/CodeGen/Hexagon/swp-check-offset.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-check-offset.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s | FileCheck %s
-; RUN: llc -march=hexagon -mcpu=hexagonv62 -enable-pipeliner < %s | FileCheck --check-prefix=CHECK-V62 %s
-; RUN: llc -march=hexagon -mcpu=hexagonv65 -enable-pipeliner < %s | FileCheck --check-prefix=CHECK-V65 %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck --check-prefix=CHECK-V62 %s
+; RUN: llc -march=hexagon -mcpu=hexagonv65 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck --check-prefix=CHECK-V65 %s
 
 ;
 ; Make sure we pipeline the loop and that we generate the correct
diff --git a/llvm/test/CodeGen/Hexagon/swp-const-tc1.ll b/llvm/test/CodeGen/Hexagon/swp-const-tc1.ll
index 95dfc37e306..c785ee74513 100644
--- a/llvm/test/CodeGen/Hexagon/swp-const-tc1.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-const-tc1.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=hexagon -enable-pipeliner -enable-pipeliner-opt-size \
 ; RUN:     -verify-machineinstrs -hexagon-initial-cfg-cleanup=0 \
 ; RUN:     -enable-aa-sched-mi=false -hexagon-expand-condsets=0 \
-; RUN:     < %s | FileCheck %s
+; RUN:     < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Disable expand-condsets because it will assert on undefined registers.
 
diff --git a/llvm/test/CodeGen/Hexagon/swp-const-tc2.ll b/llvm/test/CodeGen/Hexagon/swp-const-tc2.ll
index 8b9f87f428d..29d12bd1439 100644
--- a/llvm/test/CodeGen/Hexagon/swp-const-tc2.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-const-tc2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -rdf-opt=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -rdf-opt=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we fixup a pipelined loop correctly when the number of
 ; stages is greater than the compile-time loop trip count. In this
diff --git a/llvm/test/CodeGen/Hexagon/swp-const-tc3.ll b/llvm/test/CodeGen/Hexagon/swp-const-tc3.ll
index a8caebd09eb..48a61428538 100644
--- a/llvm/test/CodeGen/Hexagon/swp-const-tc3.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-const-tc3.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the pipeliner correctly fixes up the pipelined CFG when the loop
 ; has a constant trip count, and the trip count is less than the number of
diff --git a/llvm/test/CodeGen/Hexagon/swp-conv3x3-nested.ll b/llvm/test/CodeGen/Hexagon/swp-conv3x3-nested.ll
index 48f33bd6d22..d14177cc684 100644
--- a/llvm/test/CodeGen/Hexagon/swp-conv3x3-nested.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-conv3x3-nested.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 ; XFAIL: *
 ; LSR changes required.
 
diff --git a/llvm/test/CodeGen/Hexagon/swp-copytophi-dag.ll b/llvm/test/CodeGen/Hexagon/swp-copytophi-dag.ll
index 69743407c14..f511241a7c7 100644
--- a/llvm/test/CodeGen/Hexagon/swp-copytophi-dag.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-copytophi-dag.ll
@@ -1,7 +1,7 @@
 ; REQUIRES: asserts
 ;
 ; RUN: llc -march=hexagon -enable-pipeliner=true -debug-only=pipeliner < %s \
-; RUN: 2>&1 | FileCheck %s
+; RUN: 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the artificial dependence is created as a result of
 ; CopyToPhi DAG mutation.
diff --git a/llvm/test/CodeGen/Hexagon/swp-dep-neg-offset.ll b/llvm/test/CodeGen/Hexagon/swp-dep-neg-offset.ll
index 7ba4286bf41..cc19ce1ae44 100644
--- a/llvm/test/CodeGen/Hexagon/swp-dep-neg-offset.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-dep-neg-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -hexagon-initial-cfg-cleanup=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the code that changes the dependences does not allow
 ; a load with a negative offset to be overlapped with the post
diff --git a/llvm/test/CodeGen/Hexagon/swp-disable-Os.ll b/llvm/test/CodeGen/Hexagon/swp-disable-Os.ll
index cbdc3ba36f6..5698d37cb23 100644
--- a/llvm/test/CodeGen/Hexagon/swp-disable-Os.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-disable-Os.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 ; CHECK: loop0(.LBB0_{{[0-9]+}},#347)
 
 target triple = "hexagon"
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-numphis.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-numphis.ll
index a54ac582560..f57f94bf03c 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-numphis.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-numphis.ll
@@ -1,6 +1,6 @@
 ; XFAIL: *
 ; Needs some fixed in the pipeliner.
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; CHECK: endloop0
 ; CHECK: vmem
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi2.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi2.ll
index b2a7dada33f..b32fed97f26 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi2.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=3 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 %s.0 = type { i16, i8, i8, i16, i8, i8, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i8, i8, %s.1, [2 x [16 x %s.2]], i32 (i8*, i8*, i8*, i8*, i8*)*, %s.3*, %s.3*, [120 x i8], i8, i8, %s.4*, [2 x [120 x [8 x i8]]], [56 x i8], [2 x [121 x %s.5]], [2 x %s.5], %s.5*, %s.5*, i32, i32, i16, i8, i8, %s.7, %s.9, %s.11, %s.8*, %s.8* }
 %s.1 = type { i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, [16 x i8], i8, [4 x i8], [32 x i16], [32 x i16], [2 x i8], [4 x i8], [2 x [4 x i8]], [2 x [4 x i8]], i32, i32, i16, i8 }
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi4.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi4.ll
index e85ea7654e0..8b611cfe0b4 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi4.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi4.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we generate the correct value for a Phi in the epilog
 ; that is for a value defined two stages earlier. An extra copy in the
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi5.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi5.ll
index a524dc0d5be..72c05284d69 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi5.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi5.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we use the correct name in an epilog phi for a phi value
 ; that is defined for the last time in the kernel. Previously, we
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll
index 370d31d92c7..214307e2513 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mno-pairing -mno-compound -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mno-pairing -mno-compound -hexagon-initial-cfg-cleanup=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 ; XFAIL: *
 
 ; Test that we generate the correct phi names in the epilog when the pipeliner
diff --git a/llvm/test/CodeGen/Hexagon/swp-kernel-phi1.ll b/llvm/test/CodeGen/Hexagon/swp-kernel-phi1.ll
index 681e7492337..c87479f6e97 100644
--- a/llvm/test/CodeGen/Hexagon/swp-kernel-phi1.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-kernel-phi1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner-opt-size -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner-opt-size -hexagon-initial-cfg-cleanup=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we generate the correct names for the phis in the kernel for the
 ; incoming values. In this case, the loop contains a phi and has another phi
diff --git a/llvm/test/CodeGen/Hexagon/swp-large-rec.ll b/llvm/test/CodeGen/Hexagon/swp-large-rec.ll
index ee88aaffd5c..45d40df4ec0 100644
--- a/llvm/test/CodeGen/Hexagon/swp-large-rec.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-large-rec.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=hexagon -enable-pipeliner -stats \
 ; RUN:     -pipeliner-prune-loop-carried=false -fp-contract=fast \
-; RUN:     -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=STATS
+; RUN:     -o /dev/null < %s 2>&1 -pipeliner-experimental-cg=true | FileCheck %s --check-prefix=STATS
 ; REQUIRES: asserts
 
 ; That that we do not pipeline this loop. The recurrence is too large. If
diff --git a/llvm/test/CodeGen/Hexagon/swp-listen-loop3.ll b/llvm/test/CodeGen/Hexagon/swp-listen-loop3.ll
index 9b68cf95659..d8e4f003d53 100644
--- a/llvm/test/CodeGen/Hexagon/swp-listen-loop3.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-listen-loop3.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -pipeliner-ignore-recmii -pipeliner-max-stages=2 -enable-pipeliner < %s | FileCheck %s
+; RUN: llc -march=hexagon -pipeliner-ignore-recmii -pipeliner-max-stages=2 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; This is a loop we pipeline to three packets, though we could do bettter.
 
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
index b95e6241956..9f145189786 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the pipeliner schedules a store before the load in which there is a
 ; loop carried dependence. Previously, the loop carried dependence wasn't added
diff --git a/llvm/test/CodeGen/Hexagon/swp-lots-deps.ll b/llvm/test/CodeGen/Hexagon/swp-lots-deps.ll
index a657f92c5d5..631d02649d1 100644
--- a/llvm/test/CodeGen/Hexagon/swp-lots-deps.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-lots-deps.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -stats -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=STATS
+; RUN: llc -march=hexagon -enable-pipeliner -stats -o /dev/null < %s 2>&1 -pipeliner-experimental-cg=true | FileCheck %s --check-prefix=STATS
 ; REQUIRES: asserts
 
 ; STATS: 1 pipeliner        - Number of loops software pipelined
diff --git a/llvm/test/CodeGen/Hexagon/swp-max.ll b/llvm/test/CodeGen/Hexagon/swp-max.ll
index 26238ea6fb3..32282204ec5 100644
--- a/llvm/test/CodeGen/Hexagon/swp-max.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-max.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner \
-; RUN:     -pipeliner-max-stages=2 < %s | FileCheck %s
+; RUN:     -pipeliner-max-stages=2 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 @A = global [8 x i32] [i32 4, i32 -3, i32 5, i32 -2, i32 -1, i32 2, i32 6, i32 -2], align 8
 
diff --git a/llvm/test/CodeGen/Hexagon/swp-maxstart.ll b/llvm/test/CodeGen/Hexagon/swp-maxstart.ll
index 811c94062a0..8d65e76913f 100644
--- a/llvm/test/CodeGen/Hexagon/swp-maxstart.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-maxstart.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -O3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -O3 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the MinStart computation, which is based upon the length
 ; of the chain edges, is computed correctly. A bug in the code allowed
diff --git a/llvm/test/CodeGen/Hexagon/swp-memrefs-epilog.ll b/llvm/test/CodeGen/Hexagon/swp-memrefs-epilog.ll
index 81f4d22cfd5..20e39dd08fd 100644
--- a/llvm/test/CodeGen/Hexagon/swp-memrefs-epilog.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-memrefs-epilog.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -O2 -fp-contract=fast < %s | FileCheck %s
+; RUN: llc -march=hexagon -O2 -fp-contract=fast < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the memoperands for instructions in the epilog are updated
 ; correctly. Previously, the pipeliner updated the offset for the memoperands
diff --git a/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll b/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll
index fc2576af8ac..5a2e7d4e14d 100644
--- a/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Make sure we attempt to pipeline all inner most loops.
 
diff --git a/llvm/test/CodeGen/Hexagon/swp-new-phi.ll b/llvm/test/CodeGen/Hexagon/swp-new-phi.ll
index 0ba3e30731a..d3c1058fe36 100644
--- a/llvm/test/CodeGen/Hexagon/swp-new-phi.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-new-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=2 < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=2 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the generatePhi code doesn't rename a a Phi instruction that's defined
 ; in the same block.  The bug causes a Phi to incorrectly depend on another Phi.
diff --git a/llvm/test/CodeGen/Hexagon/swp-order-copies.ll b/llvm/test/CodeGen/Hexagon/swp-order-copies.ll
index 5de0717654f..0a017c4ab7f 100644
--- a/llvm/test/CodeGen/Hexagon/swp-order-copies.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-order-copies.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the instruction ordering code in the pipeliner fixes up dependences
 ; between post-increment register definitions and uses so that the register
diff --git a/llvm/test/CodeGen/Hexagon/swp-order-deps7.ll b/llvm/test/CodeGen/Hexagon/swp-order-deps7.ll
index d1d852bcae7..4cd29a4a0ba 100644
--- a/llvm/test/CodeGen/Hexagon/swp-order-deps7.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-order-deps7.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the pipeliner cause an assert and correctly pipelines the
 ; loop.
diff --git a/llvm/test/CodeGen/Hexagon/swp-order.ll b/llvm/test/CodeGen/Hexagon/swp-order.ll
index bc16b8835b7..14cc682eb7e 100644
--- a/llvm/test/CodeGen/Hexagon/swp-order.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-order.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that when we order instructions in a packet we check for
 ; order dependences so that the source of an order dependence
diff --git a/llvm/test/CodeGen/Hexagon/swp-phi-ch-offset.ll b/llvm/test/CodeGen/Hexagon/swp-phi-ch-offset.ll
index 68cb69ba2ac..31b98328a2f 100644
--- a/llvm/test/CodeGen/Hexagon/swp-phi-ch-offset.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-phi-ch-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=2 < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=2 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we generate the correct offsets after we removed unneeded
 ; chain dependences between Phis and generated a better pipeline.
diff --git a/llvm/test/CodeGen/Hexagon/swp-phi-chains.ll b/llvm/test/CodeGen/Hexagon/swp-phi-chains.ll
index e3a6c9db616..3037dcc2d5e 100644
--- a/llvm/test/CodeGen/Hexagon/swp-phi-chains.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-phi-chains.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -debug-only=pipeliner < %s -o - 2>&1 | FileCheck %s
+; RUN: llc -march=hexagon -debug-only=pipeliner < %s -o - 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
 ; REQUIRES: asserts
 
 ; Test that there is a chain edge between two dependent Phis.
diff --git a/llvm/test/CodeGen/Hexagon/swp-phi-dep.ll b/llvm/test/CodeGen/Hexagon/swp-phi-dep.ll
index 38b56c1126a..ec7af41a31f 100644
--- a/llvm/test/CodeGen/Hexagon/swp-phi-dep.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-phi-dep.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 -enable-pipeliner -pipeliner-max-stages=2 < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 -enable-pipeliner -pipeliner-max-stages=2 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Check that the pipelined code uses the proper address in the
 ; prolog and the kernel. The bug occurs when the address computation
diff --git a/llvm/test/CodeGen/Hexagon/swp-phi-ref.ll b/llvm/test/CodeGen/Hexagon/swp-phi-ref.ll
index d39252141d2..be838e767aa 100644
--- a/llvm/test/CodeGen/Hexagon/swp-phi-ref.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-phi-ref.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -enable-bsb-sched=0 -join-liveintervals=false < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -enable-bsb-sched=0 -join-liveintervals=false < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; XFAIL: *
 ; This test is failing after post-ra machine sinking.
diff --git a/llvm/test/CodeGen/Hexagon/swp-pragma-disable.ii b/llvm/test/CodeGen/Hexagon/swp-pragma-disable.ii
index 80494f5ac10..b97065c129b 100644
--- a/llvm/test/CodeGen/Hexagon/swp-pragma-disable.ii
+++ b/llvm/test/CodeGen/Hexagon/swp-pragma-disable.ii
@@ -1,5 +1,5 @@
 ; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner  \
-; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null | FileCheck %s
+; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null -pipeliner-experimental-cg=true | FileCheck %s
 ; REQUIRES: asserts
 ;
 ; Test that checks if pipeliner disabled by pragma 
diff --git a/llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii b/llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii
index 6a4ba7eaccd..2c6b606a99f 100644
--- a/llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii
+++ b/llvm/test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii
@@ -1,5 +1,5 @@
 ; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner  \
-; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null | FileCheck %s
+; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null -pipeliner-experimental-cg=true | FileCheck %s
 ; REQUIRES: asserts
 ;
 ; Test that checks if the II set by pragma was taken by pipeliner.
diff --git a/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll b/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
index 4a6fa5a6cfe..14b04a1cfe6 100644
--- a/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -rdf-opt=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -rdf-opt=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we generate the correct name for a value in a prolog block. The
 ; pipeliner was using an incorrect value for an instruction in the 2nd prolog
diff --git a/llvm/test/CodeGen/Hexagon/swp-rename.ll b/llvm/test/CodeGen/Hexagon/swp-rename.ll
index ab0cc11ec7e..eb60a0e38d0 100644
--- a/llvm/test/CodeGen/Hexagon/swp-rename.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-rename.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner < %s | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; A test that the Phi rewrite logic is correct.
 
diff --git a/llvm/test/CodeGen/Hexagon/swp-resmii-1.ll b/llvm/test/CodeGen/Hexagon/swp-resmii-1.ll
index 1a9734860d5..1e2eefae6ac 100644
--- a/llvm/test/CodeGen/Hexagon/swp-resmii-1.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-resmii-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -debug-only=pipeliner < %s -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc -march=hexagon -enable-pipeliner -debug-only=pipeliner < %s -o - 2>&1 > /dev/null -pipeliner-experimental-cg=true | FileCheck %s
 ; REQUIRES: asserts
 
 ; Test that checks that we compute the correct ResMII for haar.
diff --git a/llvm/test/CodeGen/Hexagon/swp-resmii.ll b/llvm/test/CodeGen/Hexagon/swp-resmii.ll
index 851d82ea50f..99812af3be5 100644
--- a/llvm/test/CodeGen/Hexagon/swp-resmii.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-resmii.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner  \
-; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null | FileCheck %s
+; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null -pipeliner-experimental-cg=true | FileCheck %s
 ; REQUIRES: asserts
 ;
 ; Test that checks if the ResMII is 1.
diff --git a/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll b/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll
index 7371ed10a71..6883e51503b 100644
--- a/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that the pipeliner generates correct code when attempting to reuse
 ; an existing phi. This test case contains a phi that references another
diff --git a/llvm/test/CodeGen/Hexagon/swp-sigma.ll b/llvm/test/CodeGen/Hexagon/swp-sigma.ll
index 56742ca83a7..968fafc449d 100644
--- a/llvm/test/CodeGen/Hexagon/swp-sigma.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-sigma.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
+; RUN: llc -march=hexagon -O2 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; We do not pipeline sigma yet, but the non-pipelined version
 ; with good scheduling is pretty fast. The compiler generates
diff --git a/llvm/test/CodeGen/Hexagon/swp-stages4.ll b/llvm/test/CodeGen/Hexagon/swp-stages4.ll
index 2d88094cf74..1b96aca2a48 100644
--- a/llvm/test/CodeGen/Hexagon/swp-stages4.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-stages4.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner -pipeliner-max-stages=2 -disable-block-placement=0 -hexagon-bit=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner -pipeliner-max-stages=2 -disable-block-placement=0 -hexagon-bit=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Test that we rename registers correctly for multiple stages when there is a
 ; Phi and depends upon another Phi.
 
 ; CHECK: = and
 ; CHECK: = and
-; CHECK: = and
+; CHECK: r[[REGA:[0-9]+]] = memub(r{{[0-9]+}}+#1)
 ; CHECK: r[[REG0:[0-9]+]] = and(r[[REG1:[0-9]+]],#255)
 ; CHECK-NOT: r[[REG0]] = and(r[[REG1]],#255)
 ; CHECK: loop0(.LBB0_[[LOOP:.]],
diff --git a/llvm/test/CodeGen/Hexagon/swp-stages5.ll b/llvm/test/CodeGen/Hexagon/swp-stages5.ll
index fdfb2101cd3..1f8463fbc30 100644
--- a/llvm/test/CodeGen/Hexagon/swp-stages5.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-stages5.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner -pipeliner-max-stages=2 -hexagon-bit=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner -pipeliner-max-stages=2 -hexagon-bit=0 < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Very similar to swp-stages4.ll, but the pipelined schedule is a little
 ; different.
diff --git a/llvm/test/CodeGen/Hexagon/swp-subreg.ll b/llvm/test/CodeGen/Hexagon/swp-subreg.ll
index d75b3afc7b7..b9754f4eb36 100644
--- a/llvm/test/CodeGen/Hexagon/swp-subreg.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-subreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -stats -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=STATS
+; RUN: llc -march=hexagon -enable-pipeliner -stats -o /dev/null < %s 2>&1 -pipeliner-experimental-cg=true | FileCheck %s --check-prefix=STATS
 ; REQUIRES: asserts
 
 ; We're unable to pipeline a loop with a subreg as an operand of a Phi.
diff --git a/llvm/test/CodeGen/Hexagon/swp-swap.ll b/llvm/test/CodeGen/Hexagon/swp-swap.ll
index a8432cb7d21..4cd073cb16b 100644
--- a/llvm/test/CodeGen/Hexagon/swp-swap.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-swap.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -stats -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=STATS
+; RUN: llc -march=hexagon -enable-pipeliner -stats -o /dev/null < %s 2>&1 -pipeliner-experimental-cg=true | FileCheck %s --check-prefix=STATS
 ; REQUIRES: asserts
 
 ; Test that we don't pipeline, incorrectly, the swap operation.
diff --git a/llvm/test/CodeGen/Hexagon/swp-tfri.ll b/llvm/test/CodeGen/Hexagon/swp-tfri.ll
index 66b999e5590..f0c26045430 100644
--- a/llvm/test/CodeGen/Hexagon/swp-tfri.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-tfri.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -enable-pipeliner -hexagon-initial-cfg-cleanup=0 -stats -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=STATS
+; RUN: llc -march=hexagon -enable-pipeliner -hexagon-initial-cfg-cleanup=0 -stats -o /dev/null < %s 2>&1 -pipeliner-experimental-cg=true | FileCheck %s --check-prefix=STATS
 ; REQUIRES: asserts
 
 ; Check that we handle the case when a value is first defined in the loop.
diff --git a/llvm/test/CodeGen/Hexagon/swp-vect-dotprod.ll b/llvm/test/CodeGen/Hexagon/swp-vect-dotprod.ll
index 3ff88452499..4bd1a513429 100644
--- a/llvm/test/CodeGen/Hexagon/swp-vect-dotprod.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-vect-dotprod.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s | FileCheck %s
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -O2 < %s | FileCheck %s
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O2 < %s -pipeliner-experimental-cg=true | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s -pipeliner-experimental-cg=true | FileCheck %s
 ;
 ; Check that we pipeline a vectorized dot product in a single packet.
 ;
diff --git a/llvm/test/CodeGen/Hexagon/swp-vmult.ll b/llvm/test/CodeGen/Hexagon/swp-vmult.ll
index dfc7dd91324..fd9cdf9b38c 100644
--- a/llvm/test/CodeGen/Hexagon/swp-vmult.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-vmult.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: to-be-fixed
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
 
 ; Multiply and accumulate
 ; CHECK: mpyi([[REG0:r([0-9]+)]],[[REG1:r([0-9]+)]])
diff --git a/llvm/test/CodeGen/Hexagon/swp-vsum.ll b/llvm/test/CodeGen/Hexagon/swp-vsum.ll
index 1c4d1c2ef01..5dcd2824550 100644
--- a/llvm/test/CodeGen/Hexagon/swp-vsum.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-vsum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s | FileCheck %s
-; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-pipeliner < %s | FileCheck %s --check-prefix=CHECKV60
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-pipeliner < %s -pipeliner-experimental-cg=true | FileCheck %s --check-prefix=CHECKV60
 
 ; Simple vector total.
 ; CHECK: loop0(.LBB0_[[LOOP:.]],
author	James Molloy <jmolloy@google.com>	2019-10-02 12:46:44 +0000
committer	James Molloy <jmolloy@google.com>	2019-10-02 12:46:44 +0000
commit	9026518e7398eeced1f07f5f6003ba047209c033 (patch)
tree	003cd7a5581a310d3f737ef01cf107b5d92ef5a3 /llvm
parent	671fb3435862899db32ed20e680ded2ee665effd (diff)
download	bcm5719-llvm-9026518e7398eeced1f07f5f6003ba047209c033.tar.gz bcm5719-llvm-9026518e7398eeced1f07f5f6003ba047209c033.zip