diff options
| -rw-r--r-- | llvm/lib/CodeGen/IfConversion.cpp | 411 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 47 | 
2 files changed, 388 insertions, 70 deletions
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 5477c787920..fc270a2c997 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -58,6 +58,8 @@ static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",                                         cl::init(false), cl::Hidden);  static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",                                      cl::init(false), cl::Hidden); +static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond", +                                        cl::init(false), cl::Hidden);  static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",                                       cl::init(true), cl::Hidden); @@ -68,6 +70,7 @@ STATISTIC(NumTriangleRev,  "Number of triangle (R) if-conversions performed");  STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");  STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");  STATISTIC(NumDiamonds,     "Number of diamond if-conversions performed"); +STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");  STATISTIC(NumIfConvBBs,    "Number of if-converted blocks");  STATISTIC(NumDupBBs,       "Number of duplicated blocks");  STATISTIC(NumUnpred,       "Number of true blocks of diamonds unpredicated"); @@ -82,7 +85,9 @@ namespace {        ICTriangleRev,   // Same as ICTriangle, but true path rev condition.        ICTriangleFalse, // Same as ICTriangle, but on the false path.        ICTriangle,      // BB is entry of a triangle sub-CFG. -      ICDiamond        // BB is entry of a diamond sub-CFG. +      ICDiamond,       // BB is entry of a diamond sub-CFG. +      ICForkedDiamond  // BB is entry of an almost diamond sub-CFG, with a +                       // common tail that can be shared.      };      /// BBInfo - One per MachineBasicBlock, this is used to cache the result @@ -114,6 +119,7 @@ namespace {        bool IsAnalyzed      : 1;        bool IsEnqueued      : 1;        bool IsBrAnalyzable  : 1; +      bool IsBrReversible  : 1;        bool HasFallThrough  : 1;        bool IsUnpredicable  : 1;        bool CannotBeCopied  : 1; @@ -128,9 +134,10 @@ namespace {        SmallVector<MachineOperand, 4> Predicate;        BBInfo() : IsDone(false), IsBeingAnalyzed(false),                   IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), -                 HasFallThrough(false), IsUnpredicable(false), -                 CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), -                 ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), +                 IsBrReversible(false), HasFallThrough(false), +                 IsUnpredicable(false), CannotBeCopied(false), +                 ClobbersPred(false), NonPredSize(0), ExtraCost(0), +                 ExtraCost2(0), BB(nullptr), TrueBB(nullptr),                   FalseBB(nullptr) {}      }; @@ -148,11 +155,15 @@ namespace {      struct IfcvtToken {        BBInfo &BBI;        IfcvtKind Kind; -      bool NeedSubsumption;        unsigned NumDups;        unsigned NumDups2; -      IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) -        : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} +      bool NeedSubsumption : 1; +      bool TClobbersPred : 1; +      bool FClobbersPred : 1; +      IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, +                 bool tc = false, bool fc = false) +        : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), +          TClobbersPred(tc), FClobbersPred(fc) {}      };      /// BBAnalysis - Results of if-conversion feasibility analysis indexed by @@ -202,23 +213,40 @@ namespace {                         bool FalseBranch, unsigned &Dups,                         BranchProbability Prediction) const;      bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, -                      unsigned &Dups1, unsigned &Dups2) const; +                      unsigned &Dups1, unsigned &Dups2, +                      BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; +    bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, +                            unsigned &Dups1, unsigned &Dups2, +                            BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;      void AnalyzeBranches(BBInfo &BBI);      void ScanInstructions(BBInfo &BBI,                            MachineBasicBlock::iterator &Begin,                            MachineBasicBlock::iterator &End) const; +    bool RescanInstructions( +        MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, +        MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, +        BBInfo &TrueBBI, BBInfo &FalseBBI) const;      void AnalyzeBlock(MachineBasicBlock *MBB,                        std::vector<std::unique_ptr<IfcvtToken>> &Tokens);      bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, -                             bool isTriangle = false, bool RevBranch = false); +                             bool isTriangle = false, bool RevBranch = false, +                             bool hasCommonTail = false);      void AnalyzeBlocks(MachineFunction &MF,                         std::vector<std::unique_ptr<IfcvtToken>> &Tokens);      void InvalidatePreds(MachineBasicBlock *BB);      void RemoveExtraEdges(BBInfo &BBI);      bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);      bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); +    bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, +                                unsigned NumDups1, unsigned NumDups2, +                                bool TClobbersPred, bool FClobbersPred, +                                bool RemoveTrueBranch, bool RemoveFalseBranch, +                                bool MergeAddEdges);      bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,                            unsigned NumDups1, unsigned NumDups2); +    bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind, +                              unsigned NumDups1, unsigned NumDups2, +                              bool TClobbers, bool FClobbers);      void PredicateBlock(BBInfo &BBI,                          MachineBasicBlock::iterator E,                          SmallVectorImpl<MachineOperand> &Cond, @@ -410,6 +438,19 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {          if (RetVal) ++NumDiamonds;          break;        } +      case ICForkedDiamond: { +        if (DisableForkedDiamond) break; +        DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#" +                     << BBI.BB->getNumber() << " (T:" +                     << BBI.TrueBB->getNumber() << ",F:" +                     << BBI.FalseBB->getNumber() << ") "); +        RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, +                                      Token->TClobbersPred, +                                      Token->FClobbersPred); +        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); +        if (RetVal) ++NumForkedDiamonds; +        break; +      }        }        Change |= RetVal; @@ -627,8 +668,12 @@ static void countDuplicatedInstructions(    // If Dups1 includes all of a block, then don't count duplicate    // instructions at the end of the blocks. -  if (TIB == TIE || FIB == FIE) +  if (TIB == TIE || FIB == FIE) { +    // Same reason as below. We need end to point just after the last non-shared +    // instruction. +    ++TIE; ++FIE;      return; +  }    // Count duplicate instructions at the ends of the blocks.    while (TIE != TIB && FIE != FIB) { @@ -646,12 +691,124 @@ static void countDuplicatedInstructions(      --TIE;      --FIE;    } +  // TIE and FIE both now point at the last non-shared instruction, move them +  // forward. +  ++TIE; ++FIE; +} + +/// RescanInstructions - Run ScanInstructions on a pair of blocks. +/// @param TIB - True Iterator Begin, points to first non-shared instruction +/// @param FIB - False Iterator Begin, points to first non-shared instruction +/// @param TIE - True Iterator End, points past last non-shared instruction +/// @param FIE - False Iterator End, points past last non-shared instruction +/// @param TrueBBI  - BBInfo to update for the true block. +/// @param FalseBBI - BBInfo to update for the false block. +/// @returns - false if either block cannot be predicated or if both blocks end +///   with a predicate-clobbering instruction. +bool IfConverter::RescanInstructions( +    MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, +    MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, +    BBInfo &TrueBBI, BBInfo &FalseBBI) const { +  ScanInstructions(TrueBBI, TIB, TIE); +  if (TrueBBI.IsUnpredicable) +    return false; +  ScanInstructions(FalseBBI, FIB, FIE); +  if (FalseBBI.IsUnpredicable) +    return false; +  if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred) +    return false; +  return true; +} + +/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) form a diamond if a common tail block is +/// extracted. +/// While not strictly a diamond, this pattern would form a diamond if +/// tail-merging had merged the shared tails. +///           EBB +///         _/   \_ +///         |     | +///        TBB   FBB +///        /  \ /   \ +///  FalseBB TrueBB FalseBB +/// Currently only handles analyzable branches. +/// Specifically excludes actual diamonds to avoid overlap. +bool IfConverter::ValidForkedDiamond( +    BBInfo &TrueBBI, BBInfo &FalseBBI, +    unsigned &Dups1, unsigned &Dups2, +    BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { +  Dups1 = Dups2 = 0; +  if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || +      FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) +    return false; + +  if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable) +    return false; +  // Don't IfConvert blocks that can't be folded into their predecessor. +  if  (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) +    return false; + +  // This function is specifically looking for conditional tails, as +  // unconditional tails are already handled by the standard diamond case. +  if (TrueBBI.BrCond.size() == 0 || +      FalseBBI.BrCond.size() == 0) +    return false; + +  MachineBasicBlock *TT = TrueBBI.TrueBB; +  MachineBasicBlock *TF = TrueBBI.FalseBB; +  MachineBasicBlock *FT = FalseBBI.TrueBB; +  MachineBasicBlock *FF = FalseBBI.FalseBB; + +  if (!TT) +    TT = getNextBlock(TrueBBI.BB); +  if (!TF) +    TF = getNextBlock(TrueBBI.BB); +  if (!FT) +    FT = getNextBlock(FalseBBI.BB); +  if (!FF) +    FF = getNextBlock(FalseBBI.BB); + +  if (!TT || !TF) +    return false; + +  // Check successors. If they don't match, bail. +  if (!((TT == FT && TF == FF) || (TF == FT && TT == FF))) +    return false; + +  if (TF == FT && TT == FF) { +    // If the branches are opposing, but we can't reverse, don't do it. +    if (!FalseBBI.IsBrReversible) +      return false; +    ReverseBranchCondition(FalseBBI); +  } + +  // Count duplicate instructions at the beginning of the true and false blocks. +  MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); +  MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); +  MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); +  MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); +  countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, +                              *TrueBBI.BB, *FalseBBI.BB, +                              /* SkipConditionalBranches */ false); + +  TrueBBICalc.BB = TrueBBI.BB; +  FalseBBICalc.BB = FalseBBI.BB; +  if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) +    return false; +  // The size is used to decide whether to if-convert, and the shared portions +  // are subtracted off. Because of the subtraction, we just use the size that +  // was calculated by the original ScanInstructions, as it is correct. +  TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; +  FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; +  return true;  }  /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along  /// with their common predecessor) forms a valid diamond shape for ifcvt. -bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, -                               unsigned &Dups1, unsigned &Dups2) const { +bool IfConverter::ValidDiamond( +    BBInfo &TrueBBI, BBInfo &FalseBBI, +    unsigned &Dups1, unsigned &Dups2, +    BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {    Dups1 = Dups2 = 0;    if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||        FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) @@ -672,8 +829,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,      return false;    // FIXME: Allow true block to have an early exit? -  if (TrueBBI.FalseBB || FalseBBI.FalseBB || -      (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) +  if (TrueBBI.FalseBB || FalseBBI.FalseBB)      return false;    // Count duplicate instructions at the beginning and end of the true and @@ -685,6 +841,16 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,    countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,                                *TrueBBI.BB, *FalseBBI.BB,                                /* SkipConditionalBranches */ true); + +  TrueBBICalc.BB = TrueBBI.BB; +  FalseBBICalc.BB = FalseBBI.BB; +  if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) +    return false; +  // The size is used to decide whether to if-convert, and the shared portions +  // are subtracted off. Because of the subtraction, we just use the size that +  // was calculated by the original ScanInstructions, as it is correct. +  TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; +  FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;    return true;  } @@ -698,6 +864,9 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) {    BBI.BrCond.clear();    BBI.IsBrAnalyzable =        !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); +  SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); +  BBI.IsBrReversible = (RevCond.size() == 0) || +      !TII->ReverseBranchCondition(RevCond);    BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;    if (BBI.BrCond.size()) { @@ -813,11 +982,22 @@ void IfConverter::ScanInstructions(BBInfo &BBI,  /// FeasibilityAnalysis - Determine if the block is a suitable candidate to be  /// predicated by the specified predicate. +/// @param BBI BBInfo for the block to check +/// @param Pred Predicate array for the branch that leads to BBI +/// @param isTriangle true if the Analysis is for a triangle +/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false +///        case +/// @param hasCommonTail true if BBI shares a tail with a sibling block that +///        contains any instruction that would make the block unpredicable.  bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,                                        SmallVectorImpl<MachineOperand> &Pred, -                                      bool isTriangle, bool RevBranch) { +                                      bool isTriangle, bool RevBranch, +                                      bool hasCommonTail) {    // If the block is dead or unpredicable, then it cannot be predicated. -  if (BBI.IsDone || BBI.IsUnpredicable) +  // Two blocks may share a common unpredicable tail, but this doesn't prevent +  // them from being if-converted. The non-shared portion is assumed to have +  // been checked +  if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail))      return false;    // If it is already predicated but we couldn't analyze its terminator, the @@ -831,7 +1011,7 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,    if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))      return false; -  if (BBI.BrCond.size()) { +  if (!hasCommonTail && BBI.BrCond.size()) {      if (!isTriangle)        return false; @@ -939,25 +1119,58 @@ void IfConverter::AnalyzeBlock(      BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); -    if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && -        MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + -                                         TrueBBI.ExtraCost), TrueBBI.ExtraCost2, -                           *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + -                                        FalseBBI.ExtraCost),FalseBBI.ExtraCost2, -                         Prediction) && -        FeasibilityAnalysis(TrueBBI, BBI.BrCond) && -        FeasibilityAnalysis(FalseBBI, RevCond)) { -      // Diamond: -      //   EBB -      //   / \_ -      //  |   | -      // TBB FBB -      //   \ / -      //  TailBB -      // Note TailBB can be empty. -      Tokens.push_back(llvm::make_unique<IfcvtToken>( -          BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2)); -      Enqueued = true; +    if (CanRevCond) { +      BBInfo TrueBBICalc, FalseBBICalc; +      auto feasibleDiamond = [&]() { +        return ( +            MeetIfcvtSizeLimit( +                *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + +                              TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, +                *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + +                               FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, +                Prediction) && +            FeasibilityAnalysis(TrueBBI, BBI.BrCond, +                                /* IsTriangle */ false, /* RevCond */ false, +                                /* hasCommonTail */ true) && +            FeasibilityAnalysis(FalseBBI, RevCond, +                                /* IsTriangle */ false, /* RevCond */ false, +                                /* hasCommonTail */ true)); +      }; + +      if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, +                       TrueBBICalc, FalseBBICalc)) { +        if (feasibleDiamond()) { +          // Diamond: +          //   EBB +          //   / \_ +          //  |   | +          // TBB FBB +          //   \ / +          //  TailBB +          // Note TailBB can be empty. +          Tokens.push_back(llvm::make_unique<IfcvtToken>( +              BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2)); +          Enqueued = true; +        } +      } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, +                                    TrueBBICalc, FalseBBICalc)) { +        if (feasibleDiamond()) { +          // ForkedDiamond: +          // if TBB and FBB have a common tail that includes their conditional +          // branch instructions, then we can If Convert this pattern. +          //          EBB +          //         _/ \_ +          //         |   | +          //        TBB  FBB +          //        / \ /   \ +          //  FalseBB TrueBB FalseBB +          // +          Tokens.push_back(llvm::make_unique<IfcvtToken>( +              BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, +              (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); +          Enqueued = true; +        } +      }      }      if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && @@ -1410,23 +1623,26 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {    return true;  } -/// IfConvertDiamond - If convert a diamond sub-CFG. -/// -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, -                                   unsigned NumDups1, unsigned NumDups2) { -  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()]; -  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; -  MachineBasicBlock *TailBB = TrueBBI.TrueBB; -  // True block must fall through or end with an unanalyzable terminator. -  if (!TailBB) { -    if (blockAlwaysFallThrough(TrueBBI)) -      TailBB = FalseBBI.TrueBB; -    assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); -  } +/// IfConvertDiamondCommon - Common code shared between diamond conversions. +/// BBI, TrueBBI, and FalseBBI form the diamond shape. +/// NumDups1 - number of shared instructions at the beginning of TrueBBI and +///            FalseBBI +/// NumDups2 - number of shared instructions at the end of TrueBBI and FalseBBI +/// RemoveTrueBranch - Remove the branch of the true block before predicating +///                    Only false for unanalyzable fallthrough cases. +/// RemoveFalseBranch - Remove the branch of the false block before predicating +///                     Only false for unanalyzable fallthrough cases. +/// MergeAddEdges - Add successor edges when merging blocks. Only false for +///                 unanalyzable fallthrough +bool IfConverter::IfConvertDiamondCommon( +    BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, +    unsigned NumDups1, unsigned NumDups2, +    bool TClobbersPred, bool FClobbersPred, +    bool RemoveTrueBranch, bool RemoveFalseBranch, +    bool MergeAddEdges) {    if (TrueBBI.IsDone || FalseBBI.IsDone || -      TrueBBI.BB->pred_size() > 1 || -      FalseBBI.BB->pred_size() > 1) { +      TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {      // Something has changed. It's no longer safe to predicate these blocks.      BBI.IsAnalyzed = false;      TrueBBI.IsAnalyzed = false; @@ -1451,15 +1667,16 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,    // Figure out the more profitable ordering.    bool DoSwap = false; -  if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) +  if (TClobbersPred && !FClobbersPred)      DoSwap = true; -  else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { +  else if (TClobbersPred == FClobbersPred) {      if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)        DoSwap = true;    }    if (DoSwap) {      std::swap(BBI1, BBI2);      std::swap(Cond1, Cond2); +    std::swap(RemoveTrueBranch, RemoveFalseBranch);    }    // Remove the conditional branch from entry to the blocks. @@ -1506,11 +1723,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,    BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);    BBI2->BB->erase(BBI2->BB->begin(), DI2); -  // Remove branch from the 'true' block, unless it was not analyzable. -  // Non-analyzable branches need to be preserved, since in such cases, -  // the CFG structure is not an actual diamond (the join block may not -  // be present). -  if (BBI1->IsBrAnalyzable) +  if (RemoveTrueBranch)      BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);    // Remove duplicated instructions.    DI1 = BBI1->BB->end(); @@ -1529,9 +1742,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,    // must be removed.    RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); -  // Remove 'false' block branch (unless it was not analyzable), and find -  // the last instruction to predicate. -  if (BBI2->IsBrAnalyzable) +  // Remove 'false' block branch, and find the last instruction to predicate. +  // Save the debug location. +  if (RemoveFalseBranch)      BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);    DI2 = BBI2->BB->end();    while (NumDups2 != 0) { @@ -1607,8 +1820,74 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,    PredicateBlock(*BBI2, DI2, *Cond2);    // Merge the true block into the entry of the diamond. -  MergeBlocks(BBI, *BBI1, TailBB == nullptr); -  MergeBlocks(BBI, *BBI2, TailBB == nullptr); +  MergeBlocks(BBI, *BBI1, MergeAddEdges); +  MergeBlocks(BBI, *BBI2, MergeAddEdges); +  return true; +} + +/// IfConvertForkedDiamond - If convert an almost-diamond sub-CFG where the true +/// and false blocks share a common tail. +bool IfConverter::IfConvertForkedDiamond( +    BBInfo &BBI, IfcvtKind Kind, +    unsigned NumDups1, unsigned NumDups2, +    bool TClobbersPred, bool FClobbersPred) { +  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()]; +  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + +  // Save the debug location for later. +  DebugLoc dl; +  MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator(); +  if (TIE != TrueBBI.BB->end()) +    dl = TIE->getDebugLoc(); +  // Removing branches from both blocks is safe, because we have already +  // determined that both blocks have the same branch instructions. The branch +  // will be added back at the end, unpredicated. +  if (!IfConvertDiamondCommon( +      BBI, TrueBBI, FalseBBI, +      NumDups1, NumDups2, +      TClobbersPred, FClobbersPred, +      /* RemoveTrueBranch */ true, /* RemoveFalseBranch */ true, +      /* MergeAddEdges */ true)) +    return false; + +  // Add back the branch. +  // Debug location saved above when removing the branch from BBI2 +  TII->InsertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, +                    TrueBBI.BrCond, dl); + +  RemoveExtraEdges(BBI); + +  // Update block info. +  BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; +  InvalidatePreds(BBI.BB); + +  // FIXME: Must maintain LiveIns. +  return true; +} + +/// IfConvertDiamond - If convert a diamond sub-CFG. +/// +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, +                                   unsigned NumDups1, unsigned NumDups2) { +  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()]; +  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; +  MachineBasicBlock *TailBB = TrueBBI.TrueBB; + +  // True block must fall through or end with an unanalyzable terminator. +  if (!TailBB) { +    if (blockAlwaysFallThrough(TrueBBI)) +      TailBB = FalseBBI.TrueBB; +    assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); +  } + +  if (!IfConvertDiamondCommon( +      BBI, TrueBBI, FalseBBI, +      NumDups1, NumDups2, +      TrueBBI.ClobbersPred, FalseBBI.ClobbersPred, +      /* RemoveTrueBranch */ TrueBBI.IsBrAnalyzable, +      /* RemoveFalseBranch */ FalseBBI.IsBrAnalyzable, +      /* MergeAddEdges */ TailBB == nullptr)) +    return false;    // If the if-converted block falls through or unconditionally branches into    // the tail block, and the tail block does not have other predecessors, then @@ -1631,7 +1910,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,        CanMergeTail = false;      else if (NumPreds == 1 && CanMergeTail) {        MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); -      if (*PI != BBI1->BB && *PI != BBI2->BB) +      if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)          CanMergeTail = false;      }      if (CanMergeTail) { @@ -1647,8 +1926,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,    // RemoveExtraEdges won't work if the block has an unanalyzable branch,    // which can happen here if TailBB is unanalyzable and is merged, so    // explicitly remove BBI1 and BBI2 as successors. -  BBI.BB->removeSuccessor(BBI1->BB); -  BBI.BB->removeSuccessor(BBI2->BB, true); +  BBI.BB->removeSuccessor(TrueBBI.BB); +  BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);    RemoveExtraEdges(BBI);    // Update block info. diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index eb48ffb7d80..a78b0c13426 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,7 @@  ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s  ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s  define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {  ; CHECK-LABEL: t1:  ; CHECK: ittt ne @@ -25,9 +26,9 @@ cond_next:  define i32 @t2(i32 %a, i32 %b) nounwind {  entry:  ; CHECK-LABEL: t2: -; CHECK: ite gt -; CHECK: subgt -; CHECK: suble +; CHECK: ite {{gt|le}} +; CHECK-DAG: suble +; CHECK-DAG: subgt  	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1]  	br i1 %tmp1434, label %bb17, label %bb.outer @@ -60,6 +61,44 @@ bb17:		; preds = %cond_false, %cond_true, %entry  	ret i32 %a_addr.026.1  } +define i32 @t2_nomerge(i32 %a, i32 %b) nounwind { +entry: +; CHECK-LABEL: t2_nomerge: +; CHECK-NOT: ite {{gt|le}} +; CHECK-NOT: suble +; CHECK-NOT: subgt +	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1] +	br i1 %tmp1434, label %bb17, label %bb.outer + +bb.outer:		; preds = %cond_false, %entry +	%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]		; <i32> [#uses=5] +	%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1] +	br label %bb + +bb:		; preds = %cond_true, %bb.outer +	%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2] +	%tmp. = sub i32 0, %b_addr.021.0.ph		; <i32> [#uses=1] +	%tmp.40 = mul i32 %indvar, %tmp.		; <i32> [#uses=1] +	%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph		; <i32> [#uses=6] +	%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph		; <i1> [#uses=1] +	br i1 %tmp3, label %cond_true, label %cond_false + +cond_true:		; preds = %bb +	%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph		; <i32> [#uses=2] +	%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph		; <i1> [#uses=1] +	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1] +	br i1 %tmp1437, label %bb17, label %bb + +cond_false:		; preds = %bb +	%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0		; <i32> [#uses=2] +	%tmp14 = icmp eq i32 %b_addr.021.0.ph, %tmp10		; <i1> [#uses=1] +	br i1 %tmp14, label %bb17, label %bb.outer + +bb17:		; preds = %cond_false, %cond_true, %entry +	%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1] +	ret i32 %a_addr.026.1 +} +  @x = external global i32*		; <i32**> [#uses=1]  define void @foo(i32 %a) nounwind {  | 

