5 files changed, 724 insertions, 17 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 9afd8523762..9c375782856 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -778,6 +778,19 @@ public:
     return false;
   }
 
+  /// Return the increase in code size (in bytes) needed to predicate a
+  /// contiguous run of NumInsts instructions. The default assumes no cost.
+  virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+                                                    unsigned NumInsts) const {
+    return 0;
+  }
+
+  /// Return an estimate for the code size reduction (in bytes) which will be
+  /// caused by removing the given branch instruction during if-conversion.
+  virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const {
+    return getInstSizeInBytes(MI); // Default: count the whole branch as saved.
+  }
+
   /// Return true if it's profitable to unpredicate
   /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
   /// exclusive predicates.
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index e503c568f96..d9caa566069 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -285,14 +285,113 @@ namespace {
                                                    Prediction);
     }
 
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
-                            unsigned TCycle, unsigned TExtra,
-                            MachineBasicBlock &FBB,
-                            unsigned FCycle, unsigned FExtra,
-                            BranchProbability Prediction) const {
-      return TCycle > 0 && FCycle > 0 &&
-        TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
-                                 Prediction);
+    bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
+                            MachineBasicBlock &CommBB, unsigned Dups,
+                            BranchProbability Prediction, bool Forked) const {
+      const MachineFunction &MF = *TBBInfo.BB->getParent();
+      if (MF.getFunction().hasMinSize()) { // Size-based heuristic.
+        MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
+        MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
+        MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
+        MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
+
+        unsigned Dups1, Dups2; // Not read after the call below.
+        if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+                                         *TBBInfo.BB, *FBBInfo.BB,
+                                         /*SkipUnconditionalBranches*/ true))
+          llvm_unreachable("should already have been checked by ValidDiamond");
+
+        unsigned BranchBytes = 0;
+        unsigned CommonBytes = 0;
+
+        // Count common instructions at the start of the true and false blocks.
+        for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
+          LLVM_DEBUG(dbgs() << "Common inst: " << I);
+          CommonBytes += TII->getInstSizeInBytes(I);
+        }
+        for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
+          LLVM_DEBUG(dbgs() << "Common inst: " << I);
+          CommonBytes += TII->getInstSizeInBytes(I);
+        }
+
+        // Count instructions at the end of the true and false blocks, after
+        // the ones we plan to predicate. Analyzable branches will be removed
+        // (unless this is a forked diamond), and all other instructions are
+        // common between the two blocks.
+        for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
+          if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
+            LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+            BranchBytes += TII->predictBranchSizeForIfCvt(I);
+          } else {
+            LLVM_DEBUG(dbgs() << "Common inst: " << I);
+            CommonBytes += TII->getInstSizeInBytes(I);
+          }
+        }
+        for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
+          if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
+            LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+            BranchBytes += TII->predictBranchSizeForIfCvt(I);
+          } else {
+            LLVM_DEBUG(dbgs() << "Common inst: " << I);
+            CommonBytes += TII->getInstSizeInBytes(I);
+          }
+        }
+        for (auto &I : CommBB.terminators()) {
+          if (I.isBranch()) {
+            LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+            BranchBytes += TII->predictBranchSizeForIfCvt(I);
+          }
+        }
+
+        // The common instructions were counted in both blocks, but only one
+        // copy will be eliminated, so halve the total.
+        CommonBytes /= 2;
+
+        // Count the instructions which we need to predicate.
+        unsigned NumPredicatedInstructions = 0;
+        for (auto &I : make_range(TIB, TIE)) {
+          if (!I.isDebugInstr()) {
+            LLVM_DEBUG(dbgs() << "Predicating: " << I);
+            NumPredicatedInstructions++;
+          }
+        }
+        for (auto &I : make_range(FIB, FIE)) {
+          if (!I.isDebugInstr()) {
+            LLVM_DEBUG(dbgs() << "Predicating: " << I);
+            NumPredicatedInstructions++;
+          }
+        }
+
+        // Even though we're optimising for size at the expense of performance,
+        // avoid creating really long predicated blocks.
+        if (NumPredicatedInstructions > 15) // Arbitrary cap.
+          return false;
+
+        // Some targets (e.g. Thumb2) need to insert extra instructions to
+        // start predicated blocks.
+        unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
+            MF, NumPredicatedInstructions);
+
+        LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
+                          << ", CommonBytes=" << CommonBytes
+                          << ", NumPredicatedInstructions="
+                          << NumPredicatedInstructions
+                          << ", ExtraPredicateBytes=" << ExtraPredicateBytes
+                          << ")\n");
+        return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
+      } else { // Not minsize: ask TII->isProfitableToIfCvt.
+        unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
+        unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
+        bool Res = TCycle > 0 && FCycle > 0 &&
+                   TII->isProfitableToIfCvt(
+                       *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
+                       FCycle, FBBInfo.ExtraCost2, Prediction);
+        LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
+                          << ", FCycle=" << FCycle
+                          << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
+                          << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
+        return Res;
+      }
     }
 
     /// Returns true if Block ends without a terminator.
@@ -842,6 +941,8 @@ bool IfConverter::ValidForkedDiamond(
 
   TrueBBICalc.BB = TrueBBI.BB;
   FalseBBICalc.BB = FalseBBI.BB;
+  TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+  FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
   if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
     return false;
 
@@ -899,6 +1000,8 @@ bool IfConverter::ValidDiamond(
 
   TrueBBICalc.BB = TrueBBI.BB;
   FalseBBICalc.BB = FalseBBI.BB;
+  TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+  FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
   if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
     return false;
   // The size is used to decide whether to if-convert, and the shared portions
@@ -1186,13 +1289,9 @@ void IfConverter::AnalyzeBlock(
 
     if (CanRevCond) {
       BBInfo TrueBBICalc, FalseBBICalc;
-      auto feasibleDiamond = [&]() {
-        bool MeetsSize = MeetIfcvtSizeLimit(
-            *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
-                          TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
-            *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
-                           FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
-            Prediction);
+      auto feasibleDiamond = [&](bool Forked) {
+        bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
+                                            Dups + Dups2, Prediction, Forked);
         bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
                                                 /* IsTriangle */ false, /* RevCond */ false,
                                                 /* hasCommonTail */ true);
@@ -1204,7 +1303,7 @@ void IfConverter::AnalyzeBlock(
 
       if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
                        TrueBBICalc, FalseBBICalc)) {
-        if (feasibleDiamond()) {
+        if (feasibleDiamond(false)) {
           // Diamond:
           //   EBB
           //   / \_
@@ -1220,7 +1319,7 @@ void IfConverter::AnalyzeBlock(
         }
       } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
                                     TrueBBICalc, FalseBBICalc)) {
-        if (feasibleDiamond()) {
+        if (feasibleDiamond(true)) {
           // ForkedDiamond:
           // if TBB and FBB have a common tail that includes their conditional
           // branch instructions, then we can If Convert this pattern.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index de53cb3b59d..684cd1def97 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2079,6 +2079,38 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
   return PredCost <= UnpredCost;
 }
 
+unsigned
+ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
+                                                   unsigned NumInsts) const {
+  // Thumb2 needs one 2-byte IT instruction per group of up to 4 predicated
+  // instructions. ARM has a condition code field in every predicable
+  // instruction, so using it doesn't change code size.
+  return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0;
+}
+
+unsigned
+ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
+  // If this branch is likely to be folded into the comparison to form a
+  // CB(N)Z, then removing it won't reduce code size at all, because that will
+  // just replace the CB(N)Z with a CMP.
+  if (MI.getOpcode() == ARM::t2Bcc &&
+      findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
+    return 0; // No saving predicted.
+
+  unsigned Size = getInstSizeInBytes(MI);
+
+  // For Thumb2, all branches are 32-bit instructions during the if-conversion
+  // pass, but may be replaced with 16-bit instructions during size reduction.
+  // Since the branches considered by if-conversion tend to be forward branches
+  // over small basic blocks, they are very likely to be in range for the
+  // narrow instructions, so we assume the final code size will be half what it
+  // currently is.
+  if (Subtarget.isThumb2())
+    Size /= 2;
+
+  return Size;
+}
+
 bool
 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                             MachineBasicBlock &FMBB) const {
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index e70695a4d97..c232b6f0b45 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -276,6 +276,10 @@ public:
     return NumCycles == 1;
   }
 
+  unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+                                            unsigned NumInsts) const override;
+  unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override;
+
   bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                  MachineBasicBlock &FMBB) const override;
 
diff --git a/llvm/test/CodeGen/ARM/ifcvt-size.mir b/llvm/test/CodeGen/ARM/ifcvt-size.mir
new file mode 100644
index 00000000000..a5c31cbab4a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ifcvt-size.mir
@@ -0,0 +1,559 @@
+# RUN: llc %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s
+# RUN: FileCheck %s < %t --check-prefix=DEBUG
+# REQUIRES: asserts
+
+# When optimising for size, we use a different set of heuristics for
+# if-conversion, which take into account the size of the instructions, not the
+# time taken to execute them. This is more complicated for Thumb, where it is
+# also affected by selection of narrow branch instructions, insertion of IT
+# instructions, and selection of the CB(N)Z instructions.
+
+--- |
+  target triple = "thumbv7-unknown-linux-gnueabi"
+
+  define void @fn1() minsize {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if.end:
+    unreachable
+  }
+
+  define void @fn2() minsize {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if.end:
+    unreachable
+  }
+
+  define void @fn3() minsize {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if.end:
+    unreachable
+  }
+
+  define void @fn4() minsize "target-features"="-thumb-mode" {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if.end:
+    unreachable
+  }
+
+  define void @fn5() minsize {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if.end:
+    unreachable
+  }
+
+  define void @fn6() minsize {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if2.then:
+    unreachable
+  if2.else:
+    unreachable
+  }
+
+  define void @fn7() minsize "target-features"="-thumb-mode" {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if.end:
+    unreachable
+  }
+
+  define void @fn8() minsize {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  if.end:
+    unreachable
+  }
+
+  define void @fn9() minsize {
+  entry:
+    unreachable
+  if.then:
+    unreachable
+  if.else:
+    unreachable
+  lab1:
+    unreachable
+  }
+...
+---
+name:            fn1
+alignment:       1
+tracksRegLiveness: true
+
+# If-conversion is profitable here because it will remove two branches of 2
+# bytes each (assuming they can become narrow branches later), and will only
+# add 2 bytes with the IT instruction.
+
+# CHECK-LABEL: name:            fn1
+# CHECK:      t2CMPri
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: t2MOVi
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $r0, $r1, $r2, $r3
+
+    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.2, 11, killed $cpsr
+
+  bb.1.if.then:
+    successors: %bb.3(0x80000000)
+    liveins: $r0, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    t2B %bb.3, 14, $noreg
+
+  bb.2.if.else:
+    successors: %bb.3(0x80000000)
+    liveins: $r1, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+
+  bb.3.if.end:
+    liveins: $r0, $r3
+
+    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn2
+alignment:       1
+tracksRegLiveness: true
+
+# If-conversion is not profitable here, because the 5 conditional instructions
+# would require 2 IT instructions.
+
+# CHECK-LABEL: name:            fn2
+# CHECK:      t2CMPri
+# CHECK-NEXT: t2Bcc
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $r0, $r1, $r2, $r3
+
+    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.2, 11, killed $cpsr
+
+  bb.1.if.then:
+    successors: %bb.3(0x80000000)
+    liveins: $r0, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    t2B %bb.3, 14, $noreg
+
+  bb.2.if.else:
+    successors: %bb.3(0x80000000)
+    liveins: $r1, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+
+  bb.3.if.end:
+    liveins: $r0, $r3
+
+    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn3
+alignment:       1
+tracksRegLiveness: true
+
+# Here, the true and false blocks both end in a tBX_RET instruction. One of
+# these will be removed, saving 2 bytes, and the remaining one isn't
+# conditional, so doesn't push us over the limit of 4 instructions in an IT
+# block.
+
+# CHECK-LABEL: name:            fn3
+# CHECK:      t2CMPri
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: tBX_RET
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $r0, $r1, $r2, $r3
+
+    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.2, 11, killed $cpsr
+
+  bb.1.if.then:
+    liveins: $r0, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+  bb.2.if.else:
+    liveins: $r1, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn4
+alignment:       1
+tracksRegLiveness: true
+
+# This is the same as fn2, but compiled for ARM, which doesn't need IT
+# instructions, so if-conversion is profitable.
+
+# CHECK-LABEL: name:            fn4
+# CHECK:      CMPri
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: LDRSH
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: MOVi
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $r0, $r1, $r2, $r3
+
+    CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+    Bcc %bb.2, 11, killed $cpsr
+
+  bb.1.if.then:
+    successors: %bb.3(0x80000000)
+    liveins: $r0, $r3
+
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    B %bb.3
+
+  bb.2.if.else:
+    successors: %bb.3(0x80000000)
+    liveins: $r1, $r3
+
+    renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg
+
+  bb.3.if.end:
+    liveins: $r0, $r3
+
+    renamable $r1 = MOVi 0, 14, $noreg, $noreg
+    STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+    BX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn5
+alignment:       1
+tracksRegLiveness: true
+
+# Here, the compare and conditional branch can be turned into a CBZ, so we
+# don't want to if-convert.
+
+# CHECK-LABEL: name:            fn5
+# CHECK: t2CMPri
+# CHECK: t2Bcc
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $r0, $r1, $r2
+
+    t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.2, 1, killed $cpsr
+
+  bb.1.if.then:
+    liveins: $r0
+
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+  bb.2.if.else:
+    liveins: $r1
+
+    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn6
+alignment:       1
+tracksRegLiveness: true
+
+# This is a forked-diamond pattern; we recognise that the conditional branches
+# at the ends of the true and false blocks are the same, and can be shared.
+
+# CHECK-LABEL: name:            fn6
+# CHECK:      t2CMPri
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2CMPri
+# CHECK-NEXT: t2Bcc
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $r0, $r1, $r2, $r3
+
+    t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.2, 1, killed $cpsr
+
+  bb.1.if.then:
+    successors: %bb.3(0x30000000), %bb.4(0x50000000)
+    liveins: $r0, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.3.if2.then, 1, killed $cpsr
+    t2B %bb.4.if2.else, 14, $noreg
+
+  bb.2.if.else:
+    successors: %bb.3(0x30000000), %bb.4(0x50000000)
+    liveins: $r0, $r1, $r3
+
+    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+    t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.3.if2.then, 1, killed $cpsr
+    t2B %bb.4.if2.else, 14, $noreg
+
+  bb.3.if2.then:
+    liveins: $r0, $r1, $r3
+
+    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+  bb.4.if2.else:
+    liveins: $r0
+
+    tBX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn7
+alignment:       1
+tracksRegLiveness: true
+
+# When compiling for ARM, it would be good for code size to generate very long
+# runs of conditional instructions, but we put an (arbitrary) limit on this to
+# avoid generating code which is very bad for performance, and only saves a few
+# bytes of code size.
+
+# CHECK-LABEL: name:            fn7
+# CHECK:      CMPri
+# CHECK-NEXT: Bcc
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $r0, $r1, $r2, $r3
+
+    CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+    Bcc %bb.2, 11, killed $cpsr
+
+  bb.1.if.then:
+    successors: %bb.3(0x80000000)
+    liveins: $r0, $r3
+
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    B %bb.3
+
+  bb.2.if.else:
+    successors: %bb.3(0x80000000)
+    liveins: $r1, $r3
+
+    renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
+    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg
+
+  bb.3.if.end:
+    liveins: $r0, $r3
+
+    renamable $r1 = MOVi 0, 14, $noreg, $noreg
+    STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+    BX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn8
+alignment:       1
+tracksRegLiveness: true
+
+# The first t2LDRi12 instruction in each branch is the same, so one copy of it
+# will be removed, and it doesn't need to be predicated, keeping us under the 4
+# instruction IT block limit.
+
+# CHECK-LABEL: name:            fn8
+# CHECK:      t2CMPri
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: t2MOVi
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $r0, $r1, $r2, $r3
+
+    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.2, 11, killed $cpsr
+
+  bb.1.if.then:
+    successors: %bb.3(0x80000000)
+    liveins: $r0, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg
+    t2B %bb.3, 14, $noreg
+
+  bb.2.if.else:
+    successors: %bb.3(0x80000000)
+    liveins: $r0, $r3
+
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+
+  bb.3.if.end:
+    liveins: $r0, $r3
+
+    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+
+---
+name:            fn9
+alignment:       2
+tracksRegLiveness: true
+
+# The INLINEASM_BR instructions aren't analyzable, but they are identical so we
+# can still do diamond if-conversion. From a code-size POV, they are common
+# instructions, so one will be removed, and they don't need an IT block slot.
+
+# CHECK-LABEL: name:            fn9
+# CHECK:      tCMPi8
+# CHECK-NEXT: tLDRi
+# CHECK-NEXT: tLDRi
+# CHECK-NEXT: tLDRi
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: INLINEASM_BR
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=6, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.3(0x50000000)
+    liveins: $r0, $r1, $r2
+
+    tCMPi8 killed renamable $r2, 42, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.3, 1, killed $cpsr
+
+  bb.1.if.then:
+    successors:  %bb.5(0x7fffffff)
+    liveins: $r0
+
+    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
+    INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
+
+  bb.3.if.else:
+    successors: %bb.5(0x7fffffff)
+    liveins: $r1
+
+    renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg
+    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
+    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+    INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
+
+  bb.5.lab1 (address-taken):
+    liveins: $r0
+
+    renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg
+    tBX_RET 14, $noreg, implicit $r0
+...