summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h13
-rw-r--r--llvm/lib/CodeGen/IfConversion.cpp133
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp32
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h4
-rw-r--r--llvm/test/CodeGen/ARM/ifcvt-size.mir559
5 files changed, 724 insertions, 17 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 9afd8523762..9c375782856 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -778,6 +778,19 @@ public:
return false;
}
+ /// Return the increase in code size (in bytes) needed to predicate a
+ /// contiguous run of NumInsts instructions.
+ virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+ unsigned NumInsts) const {
+ return 0;
+ }
+
+ /// Return an estimate for the code size reduction (in bytes) which will be
+ /// caused by removing the given branch instruction during if-conversion.
+ virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const {
+ return getInstSizeInBytes(MI);
+ }
+
/// Return true if it's profitable to unpredicate
/// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
/// exclusive predicates.
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index e503c568f96..d9caa566069 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -285,14 +285,113 @@ namespace {
Prediction);
}
- bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
- unsigned TCycle, unsigned TExtra,
- MachineBasicBlock &FBB,
- unsigned FCycle, unsigned FExtra,
- BranchProbability Prediction) const {
- return TCycle > 0 && FCycle > 0 &&
- TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
- Prediction);
+ bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
+ MachineBasicBlock &CommBB, unsigned Dups,
+ BranchProbability Prediction, bool Forked) const {
+ const MachineFunction &MF = *TBBInfo.BB->getParent();
+ if (MF.getFunction().hasMinSize()) {
+ MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
+ MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
+ MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
+ MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
+
+ unsigned Dups1, Dups2;
+ if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TBBInfo.BB, *FBBInfo.BB,
+ /*SkipUnconditionalBranches*/ true))
+ llvm_unreachable("should already have been checked by ValidDiamond");
+
+ unsigned BranchBytes = 0;
+ unsigned CommonBytes = 0;
+
+ // Count common instructions at the start of the true and false blocks.
+ for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+
+ // Count instructions at the end of the true and false blocks, after
+ // the ones we plan to predicate. Analyzable branches will be removed
+ // (unless this is a forked diamond), and all other instructions are
+ // common between the two blocks.
+ for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
+ if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
+ if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : CommBB.terminators()) {
+ if (I.isBranch()) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ }
+ }
+
+ // The common instructions in one branch will be eliminated, halving
+ // their code size.
+ CommonBytes /= 2;
+
+ // Count the instructions which we need to predicate.
+ unsigned NumPredicatedInstructions = 0;
+ for (auto &I : make_range(TIB, TIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+ for (auto &I : make_range(FIB, FIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+
+ // Even though we're optimising for size at the expense of performance,
+ // avoid creating really long predicated blocks.
+ if (NumPredicatedInstructions > 15)
+ return false;
+
+ // Some targets (e.g. Thumb2) need to insert extra instructions to
+ // start predicated blocks.
+ unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
+ MF, NumPredicatedInstructions);
+
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
+ << ", CommonBytes=" << CommonBytes
+ << ", NumPredicatedInstructions="
+ << NumPredicatedInstructions
+ << ", ExtraPredicateBytes=" << ExtraPredicateBytes
+ << ")\n");
+ return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
+ } else {
+ unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
+ unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
+ bool Res = TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(
+ *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
+ FCycle, FBBInfo.ExtraCost2, Prediction);
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
+ << ", FCycle=" << FCycle
+ << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
+ << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
+ return Res;
+ }
}
/// Returns true if Block ends without a terminator.
@@ -842,6 +941,8 @@ bool IfConverter::ValidForkedDiamond(
TrueBBICalc.BB = TrueBBI.BB;
FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
return false;
@@ -899,6 +1000,8 @@ bool IfConverter::ValidDiamond(
TrueBBICalc.BB = TrueBBI.BB;
FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
return false;
// The size is used to decide whether to if-convert, and the shared portions
@@ -1186,13 +1289,9 @@ void IfConverter::AnalyzeBlock(
if (CanRevCond) {
BBInfo TrueBBICalc, FalseBBICalc;
- auto feasibleDiamond = [&]() {
- bool MeetsSize = MeetIfcvtSizeLimit(
- *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
- TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
- *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
- FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
- Prediction);
+ auto feasibleDiamond = [&](bool Forked) {
+ bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
+ Dups + Dups2, Prediction, Forked);
bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
/* IsTriangle */ false, /* RevCond */ false,
/* hasCommonTail */ true);
@@ -1204,7 +1303,7 @@ void IfConverter::AnalyzeBlock(
if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
TrueBBICalc, FalseBBICalc)) {
- if (feasibleDiamond()) {
+ if (feasibleDiamond(false)) {
// Diamond:
// EBB
// / \_
@@ -1220,7 +1319,7 @@ void IfConverter::AnalyzeBlock(
}
} else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
TrueBBICalc, FalseBBICalc)) {
- if (feasibleDiamond()) {
+ if (feasibleDiamond(true)) {
// ForkedDiamond:
// if TBB and FBB have a common tail that includes their conditional
// branch instructions, then we can If Convert this pattern.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index de53cb3b59d..684cd1def97 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2079,6 +2079,38 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
return PredCost <= UnpredCost;
}
+unsigned
+ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
+ unsigned NumInsts) const {
+ // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
+ // ARM has a condition code field in every predicable instruction, using it
+ // doesn't change code size.
+ return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0;
+}
+
+unsigned
+ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
+ // If this branch is likely to be folded into the comparison to form a
+ // CB(N)Z, then removing it won't reduce code size at all, because that will
+ // just replace the CB(N)Z with a CMP.
+ if (MI.getOpcode() == ARM::t2Bcc &&
+ findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
+ return 0;
+
+ unsigned Size = getInstSizeInBytes(MI);
+
+ // For Thumb2, all branches are 32-bit instructions during the if conversion
+ // pass, but may be replaced with 16-bit instructions during size reduction.
+ // Since the branches considered by if conversion tend to be forward branches
+ // over small basic blocks, they are very likely to be in range for the
+ // narrow instructions, so we assume the final code size will be half what it
+ // currently is.
+ if (Subtarget.isThumb2())
+ Size /= 2;
+
+ return Size;
+}
+
bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index e70695a4d97..c232b6f0b45 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -276,6 +276,10 @@ public:
return NumCycles == 1;
}
+ unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+ unsigned NumInsts) const override;
+ unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override;
+
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const override;
diff --git a/llvm/test/CodeGen/ARM/ifcvt-size.mir b/llvm/test/CodeGen/ARM/ifcvt-size.mir
new file mode 100644
index 00000000000..a5c31cbab4a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ifcvt-size.mir
@@ -0,0 +1,559 @@
+# RUN: llc %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s
+# RUN: FileCheck %s < %t --check-prefix=DEBUG
+# REQUIRES: asserts
+
+# When optimising for size, we use a different set of heuristics for
+# if-conversion, which take into account the size of the instructions, not the
+# time taken to execute them. This is more complicated for Thumb, where it is
+# also affected by selection of narrow branch instructions, insertion of IT
+# instructions, and selection of the CB(N)Z instructions.
+
+--- |
+ target triple = "thumbv7-unknown-linux-gnueabi"
+
+ define void @fn1() minsize {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if.end:
+ unreachable
+ }
+
+ define void @fn2() minsize {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if.end:
+ unreachable
+ }
+
+ define void @fn3() minsize {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if.end:
+ unreachable
+ }
+
+ define void @fn4() minsize "target-features"="-thumb-mode" {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if.end:
+ unreachable
+ }
+
+ define void @fn5() minsize {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if.end:
+ unreachable
+ }
+
+ define void @fn6() minsize {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if2.then:
+ unreachable
+ if2.else:
+ unreachable
+ }
+
+ define void @fn7() minsize "target-features"="-thumb-mode" {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if.end:
+ unreachable
+ }
+
+ define void @fn8() minsize {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ if.end:
+ unreachable
+ }
+
+ define void @fn9() minsize {
+ entry:
+ unreachable
+ if.then:
+ unreachable
+ if.else:
+ unreachable
+ lab1:
+ unreachable
+ }
+...
+---
+name: fn1
+alignment: 1
+tracksRegLiveness: true
+
+# If-conversion is profitable here because it will remove two branches of 2
+# bytes each (assuming they can become narrow branches later), and will only
+# add 2 bytes with the IT instruction.
+
+# CHECK-LABEL: name: fn1
+# CHECK: t2CMPri
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: t2MOVi
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $r0, $r1, $r2, $r3
+
+ t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.2, 11, killed $cpsr
+
+ bb.1.if.then:
+ successors: %bb.3(0x80000000)
+ liveins: $r0, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ t2B %bb.3, 14, $noreg
+
+ bb.2.if.else:
+ successors: %bb.3(0x80000000)
+ liveins: $r1, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+
+ bb.3.if.end:
+ liveins: $r0, $r3
+
+ renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+ t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+---
+name: fn2
+alignment: 1
+tracksRegLiveness: true
+
+# If-conversion is not profitable here, because the 5 conditional instructions
+# would require 2 IT instructions.
+
+# CHECK-LABEL: name: fn2
+# CHECK: t2CMPri
+# CHECK-NEXT: t2Bcc
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $r0, $r1, $r2, $r3
+
+ t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.2, 11, killed $cpsr
+
+ bb.1.if.then:
+ successors: %bb.3(0x80000000)
+ liveins: $r0, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ t2B %bb.3, 14, $noreg
+
+ bb.2.if.else:
+ successors: %bb.3(0x80000000)
+ liveins: $r1, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+
+ bb.3.if.end:
+ liveins: $r0, $r3
+
+ renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+ t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+---
+name: fn3
+alignment: 1
+tracksRegLiveness: true
+
+# Here, the true and false blocks both end in a tBX_RET instruction. One of
+# these will be removed, saving 2 bytes, and the remaining one isn't
+# conditional, so doesn't push us over the limit of 4 instructions in an IT
+# block.
+
+# CHECK-LABEL: name: fn3
+# CHECK: t2CMPri
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: tBX_RET
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $r0, $r1, $r2, $r3
+
+ t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.2, 11, killed $cpsr
+
+ bb.1.if.then:
+ liveins: $r0, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+ bb.2.if.else:
+ liveins: $r1, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+---
+name: fn4
+alignment: 1
+tracksRegLiveness: true
+
+# This is the same as fn2, but compiled for ARM, which doesn't need IT
+# instructions, so if-conversion is profitable.
+
+# CHECK-LABEL: name: fn4
+# CHECK: CMPri
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: LDRSH
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: LDRi12
+# CHECK-NEXT: MOVi
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $r0, $r1, $r2, $r3
+
+ CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+ Bcc %bb.2, 11, killed $cpsr
+
+ bb.1.if.then:
+ successors: %bb.3(0x80000000)
+ liveins: $r0, $r3
+
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ B %bb.3
+
+ bb.2.if.else:
+ successors: %bb.3(0x80000000)
+ liveins: $r1, $r3
+
+ renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg
+
+ bb.3.if.end:
+ liveins: $r0, $r3
+
+ renamable $r1 = MOVi 0, 14, $noreg, $noreg
+ STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+ BX_RET 14, $noreg, implicit $r0
+
+---
+name: fn5
+alignment: 1
+tracksRegLiveness: true
+
+# Here, the compare and conditional branch can be turned into a CBZ, so we
+# don't want to if-convert.
+
+# CHECK-LABEL: name: fn5
+# CHECK: t2CMPri
+# CHECK: t2Bcc
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $r0, $r1, $r2
+
+ t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.2, 1, killed $cpsr
+
+ bb.1.if.then:
+ liveins: $r0
+
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+ bb.2.if.else:
+ liveins: $r1
+
+ renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+---
+name: fn6
+alignment: 1
+tracksRegLiveness: true
+
+# This is a forked-diamond pattern, we recognise that the conditional branches
+# at the ends of the true and false blocks are the same, and can be shared.
+
+# CHECK-LABEL: name: fn6
+# CHECK: t2CMPri
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2CMPri
+# CHECK-NEXT: t2Bcc
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $r0, $r1, $r2, $r3
+
+ t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.2, 1, killed $cpsr
+
+ bb.1.if.then:
+ successors: %bb.3(0x30000000), %bb.4(0x50000000)
+ liveins: $r0, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.3.if2.then, 1, killed $cpsr
+ t2B %bb.4.if2.else, 14, $noreg
+
+ bb.2.if.else:
+ successors: %bb.3(0x30000000), %bb.4(0x50000000)
+ liveins: $r0, $r1, $r3
+
+ renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+ t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.3.if2.then, 1, killed $cpsr
+ t2B %bb.4.if2.else, 14, $noreg
+
+ bb.3.if2.then:
+ liveins: $r0, $r1, $r3
+
+ t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+ bb.4.if2.else:
+ liveins: $r0
+
+ tBX_RET 14, $noreg, implicit $r0
+
+---
+name: fn7
+alignment: 1
+tracksRegLiveness: true
+
+# When compiling for ARM, it would be good for code size to generate very long
+# runs of conditional instructions, but we put an (arbitrary) limit on this to
+# avoid generating code which is very bad for performance, and only saves a few
+# bytes of code size.
+
+# CHECK-LABEL: name: fn7
+# CHECK: CMPri
+# CHECK-NEXT: Bcc
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $r0, $r1, $r2, $r3
+
+ CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+ Bcc %bb.2, 11, killed $cpsr
+
+ bb.1.if.then:
+ successors: %bb.3(0x80000000)
+ liveins: $r0, $r3
+
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ B %bb.3
+
+ bb.2.if.else:
+ successors: %bb.3(0x80000000)
+ liveins: $r1, $r3
+
+ renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
+ renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg
+
+ bb.3.if.end:
+ liveins: $r0, $r3
+
+ renamable $r1 = MOVi 0, 14, $noreg, $noreg
+ STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+ BX_RET 14, $noreg, implicit $r0
+
+---
+name: fn8
+alignment: 1
+tracksRegLiveness: true
+
+# The first t2LDRi12 instruction in each branch is the same, so one copy of it
+# will be removed, and it doesn't need to be predicated, keeping us under the 4
+# instruction IT block limit.
+
+# CHECK-LABEL: name: fn8
+# CHECK: t2CMPri
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRi12
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: t2MOVi
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $r0, $r1, $r2, $r3
+
+ t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.2, 11, killed $cpsr
+
+ bb.1.if.then:
+ successors: %bb.3(0x80000000)
+ liveins: $r0, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg
+ t2B %bb.3, 14, $noreg
+
+ bb.2.if.else:
+ successors: %bb.3(0x80000000)
+ liveins: $r0, $r3
+
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+
+ bb.3.if.end:
+ liveins: $r0, $r3
+
+ renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+ t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+
+---
+name: fn9
+alignment: 2
+tracksRegLiveness: true
+
+# The INLINEASM_BR instructions aren't analyzable, but they are identical so we
+# can still do diamond if-conversion. From a code-size POV, they are common
+# instructions, so one will be removed, and they don't need an IT block slot.
+
+# CHECK-LABEL: name: fn9
+# CHECK: tCMPi8
+# CHECK-NEXT: tLDRi
+# CHECK-NEXT: tLDRi
+# CHECK-NEXT: tLDRi
+# CHECK-NEXT: t2LDRSHi12
+# CHECK-NEXT: INLINEASM_BR
+
+# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9'
+# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=6, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
+
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.3(0x50000000)
+ liveins: $r0, $r1, $r2
+
+ tCMPi8 killed renamable $r2, 42, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.3, 1, killed $cpsr
+
+ bb.1.if.then:
+ successors: %bb.5(0x7fffffff)
+ liveins: $r0
+
+ renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
+ INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
+
+ bb.3.if.else:
+ successors: %bb.5(0x7fffffff)
+ liveins: $r1
+
+ renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg
+ renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
+ renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
+ INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
+
+ bb.5.lab1 (address-taken):
+ liveins: $r0
+
+ renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg
+ tBX_RET 14, $noreg, implicit $r0
+...
OpenPOWER on IntegriCloud