diff options
| -rw-r--r-- | llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 137 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/O3-pipeline.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir | 201 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll | 158 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir | 184 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir | 201 | 
6 files changed, 856 insertions, 27 deletions
| diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 548910e5f8c..0533fbdc071 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -29,6 +29,7 @@  #include "llvm/CodeGen/LivePhysRegs.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineFunctionPass.h"  #include "llvm/CodeGen/MachineInstr.h" @@ -70,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");  STATISTIC(NumCBZ,        "Number of CBZ / CBNZ formed");  STATISTIC(NumJTMoved,    "Number of jump table destination blocks moved");  STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); +STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");  static cl::opt<bool>  AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), @@ -213,6 +215,7 @@ namespace {      const ARMBaseInstrInfo *TII;      const ARMSubtarget *STI;      ARMFunctionInfo *AFI; +    MachineDominatorTree *DT = nullptr;      bool isThumb;      bool isThumb1;      bool isThumb2; @@ -225,6 +228,12 @@ namespace {      bool runOnMachineFunction(MachineFunction &MF) override; +    void getAnalysisUsage(AnalysisUsage &AU) const override { +      AU.setPreservesCFG(); +      AU.addRequired<MachineDominatorTree>(); +      MachineFunctionPass::getAnalysisUsage(AU); +    } +      MachineFunctionProperties getRequiredProperties() const override {        return MachineFunctionProperties().set(            MachineFunctionProperties::Property::NoVRegs); @@ -350,6 +359,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {    isPositionIndependentOrROPI =        STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();    AFI = MF->getInfo<ARMFunctionInfo>(); +  DT = &getAnalysis<MachineDominatorTree>();    isThumb = AFI->isThumbFunction();    isThumb1 = AFI->isThumb1OnlyFunction(); @@ -1809,16 +1819,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {    return MadeChange;  } +  bool ARMConstantIslands::optimizeThumb2Branches() { -  bool MadeChange = false; -  // The order in which branches appear in ImmBranches is approximately their -  // order within the function body. By visiting later branches first, we reduce -  // the distance between earlier forward branches and their targets, making it -  // more likely that the cbn?z optimization, which can only apply to forward -  // branches, will succeed. -  for (unsigned i = ImmBranches.size(); i != 0; --i) { -    ImmBranch &Br = ImmBranches[i-1]; +  auto TryShrinkBranch = [this](ImmBranch &Br) {      unsigned Opcode = Br.MI->getOpcode();      unsigned NewOpc = 0;      unsigned Scale = 1; @@ -1846,47 +1850,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() {          BBUtils->adjustBBSize(MBB, -2);          BBUtils->adjustBBOffsetsAfter(MBB);          ++NumT2BrShrunk; -        MadeChange = true; +        return true;        }      } +    return false; +  }; -    Opcode = Br.MI->getOpcode(); -    if (Opcode != ARM::tBcc) -      continue; +  struct ImmCompare { +    MachineInstr* MI = nullptr; +    unsigned NewOpc = 0; +  }; + +  auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp, +                              MachineBasicBlock *DestBB) { +    ImmCmp.MI = nullptr; +    ImmCmp.NewOpc = 0;      // If the conditional branch doesn't kill CPSR, then CPSR can be liveout      // so this transformation is not safe.      if (!Br.MI->killsRegister(ARM::CPSR)) -      continue; +      return false; -    NewOpc = 0;      unsigned PredReg = 0; +    unsigned NewOpc = 0;      ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);      if (Pred == ARMCC::EQ)        NewOpc = ARM::tCBZ;      else if (Pred == ARMCC::NE)        NewOpc = ARM::tCBNZ; -    if (!NewOpc) -      continue; -    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); +    else +      return false; +      // Check if the distance is within 126. Subtract starting offset by 2      // because the cmp will be eliminated.      unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;      BBInfoVector &BBInfo = BBUtils->getBBInfo();      unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;      if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126) -      continue; +      return false;      // Search backwards to find a tCMPi8      auto *TRI = STI->getRegisterInfo();      MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);      if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8) +      return false; + +    ImmCmp.MI = CmpMI; +    ImmCmp.NewOpc = NewOpc; +    return true; +  }; + +  auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) { +    if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() || +        STI->hasMinSize()) +      return false; + +    MachineBasicBlock *MBB = Br.MI->getParent(); +    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); +    if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) || +        !BBUtils->isBBInRange(Br.MI, DestBB, 4094)) +      return false; + +    if (!DT->dominates(DestBB, MBB)) +      return false; + +    // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite +    // target of Br. So now we need to reverse the condition. +    Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ; + +    MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), +                                      TII->get(ARM::t2LE)); +    MIB.add(Br.MI->getOperand(0)); +    Br.MI->eraseFromParent(); +    Br.MI = MIB; +    ++NumLEInserted; +    return true; +  }; + +  bool MadeChange = false; + +  // The order in which branches appear in ImmBranches is approximately their +  // order within the function body. By visiting later branches first, we reduce +  // the distance between earlier forward branches and their targets, making it +  // more likely that the cbn?z optimization, which can only apply to forward +  // branches, will succeed. +  for (ImmBranch &Br : reverse(ImmBranches)) { +    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); +    MachineBasicBlock *MBB = Br.MI->getParent(); +    MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ? +      MBB->getFallThrough() : +      MBB->back().getOperand(0).getMBB(); + +    ImmCompare Cmp; +    if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) { +      DestBB = ExitBB; +      MadeChange = true; +    } else { +      FindCmpForCBZ(Br, Cmp, DestBB); +      MadeChange |= TryShrinkBranch(Br); +    } + +    unsigned Opcode = Br.MI->getOpcode(); +    if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)        continue; -    Register Reg = CmpMI->getOperand(0).getReg(); +    Register Reg = Cmp.MI->getOperand(0).getReg();      // Check for Kill flags on Reg. If they are present remove them and set kill      // on the new CBZ. +    auto *TRI = STI->getRegisterInfo();      MachineBasicBlock::iterator KillMI = Br.MI;      bool RegKilled = false;      do { @@ -1896,19 +1968,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() {          RegKilled = true;          break;        } -    } while (KillMI != CmpMI); +    } while (KillMI != Cmp.MI);      // Create the new CBZ/CBNZ -    MachineBasicBlock *MBB = Br.MI->getParent(); -    LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); +    LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);      MachineInstr *NewBR = -        BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc)) +        BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))              .addReg(Reg, getKillRegState(RegKilled))              .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); -    CmpMI->eraseFromParent(); -    Br.MI->eraseFromParent(); -    Br.MI = NewBR; + +    Cmp.MI->eraseFromParent(); +    BBInfoVector &BBInfo = BBUtils->getBBInfo();      BBInfo[MBB->getNumber()].Size -= 2; + +    if (Br.MI->getOpcode() == ARM::tBcc) { +      Br.MI->eraseFromParent(); +      Br.MI = NewBR; +    } else if (&MBB->back() != Br.MI) { +      // We've generated an LE and already erased the original conditional +      // branch. The CBN?Z is now used to branch to the other successor, so an +      // unconditional branch terminator is now redundant. +      MachineInstr *LastMI = &MBB->back(); +      if (LastMI != Br.MI) { +        BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize(); +        LastMI->eraseFromParent(); +      } +    }      BBUtils->adjustBBOffsetsAfter(MBB);      ++NumCBZ;      MadeChange = true; diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 7650d682736..d6111f579a5 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -143,8 +143,8 @@  ; CHECK-NEXT:      Thumb2 instruction size reduce pass  ; CHECK-NEXT:      Unpack machine instruction bundles  ; CHECK-NEXT:      optimise barriers pass -; CHECK-NEXT:      ARM constant island placement and branch shortening pass  ; CHECK-NEXT:      MachineDominator Tree Construction +; CHECK-NEXT:      ARM constant island placement and branch shortening pass  ; CHECK-NEXT:      Machine Natural Loop Construction  ; CHECK-NEXT:      ARM Low Overhead Loops pass  ; CHECK-NEXT:      Contiguously Lay Out Funclets diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir new file mode 100644 index 00000000000..5e327b0e08c --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir @@ -0,0 +1,201 @@ +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-LOB +# RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB + +# CHECK-NOLOB-NOT: t2LE + +# CHECK-LOB: bb.3.land.rhs: +# CHECK-LOB:   renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg +# CHECK-LOB:   tCBNZ $r0, %bb.8 +# CHECK-LOB:   t2LE %bb.3 +# CHECK-LOB: bb.7.while.body19: +# CHECK-LOB:   tCBZ $r0, %bb.8 +# CHECK-LOB:   t2LE %bb.6 +# CHECK-LOB: bb.8: + +--- | +  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +  target triple = "thumbv8.1m.main" +   +  %struct.head_s = type { %struct.head_s*, %struct.data_s* } +  %struct.data_s = type { i16, i16 } +   +  ; Function Attrs: norecurse nounwind readonly +  define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 { +  entry: +    %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 +    %0 = load i16, i16* %idx, align 2 +    %cmp = icmp sgt i16 %0, -1 +    br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader +   +  while.cond9.preheader:                            ; preds = %entry +    %1 = icmp eq %struct.head_s* %list, null +    br i1 %1, label %return, label %land.rhs11.lr.ph +   +  land.rhs11.lr.ph:                                 ; preds = %while.cond9.preheader +    %data16143 = bitcast %struct.data_s* %info to i16* +    %2 = load i16, i16* %data16143, align 2 +    %conv15 = sext i16 %2 to i32 +    br label %land.rhs11 +   +  while.cond.preheader:                             ; preds = %entry +    %3 = icmp eq %struct.head_s* %list, null +    br i1 %3, label %return, label %land.rhs.preheader +   +  land.rhs.preheader:                               ; preds = %while.cond.preheader +    br label %land.rhs +   +  land.rhs:                                         ; preds = %land.rhs.preheader, %while.body +    %list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ] +    %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 +    %4 = load %struct.data_s*, %struct.data_s** %info2, align 4 +    %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1 +    %5 = load i16, i16* %idx3, align 2 +    %cmp7 = icmp eq i16 %5, %0 +    br i1 %cmp7, label %return, label %while.body +   +  while.body:                                       ; preds = %land.rhs +    %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** +    %6 = load %struct.head_s*, %struct.head_s** %next4, align 4 +    %tobool = icmp ne %struct.head_s* %6, null +    br i1 %tobool, label %return, label %land.rhs +   +  land.rhs11:                                       ; preds = %while.body19, %land.rhs11.lr.ph +    %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ] +    %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 +    %7 = load %struct.data_s*, %struct.data_s** %info12, align 4 +    %data165 = bitcast %struct.data_s* %7 to i16* +    %8 = load i16, i16* %data165, align 2 +    %9 = and i16 %8, 255 +    %and = zext i16 %9 to i32 +    %cmp16 = icmp eq i32 %and, %conv15 +    br i1 %cmp16, label %return, label %while.body19 +   +  while.body19:                                     ; preds = %land.rhs11 +    %next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** +    %10 = load %struct.head_s*, %struct.head_s** %next206, align 4 +    %tobool10 = icmp eq %struct.head_s* %10, null +    br i1 %tobool10, label %return, label %land.rhs11 +   +  return:                                           ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader +    %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] +    ret %struct.head_s* %retval.0 +  } +   +  attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } +   +... +--- +name:            search +alignment:       1 +exposesReturnsTwice: false +legalized:       false +regBankSelected: false +selected:        false +failedISel:      false +tracksRegLiveness: true +hasWinCFI:       false +registers:       [] +liveins: +  - { reg: '$r0', virtual-reg: '' } +  - { reg: '$r1', virtual-reg: '' } +frameInfo: +  isFrameAddressTaken: false +  isReturnAddressTaken: false +  hasStackMap:     false +  hasPatchPoint:   false +  stackSize:       0 +  offsetAdjustment: 0 +  maxAlignment:    1 +  adjustsStack:    false +  hasCalls:        false +  stackProtector:  '' +  maxCallFrameSize: 0 +  cvBytesOfCalleeSavedRegisters: 0 +  hasOpaqueSPAdjustment: false +  hasVAStart:      false +  hasMustTailInVarArgFunc: false +  localFrameSize:  0 +  savePoint:       '' +  restorePoint:    '' +fixedStack:      [] +stack:           [] +callSites:       [] +constants:       [] +machineFunctionInfo: {} +body:             | +  bb.0.entry: +    successors: %bb.5(0x50000000), %bb.1(0x30000000) +    liveins: $r0, $r1 +   +    renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx) +    t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.1, 13, killed $cpsr +   +  bb.5.while.cond.preheader: +    successors: %bb.8(0x30000000), %bb.6(0x50000000) +    liveins: $r0, $r2 +   +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.8, 0, killed $cpsr +   +  bb.6.land.rhs.preheader: +    successors: %bb.7(0x80000000) +    liveins: $r0, $r2 +   +    renamable $r1 = tUXTH killed renamable $r2, 14, $noreg +   +  bb.7.land.rhs: +    successors: %bb.8(0x04000000), %bb.7(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2) +    renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3) +    tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr +    t2IT 0, 8, implicit-def $itstate +    tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate +    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4) +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.7, 0, killed $cpsr +    t2B %bb.8, 14, $noreg +   +  bb.1.while.cond9.preheader: +    successors: %bb.8(0x30000000), %bb.2(0x50000000) +    liveins: $r0, $r1 +   +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.8, 0, killed $cpsr +   +  bb.2.land.rhs11.lr.ph: +    successors: %bb.3(0x80000000) +    liveins: $r0, $r1 +   +    renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143) +   +  bb.3.land.rhs11: +    successors: %bb.9(0x04000000), %bb.4(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12) +    renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2) +    tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.9, 0, killed $cpsr +   +  bb.4.while.body19: +    successors: %bb.8(0x04000000), %bb.3(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206) +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.3, 1, killed $cpsr +   +  bb.8: +    successors: %bb.9(0x80000000) +   +    renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg +   +  bb.9.return: +    liveins: $r0 +   +    tBX_RET 14, $noreg, implicit killed $r0 + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll new file mode 100644 index 00000000000..78ddb35e756 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s + +define void @cbz_exit(i32* %in, i32* %res) { +; CHECK-LABEL: cbz_exit: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    subs r2, r0, #4 +; CHECK-NEXT:    mov.w r0, #-1 +; CHECK-NEXT:  .LBB0_1: @ %loop +; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:    ldr r3, [r2, #4]! +; CHECK-NEXT:    adds r0, #1 +; CHECK-NEXT:    cbz r3, .LBB0_2 +; CHECK-NEXT:    le .LBB0_1 +; CHECK-NEXT:  .LBB0_2: @ %exit +; CHECK-NEXT:    str r0, [r1] +; CHECK-NEXT:    bx lr +entry: +  br label %loop + +loop: +  %offset = phi i32 [ 0, %entry ], [ %next, %loop ] +  %ptr = getelementptr i32, i32* %in, i32 %offset +  %val = load i32, i32* %ptr +  %next = add i32 %offset, 1 +  %cmp = icmp eq i32 %val, 0 +  br i1 %cmp, label %exit, label %loop + +exit: +  store i32 %offset, i32* %res +  ret void +} + +define void @cbnz_exit(i32* %in, i32* %res) { +; CHECK-LABEL: cbnz_exit: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    subs r2, r0, #4 +; CHECK-NEXT:    mov.w r0, #-1 +; CHECK-NEXT:  .LBB1_1: @ %loop +; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:    ldr r3, [r2, #4]! +; CHECK-NEXT:    adds r0, #1 +; CHECK-NEXT:    cbnz r3, .LBB1_2 +; CHECK-NEXT:    le .LBB1_1 +; CHECK-NEXT:  .LBB1_2: @ %exit +; CHECK-NEXT:    str r0, [r1] +; CHECK-NEXT:    bx lr +entry: +  br label %loop + +loop: +  %offset = phi i32 [ 0, %entry ], [ %next, %loop ] +  %ptr = getelementptr i32, i32* %in, i32 %offset +  %val = load i32, i32* %ptr +  %next = add i32 %offset, 1 +  %cmp = icmp ne i32 %val, 0 +  br i1 %cmp, label %exit, label %loop + +exit: +  store i32 %offset, i32* %res +  ret void +} + +define void @cbnz_exit_too_large(i32* %in, i32* %res) { +; CHECK-LABEL: cbnz_exit_too_large: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    subs r2, r0, #4 +; CHECK-NEXT:    mov.w r0, #-1 +; CHECK-NEXT:  .LBB2_1: @ %loop +; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:    ldr r12, [r2, #4]! +; CHECK-NEXT:    .zero 4090 +; CHECK-NEXT:    adds r0, #1 +; CHECK-NEXT:    cmp.w r12, #0 +; CHECK-NEXT:    beq.w .LBB2_1 +; CHECK-NEXT:  @ %bb.2: @ %exit +; CHECK-NEXT:    str r0, [r1] +; CHECK-NEXT:    bx lr +entry: +  br label %loop + +loop: +  %offset = phi i32 [ 0, %entry ], [ %next, %loop ] +  %ptr = getelementptr i32, i32* %in, i32 %offset +  %val = load i32, i32* %ptr +  %next = add i32 %offset, 1 +  %cmp = icmp ne i32 %val, 0 +  %size = call i32 @llvm.arm.space(i32 4090, i32 undef) +  br i1 %cmp, label %exit, label %loop + +exit: +  store i32 %offset, i32* %res +  ret void +} + +define void @cbz_exit_minsize(i32* %in, i32* %res) #0 { +; CHECK-LABEL: cbz_exit_minsize: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    movs r2, #0 +; CHECK-NEXT:  .LBB3_1: @ %loop +; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:    ldr.w r3, [r0, r2, lsl #2] +; CHECK-NEXT:    adds r2, #1 +; CHECK-NEXT:    cmp r3, #0 +; CHECK-NEXT:    bne .LBB3_1 +; CHECK-NEXT:  @ %bb.2: @ %exit +; CHECK-NEXT:    subs r0, r2, #1 +; CHECK-NEXT:    str r0, [r1] +; CHECK-NEXT:    bx lr +entry: +  br label %loop + +loop: +  %offset = phi i32 [ 0, %entry ], [ %next, %loop ] +  %ptr = getelementptr i32, i32* %in, i32 %offset +  %val = load i32, i32* %ptr +  %next = add i32 %offset, 1 +  %cmp = icmp eq i32 %val, 0 +  br i1 %cmp, label %exit, label %loop + +exit: +  store i32 %offset, i32* %res +  ret void +} + +define void @cbnz_exit_minsize(i32* %in, i32* %res) #0 { +; CHECK-LABEL: cbnz_exit_minsize: +; CHECK:       @ %bb.0: @ %entry +; CHECK-NEXT:    movs r2, #0 +; CHECK-NEXT:  .LBB4_1: @ %loop +; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:    ldr.w r3, [r0, r2, lsl #2] +; CHECK-NEXT:    adds r2, #1 +; CHECK-NEXT:    cmp r3, #0 +; CHECK-NEXT:    beq .LBB4_1 +; CHECK-NEXT:  @ %bb.2: @ %exit +; CHECK-NEXT:    subs r0, r2, #1 +; CHECK-NEXT:    str r0, [r1] +; CHECK-NEXT:    bx lr +entry: +  br label %loop + +loop: +  %offset = phi i32 [ 0, %entry ], [ %next, %loop ] +  %ptr = getelementptr i32, i32* %in, i32 %offset +  %val = load i32, i32* %ptr +  %next = add i32 %offset, 1 +  %cmp = icmp ne i32 %val, 0 +  br i1 %cmp, label %exit, label %loop + +exit: +  store i32 %offset, i32* %res +  ret void +} + +attributes #0 = { minsize optsize } + +declare i32 @llvm.arm.space(i32 immarg, i32); diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir new file mode 100644 index 00000000000..ffabd94c934 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir @@ -0,0 +1,184 @@ +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s +# CHECK-NOT: t2LE + +--- | +  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +  target triple = "thumbv8.1m.main-unknown-unknown" +   +  %struct.head_s = type { %struct.head_s*, %struct.data_s* } +  %struct.data_s = type { i16, i16 } +   +  ; Function Attrs: norecurse nounwind readonly +  define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 { +  entry: +    %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 +    %tmp = load i16, i16* %idx, align 2 +    %cmp = icmp sgt i16 %tmp, -1 +    br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader +   +  while.cond9.preheader:                            ; preds = %entry +    %0 = icmp eq %struct.head_s* %list, null +    br i1 %0, label %return, label %land.rhs11.lr.ph +   +  land.rhs11.lr.ph:                                 ; preds = %while.cond9.preheader +    %data16143 = bitcast %struct.data_s* %info to i16* +    %tmp1 = load i16, i16* %data16143, align 2 +    %conv15 = sext i16 %tmp1 to i32 +    br label %land.rhs11 +   +  while.cond.preheader:                             ; preds = %entry +    %1 = icmp eq %struct.head_s* %list, null +    br i1 %1, label %return, label %land.rhs.preheader +   +  land.rhs.preheader:                               ; preds = %while.cond.preheader +    br label %land.rhs +   +  while.body:                                       ; preds = %land.rhs +    %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** +    %tmp4 = load %struct.head_s*, %struct.head_s** %next4, align 4 +    %tobool = icmp eq %struct.head_s* %tmp4, null +    br i1 %tobool, label %return, label %land.rhs +   +  land.rhs:                                         ; preds = %land.rhs.preheader, %while.body +    %list.addr.033 = phi %struct.head_s* [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ] +    %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 +    %tmp2 = load %struct.data_s*, %struct.data_s** %info2, align 4 +    %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %tmp2, i32 0, i32 1 +    %tmp3 = load i16, i16* %idx3, align 2 +    %cmp7 = icmp eq i16 %tmp3, %tmp +    br i1 %cmp7, label %return, label %while.body +   +  while.body19:                                     ; preds = %land.rhs11 +    %next205 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** +    %tmp8 = load %struct.head_s*, %struct.head_s** %next205, align 4 +    %tobool10 = icmp eq %struct.head_s* %tmp8, null +    br i1 %tobool10, label %return, label %land.rhs11 +   +  land.rhs11:                                       ; preds = %while.body19, %land.rhs11.lr.ph +    %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ] +    %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 +    %tmp5 = load %struct.data_s*, %struct.data_s** %info12, align 4 +    %data166 = bitcast %struct.data_s* %tmp5 to i16* +    %tmp6 = load i16, i16* %data166, align 2 +    %2 = and i16 %tmp6, 255 +    %and = zext i16 %2 to i32 +    %cmp16 = icmp eq i32 %and, %conv15 +    br i1 %cmp16, label %return, label %while.body19 +   +  return:                                           ; preds = %land.rhs11, %while.body19, %land.rhs, %while.body, %while.cond.preheader, %while.cond9.preheader +    %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] +    ret %struct.head_s* %retval.0 +  } +   +  attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } +   +... +--- +name:            search +alignment:       1 +exposesReturnsTwice: false +legalized:       false +regBankSelected: false +selected:        false +failedISel:      false +tracksRegLiveness: true +hasWinCFI:       false +registers:       [] +liveins: +  - { reg: '$r0', virtual-reg: '' } +  - { reg: '$r1', virtual-reg: '' } +frameInfo: +  isFrameAddressTaken: false +  isReturnAddressTaken: false +  hasStackMap:     false +  hasPatchPoint:   false +  stackSize:       0 +  offsetAdjustment: 0 +  maxAlignment:    1 +  adjustsStack:    false +  hasCalls:        false +  stackProtector:  '' +  maxCallFrameSize: 0 +  cvBytesOfCalleeSavedRegisters: 0 +  hasOpaqueSPAdjustment: false +  hasVAStart:      false +  hasMustTailInVarArgFunc: false +  localFrameSize:  0 +  savePoint:       '' +  restorePoint:    '' +fixedStack:      [] +stack:           [] +callSites:       [] +constants:       [] +machineFunctionInfo: {} +body:             | +  bb.0.entry: +    successors: %bb.3(0x50000000), %bb.1(0x30000000) +    liveins: $r0, $r1 +   +    renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx) +    t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.1, 13, killed $cpsr +   +  bb.3.while.cond.preheader: +    successors: %bb.4(0x80000000) +    liveins: $r0, $r2 +   +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2IT 0, 4, implicit-def $itstate +    renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate +    tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate +    renamable $r1 = tUXTH killed renamable $r2, 14, $noreg +   +  bb.4.land.rhs: +    successors: %bb.6(0x04000000), %bb.5(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2) +    renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3) +    tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.6, 0, killed $cpsr +   +  bb.5.while.body: +    successors: %bb.4(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4) +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2IT 0, 4, implicit-def $itstate +    renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate +    tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate +    t2B %bb.4, 14, $noreg +   +  bb.6.return: +    liveins: $r0 +   +    tBX_RET 14, $noreg, implicit $r0 +   +  bb.1.while.cond9.preheader: +    successors: %bb.2(0x80000000) +    liveins: $r0, $r1 +   +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2IT 0, 4, implicit-def $itstate +    renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate +    tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate +    renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143) +   +  bb.2.land.rhs11: +    successors: %bb.2(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12) +    renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data166, align 2) +    tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr +    t2IT 0, 8, implicit-def $itstate +    tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate +    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next205) +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2IT 0, 4, implicit-def $itstate +    renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate +    tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate +    t2B %bb.2, 14, $noreg + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir new file mode 100644 index 00000000000..6d274943413 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir @@ -0,0 +1,201 @@ +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-LOB +# RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB + +# CHECK-NOLOB-NOT: t2LE + +# CHECK-LOB: bb.3.land.rhs: +# CHECK-LOB:   tCBZ $r0, %bb.8 +# CHECK-LOB:   t2LE %bb.3 +# CHECK-LOB: bb.6.land.rhs11: +# CHECK-LOB: bb.7.while.body19: +# CHECK-LOB:   tCBZ $r0, %bb.8 +# CHECK-LOB:   t2LE %bb.6 +# CHECK-LOB: bb.8: + +--- | +  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +  target triple = "thumbv8.1m.main-unknown-unknown" +   +  %struct.head_s = type { %struct.head_s*, %struct.data_s* } +  %struct.data_s = type { i16, i16 } +   +  ; Function Attrs: norecurse nounwind readonly +  define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 { +  entry: +    %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 +    %0 = load i16, i16* %idx, align 2 +    %cmp = icmp sgt i16 %0, -1 +    br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader +   +  while.cond9.preheader:                            ; preds = %entry +    %1 = icmp eq %struct.head_s* %list, null +    br i1 %1, label %return, label %land.rhs11.lr.ph +   +  land.rhs11.lr.ph:                                 ; preds = %while.cond9.preheader +    %data16143 = bitcast %struct.data_s* %info to i16* +    %2 = load i16, i16* %data16143, align 2 +    %conv15 = sext i16 %2 to i32 +    br label %land.rhs11 +   +  while.cond.preheader:                             ; preds = %entry +    %3 = icmp eq %struct.head_s* %list, null +    br i1 %3, label %return, label %land.rhs.preheader +   +  land.rhs.preheader:                               ; preds = %while.cond.preheader +    br label %land.rhs +   +  land.rhs:                                         ; preds = %land.rhs.preheader, %while.body +    %list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ] +    %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 +    %4 = load %struct.data_s*, %struct.data_s** %info2, align 4 +    %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1 +    %5 = load i16, i16* %idx3, align 2 +    %cmp7 = icmp eq i16 %5, %0 +    br i1 %cmp7, label %return, label %while.body +   +  while.body:                                       ; preds = %land.rhs +    %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** +    %6 = load %struct.head_s*, %struct.head_s** %next4, align 4 +    %tobool = icmp eq %struct.head_s* %6, null +    br i1 %tobool, label %return, label %land.rhs +   +  land.rhs11:                                       ; preds = %while.body19, %land.rhs11.lr.ph +    %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ] +    %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 +    %7 = load %struct.data_s*, %struct.data_s** %info12, align 4 +    %data165 = bitcast %struct.data_s* %7 to i16* +    %8 = load i16, i16* %data165, align 2 +    %9 = and i16 %8, 255 +    %and = zext i16 %9 to i32 +    %cmp16 = icmp eq i32 %and, %conv15 +    br i1 %cmp16, label %return, label %while.body19 +   +  while.body19:                                     ; preds = %land.rhs11 +    %next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** +    %10 = load %struct.head_s*, %struct.head_s** %next206, align 4 +    %tobool10 = icmp eq %struct.head_s* %10, null +    br i1 %tobool10, label %return, label %land.rhs11 +   +  return:                                           ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader +    %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] +    ret %struct.head_s* %retval.0 +  } +   +  attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } +   +... +--- +name:            search +alignment:       1 +exposesReturnsTwice: false +legalized:       false +regBankSelected: false +selected:        false +failedISel:      false +tracksRegLiveness: true +hasWinCFI:       false +registers:       [] +liveins: +  - { reg: '$r0', virtual-reg: '' } +  - { reg: '$r1', virtual-reg: '' } +frameInfo: +  isFrameAddressTaken: false +  isReturnAddressTaken: false +  hasStackMap:     false +  hasPatchPoint:   false +  stackSize:       0 +  offsetAdjustment: 0 +  maxAlignment:    1 +  adjustsStack:    false +  hasCalls:        false +  stackProtector:  '' +  maxCallFrameSize: 0 +  cvBytesOfCalleeSavedRegisters: 0 +  hasOpaqueSPAdjustment: false +  hasVAStart:      false +  hasMustTailInVarArgFunc: false +  localFrameSize:  0 +  savePoint:       '' +  restorePoint:    '' +fixedStack:      [] +stack:           [] +callSites:       [] +constants:       [] +machineFunctionInfo: {} +body:             | +  bb.0.entry: +    successors: %bb.5(0x50000000), %bb.1(0x30000000) +    liveins: $r0, $r1 +   +    renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx) +    t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.1, 13, killed $cpsr +   +  bb.5.while.cond.preheader: +    successors: %bb.8(0x30000000), %bb.6(0x50000000) +    liveins: $r0, $r2 +   +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.8, 0, killed $cpsr +   +  bb.6.land.rhs.preheader: +    successors: %bb.7(0x80000000) +    liveins: $r0, $r2 +   +    renamable $r1 = tUXTH killed renamable $r2, 14, $noreg +   +  bb.7.land.rhs: +    successors: %bb.8(0x04000000), %bb.7(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2) +    renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3) +    tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr +    t2IT 0, 8, implicit-def $itstate +    tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate +    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4) +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.7, 1, killed $cpsr +    t2B %bb.8, 14, $noreg +   +  bb.1.while.cond9.preheader: +    successors: %bb.8(0x30000000), %bb.2(0x50000000) +    liveins: $r0, $r1 +   +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.8, 0, killed $cpsr +   +  bb.2.land.rhs11.lr.ph: +    successors: %bb.3(0x80000000) +    liveins: $r0, $r1 +   +    renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143) +   +  bb.3.land.rhs11: +    successors: %bb.9(0x04000000), %bb.4(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12) +    renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2) +    tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.9, 0, killed $cpsr +   +  bb.4.while.body19: +    successors: %bb.8(0x04000000), %bb.3(0x7c000000) +    liveins: $r0, $r1 +   +    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206) +    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr +    t2Bcc %bb.3, 1, killed $cpsr +   +  bb.8: +    successors: %bb.9(0x80000000) +   +    renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg +   +  bb.9.return: +    liveins: $r0 +   +    tBX_RET 14, $noreg, implicit killed $r0 + +... | 

