 llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 67
 llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll     | 38
 2 files changed, 79 insertions(+), 26 deletions(-)
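Summary of the change below: ReduceMBB now computes IsSelfLoop = MBB.isSuccessor(&MBB) and threads it through ReduceSpecial, ReduceTo2Addr, and ReduceToNarrow into canAddPseudoFlagDep. In a block that branches back to itself, the nearest CPSR-defining instruction may sit across the backedge, so until the block defines CPSR itself (or a call is seen), the pass conservatively refuses to narrow the first instruction that would partially update CPSR.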
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 89a155c5a7f..e5fc8b4fdd5 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -146,7 +146,8 @@ namespace {
     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
 
-    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+                             bool IsSelfLoop);
 
     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                          bool is2Addr, ARMCC::CondCodes Pred,
@@ -157,19 +158,21 @@ namespace {
 
     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry, bool LiveCPSR,
-                       MachineInstr *CPSRDef);
+                       MachineInstr *CPSRDef, bool IsSelfLoop);
 
     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     /// instruction.
     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry,
-                       bool LiveCPSR, MachineInstr *CPSRDef);
+                       bool LiveCPSR, MachineInstr *CPSRDef,
+                       bool IsSelfLoop);
 
     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     /// non-two-address instruction.
     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry,
-                        bool LiveCPSR, MachineInstr *CPSRDef);
+                        bool LiveCPSR, MachineInstr *CPSRDef,
+                        bool IsSelfLoop);
 
     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     bool ReduceMBB(MachineBasicBlock &MBB);
@@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
 /// In this case it would have been ok to narrow the mul.w to muls since there
 /// are indirect RAW dependency between the muls and the mul.w
 bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
-  if (!Def || !STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+                                      bool FirstInSelfLoop) {
+  // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
+  if (!STI->avoidCPSRPartialUpdate())
     return false;
 
+  if (!Def)
+    // If this BB loops back to itself, conservatively avoid narrowing the
+    // first instruction that does partial flag update.
+    return FirstInSelfLoop;
+
   SmallSet<unsigned, 2> Defs;
   for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = Def->getOperand(i);
@@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef) {
+                                bool LiveCPSR, MachineInstr *CPSRDef,
+                                bool IsSelfLoop) {
   unsigned Opc = MI->getOpcode();
   if (Opc == ARM::t2ADDri) {
     // If the source register is SP, try to reduce to tADDrSPi, otherwise
     // it's a normal reduce.
     if (MI->getOperand(1).getReg() != ARM::SP) {
-      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
         return true;
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
     }
     // Try to reduce to tADDrSPi.
     unsigned Imm = MI->getOperand(2).getImm();
@@ -535,12 +546,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
       switch (Opc) {
       default: break;
       case ARM::t2ADDSri: {
-        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
           return true;
         // fallthrough
       }
       case ARM::t2ADDSrr:
-        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
       }
     }
     break;
@@ -552,13 +563,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
   case ARM::t2UXTB:
   case ARM::t2UXTH:
     if (MI->getOperand(2).getImm() == 0)
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
     break;
   case ARM::t2MOVi16:
     // Can convert only 'pure' immediate operands, not immediates obtained as
     // globals' addresses.
     if (MI->getOperand(1).isImm())
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
     break;
   case ARM::t2CMPrr: {
     // Try to reduce to the lo-reg only version first. Why there are two
@@ -568,9 +579,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     // source insn opcode. So for now, we hack a local entry record to use.
     static const ReduceEntry NarrowEntry =
       { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
-    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
       return true;
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
   }
   }
   return false;
@@ -579,7 +590,8 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef) {
+                                bool LiveCPSR, MachineInstr *CPSRDef,
+                                bool IsSelfLoop) {
 
   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;
@@ -637,7 +649,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI))
+      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
     return false;
 
   // Add the 16-bit instruction.
@@ -674,7 +686,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry,
-                                 bool LiveCPSR, MachineInstr *CPSRDef) {
+                                 bool LiveCPSR, MachineInstr *CPSRDef,
+                                 bool IsSelfLoop) {
   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
     return false;
 
@@ -727,7 +740,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI))
+      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
     return false;
 
   // Add the 16-bit instruction.
@@ -818,6 +831,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
   MachineInstr *CPSRDef = 0;
 
+  // If this BB loops back to itself, conservatively avoid narrowing the
+  // first instruction that does partial flag update.
+  bool IsSelfLoop = MBB.isSuccessor(&MBB);
   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
   MachineBasicBlock::iterator NextMII;
   for (; MII != E; MII = NextMII) {
@@ -832,7 +848,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       const ReduceEntry &Entry = ReduceTable[OPI->second];
       // Ignore "special" cases for now.
       if (Entry.Special) {
-        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
           Modified = true;
           MachineBasicBlock::iterator I = prior(NextMII);
           MI = &*I;
@@ -842,7 +858,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 
       // Try to transform to a 16-bit two-address instruction.
       if (Entry.NarrowOpc2 &&
-          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
@@ -851,7 +867,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 
       // Try to transform to a 16-bit non-two-address instruction.
       if (Entry.NarrowOpc1 &&
-          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
@@ -861,12 +877,15 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   ProcessNext:
     bool DefCPSR = false;
     LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
-    if (MI->getDesc().isCall())
+    if (MI->getDesc().isCall()) {
       // Calls don't really set CPSR.
       CPSRDef = 0;
-    else if (DefCPSR)
+      IsSelfLoop = false;
+    } else if (DefCPSR) {
       // This is the last CPSR defining instruction.
       CPSRDef = MI;
+      IsSelfLoop = false;
+    }
   }
 
   return Modified;
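Taken together, the .cpp hunks implement a single rule. Below is a minimal standalone sketch of it, assuming hypothetical names (wouldAddPseudoFlagDep and its three booleans stand in for the subtarget query, the CPSRDef pointer, and the threaded IsSelfLoop flag; this is not the LLVM API):

// Hypothetical distillation of the patched canAddPseudoFlagDep.
// Returning true means: do not narrow to a flag-setting 16-bit
// encoding, it could add a false partial-CPSR-update dependency.
bool wouldAddPseudoFlagDep(bool AvoidPartialCPSRUpdate, // subtarget knob
                           bool SeenCPSRDefInBlock,     // CPSRDef != 0
                           bool FirstInSelfLoop) {      // IsSelfLoop still set
  if (!AvoidPartialCPSRUpdate)
    return false;       // this target doesn't penalize partial flag updates
  if (!SeenCPSRDefInBlock)
    // No CPSR def yet in this block: the nearest def may sit across the
    // backedge, so stay conservative only if the block loops to itself.
    return FirstInSelfLoop;
  // Otherwise the real code inspects the in-block defining instruction's
  // operands for an overlapping def/use (elided here).
  return false;
}

The test update below exercises exactly the backedge case: the new @t2 has a while.body block that branches back to itself, and the CHECK lines require its multiplies to stay in the 32-bit, non-flag-setting form (mul, not muls).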
diff --git a/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 92aff7007f2..877ec180626 100644
--- a/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -3,9 +3,9 @@
 ; dependency) when it isn't dependent on last CPSR defining instruction.
 ; rdar://8928208
-define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
 
 entry:
-; CHECK: t:
+; CHECK: t1:
 ; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
 ; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r0, r1
 ; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
@@ -14,3 +14,37 @@ define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
   %2 = mul nsw i32 %0, %1
   ret i32 %2
 }
+
+; Avoid partial CPSR dependency via loop backedge.
+; rdar://10357570
+define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
+entry:
+; CHECK: t2:
+  %tobool7 = icmp eq i32* %ptr2, null
+  br i1 %tobool7, label %while.end, label %while.body
+
+while.body:
+; CHECK: while.body
+; CHECK: mul r{{[0-9]+}}
+; CHECK-NOT: muls
+  %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
+  %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
+  %0 = load i32* %ptr1.addr.09, align 4
+  %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
+  %1 = load i32* %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
+  %3 = load i32* %arrayidx4, align 4
+  %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+  %mul = mul i32 %1, %0
+  %mul5 = mul i32 %mul, %2
+  %mul6 = mul i32 %mul5, %3
+  store i32 %mul6, i32* %ptr2.addr.08, align 4
+  %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+  %tobool = icmp eq i32* %incdec.ptr, null
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
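For completeness, here is a simplified model of the per-instruction bookkeeping in ReduceMBB (again with hypothetical names; the real pass mutates the CPSRDef and IsSelfLoop locals while scanning the block):

// IsSelfLoop is seeded from MBB.isSuccessor(&MBB) and cleared at the
// first call or CPSR def; after that, the in-block def rather than the
// backedge is what the narrowing check consults.
struct CPSRState {
  bool SeenCPSRDef = false; // stands in for "CPSRDef != 0"
  bool IsSelfLoop;          // seeded from the block's CFG
};

void noteInstruction(CPSRState &S, bool IsCall, bool DefinesCPSR) {
  if (IsCall) {
    S.SeenCPSRDef = false;  // calls don't really set CPSR
    S.IsSelfLoop = false;
  } else if (DefinesCPSR) {
    S.SeenCPSRDef = true;   // last CPSR-defining instruction is in-block
    S.IsSelfLoop = false;
  }
}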

