diff options
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 73 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 1 |
| -rw-r--r-- | llvm/test/CodeGen/ARM/su-addsub-overflow.ll | 135 |
3 files changed, 207 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index f72e2c28ac0..aeda7c06a27 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1041,7 +1041,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,    if (!Subtarget->isThumb1Only())      setOperationAction(ISD::SETCCE, MVT::i32, Custom); -  setOperationAction(ISD::BRCOND,    MVT::Other, Expand); +  setOperationAction(ISD::BRCOND,    MVT::Other, Custom);    setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);    setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);    setOperationAction(ISD::BR_CC,     MVT::f64,   Custom); @@ -3894,6 +3894,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {    return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);  } +// This function returns three things: the arithmetic computation itself +// (Value), a comparison (OverflowCmp), and a condition code (ARMcc).  The +// comparison and the condition code define the case in which the arithmetic +// computation *does not* overflow.  std::pair<SDValue, SDValue>  ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,                                   SDValue &ARMcc) const { @@ -3919,7 +3923,11 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,      break;    case ISD::UADDO:      ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); -    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); +    // We use ADDC here to correspond to its use in LowerUnsignedALUO. +    // We do not use it in the USUBO case as Value may not be used. 
+    Value = DAG.getNode(ARMISD::ADDC, dl, +                        DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) +                .getValue(0);      OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);      break;    case ISD::SSUBO: @@ -4518,6 +4526,39 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {    return SDValue();  } +SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { +  SDValue Chain = Op.getOperand(0); +  SDValue Cond = Op.getOperand(1); +  SDValue Dest = Op.getOperand(2); +  SDLoc dl(Op); + +  // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction. +  unsigned Opc = Cond.getOpcode(); +  if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || +                               Opc == ISD::SSUBO || Opc == ISD::USUBO)) { +    // Only lower legal XALUO ops. +    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) +      return SDValue(); + +    // The actual operation with overflow check. +    SDValue Value, OverflowCmp; +    SDValue ARMcc; +    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + +    // Reverse the condition code. 
+    ARMCC::CondCodes CondCode = +        (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); +    CondCode = ARMCC::getOppositeCondition(CondCode); +    ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); +    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + +    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, +                       OverflowCmp); +  } + +  return SDValue(); +} +  SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {    SDValue Chain = Op.getOperand(0);    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); @@ -4538,6 +4579,33 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {      }    } +  // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction. +  unsigned Opc = LHS.getOpcode(); +  if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && +      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || +       Opc == ISD::USUBO) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { +    // Only lower legal XALUO ops. +    if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) +      return SDValue(); + +    // The actual operation with overflow check. +    SDValue Value, OverflowCmp; +    SDValue ARMcc; +    std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc); + +    if ((CC == ISD::SETNE) != isOneConstant(RHS)) { +      // Reverse the condition code. 
+      ARMCC::CondCodes CondCode = +          (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); +      CondCode = ARMCC::getOppositeCondition(CondCode); +      ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); +    } +    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + +    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, +                       OverflowCmp); +  } +    if (LHS.getValueType() == MVT::i32) {      SDValue ARMcc;      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -7793,6 +7861,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {    case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);    case ISD::SELECT:        return LowerSELECT(Op, DAG);    case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG); +  case ISD::BRCOND:        return LowerBRCOND(Op, DAG);    case ISD::BR_CC:         return LowerBR_CC(Op, DAG);    case ISD::BR_JT:         return LowerBR_JT(Op, DAG);    case ISD::VASTART:       return LowerVASTART(Op, DAG); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 5d6bc6368b0..bf63dfae440 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -644,6 +644,7 @@ class VectorType;      SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; +    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll new file mode 100644 index 00000000000..eef53128203 --- /dev/null +++ b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll 
@@ -0,0 +1,135 @@ +; RUN: llc < %s -mtriple=arm-eabi -mcpu=generic | FileCheck %s + +define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: sadd: +; CHECK:    mov r[[R0:[0-9]+]], r0 +; CHECK-NEXT:    add r[[R1:[0-9]+]], r[[R0]], r1 +; CHECK-NEXT:    cmp r[[R1]], r[[R0]] +; CHECK-NEXT:    movvc pc, lr +entry: +  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) +  %1 = extractvalue { i32, i1 } %0, 1 +  br i1 %1, label %trap, label %cont + +trap: +  tail call void @llvm.trap() #2 +  unreachable + +cont: +  %2 = extractvalue { i32, i1 } %0, 0 +  ret i32 %2 + +} + +define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: uadd: +; CHECK:    mov r[[R0:[0-9]+]], r0 +; CHECK-NEXT:    adds r[[R1:[0-9]+]], r[[R0]], r1 +; CHECK-NEXT:    cmp r[[R1]], r[[R0]] +; CHECK-NEXT:    movhs pc, lr +entry: +  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) +  %1 = extractvalue { i32, i1 } %0, 1 +  br i1 %1, label %trap, label %cont + +trap: +  tail call void @llvm.trap() #2 +  unreachable + +cont: +  %2 = extractvalue { i32, i1 } %0, 0 +  ret i32 %2 + +} + +define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: ssub: +; CHECK:    cmp r0, r1 +; CHECK-NEXT:    subvc r0, r0, r1 +; CHECK-NEXT:    movvc pc, lr +entry: +  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) +  %1 = extractvalue { i32, i1 } %0, 1 +  br i1 %1, label %trap, label %cont + +trap: +  tail call void @llvm.trap() #2 +  unreachable + +cont: +  %2 = extractvalue { i32, i1 } %0, 0 +  ret i32 %2 + +} + +define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: usub: +; CHECK:    mov r[[R0:[0-9]+]], r0 +; CHECK-NEXT:    subs r[[R1:[0-9]+]], r[[R0]], r1 +; CHECK-NEXT:    cmp r[[R0]], r1 +; CHECK-NEXT:    movhs pc, lr +entry: +  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) +  %1 = extractvalue { i32, i1 } %0, 1 +  br i1 %1, label %trap, label %cont + +trap: +  tail 
call void @llvm.trap() #2 +  unreachable + +cont: +  %2 = extractvalue { i32, i1 } %0, 0 +  ret i32 %2 + +} + +define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 { +; CHECK-LABEL: sum: +; CHECK:    ldr [[R0:r[0-9]+]], +; CHECK-NEXT:    ldr [[R1:r[0-9]+|lr]], +; CHECK-NEXT:    add [[R2:r[0-9]+]], [[R1]], [[R0]] +; CHECK-NEXT:    cmp [[R2]], [[R1]] +; CHECK-NEXT:    strvc [[R2]], +; CHECK-NEXT:    addvc +; CHECK-NEXT:    cmpvc +; CHECK-NEXT:    bvs +entry: +  %cmp7 = icmp eq i32 %n, 0 +  br i1 %cmp7, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: +  ret void + +for.body: +  %i.08 = phi i32 [ %7, %cont2 ], [ 0, %entry ] +  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.08 +  %0 = load i32, i32* %arrayidx, align 4 +  %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.08 +  %1 = load i32, i32* %arrayidx1, align 4 +  %2 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 %0) +  %3 = extractvalue { i32, i1 } %2, 1 +  br i1 %3, label %trap, label %cont + +trap: +  tail call void @llvm.trap() #2 +  unreachable + +cont: +  %4 = extractvalue { i32, i1 } %2, 0 +  store i32 %4, i32* %arrayidx1, align 4 +  %5 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.08, i32 1) +  %6 = extractvalue { i32, i1 } %5, 1 +  br i1 %6, label %trap, label %cont2 + +cont2: +  %7 = extractvalue { i32, i1 } %5, 0 +  %cmp = icmp eq i32 %7, %n +  br i1 %cmp, label %for.cond.cleanup, label %for.body + +} + +declare void @llvm.trap() #2 +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1  | 

