summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp 37
-rw-r--r-- llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll (renamed from llvm/test/CodeGen/ARM/su-addsub-overflow.ll) 40
2 files changed, 72 insertions, 5 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 908e356acba..47c4712aad6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3942,6 +3942,29 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
+ case ISD::UMULO:
+ // We generate a UMUL_LOHI and then check if the high word is 0.
+ ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
+ Value = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(Op.getValueType(), Op.getValueType()),
+ LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
+ DAG.getConstant(0, dl, MVT::i32));
+ Value = Value.getValue(0); // We only want the low 32 bits for the result.
+ break;
+ case ISD::SMULO:
+ // We generate a SMUL_LOHI and then check if all the bits of the high word
+ // are the same as the sign bit of the low word.
+ ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
+ Value = DAG.getNode(ISD::SMUL_LOHI, dl,
+ DAG.getVTList(Op.getValueType(), Op.getValueType()),
+ LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
+ DAG.getNode(ISD::SRA, dl, Op.getValueType(),
+ Value.getValue(0),
+ DAG.getConstant(31, dl, MVT::i32)));
+ Value = Value.getValue(0); // We only want the low 32 bits for the result.
+ break;
} // switch (...)
return std::make_pair(Value, OverflowCmp);
@@ -4534,10 +4557,12 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
- // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
+ // instruction.
unsigned Opc = Cond.getOpcode();
- if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO ||
- Opc == ISD::SSUBO || Opc == ISD::USUBO)) {
+ if (Cond.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
return SDValue();
@@ -4581,11 +4606,13 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
- // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
+ // instruction.
unsigned Opc = LHS.getOpcode();
if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
return SDValue();
diff --git a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
index 04e59e05b6d..2bfd18720bc 100644
--- a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll
+++ b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
@@ -76,6 +76,44 @@ cont:
}
+define i32 @smul(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: smul:
+; CHECK: smull r0, r[[RHI:[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK-NEXT: cmp r[[RHI]], r0, asr #31
+; CHECK-NEXT: moveq pc, lr
+entry:
+ %0 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %0, 1
+ br i1 %1, label %trap, label %cont
+
+trap:
+ tail call void @llvm.trap() #2
+ unreachable
+
+cont:
+ %2 = extractvalue { i32, i1 } %0, 0
+ ret i32 %2
+}
+
+define i32 @umul(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: umul:
+; CHECK: umull r0, r[[RHI:[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK-NEXT: cmp r[[RHI]], #0
+; CHECK-NEXT: moveq pc, lr
+entry:
+ %0 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %0, 1
+ br i1 %1, label %trap, label %cont
+
+trap:
+ tail call void @llvm.trap() #2
+ unreachable
+
+cont:
+ %2 = extractvalue { i32, i1 } %0, 0
+ ret i32 %2
+}
+
define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 {
; CHECK-LABEL: sum:
; CHECK: ldr [[R0:r[0-9]+]],
@@ -164,3 +202,5 @@ declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) #1
OpenPOWER on IntegriCloud