summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 11
-rw-r--r-- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 21
2 files changed, 18 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fcda0e513ec..8bddebd75e5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5426,9 +5426,20 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
if (Opcode == ISD::UADDSAT) {
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS + RHS) | OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+ }
// Overflow ? 0xffff.... : (LHS + RHS)
return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
} else if (Opcode == ISD::USUBSAT) {
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS - RHS) & ~OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+ return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+ }
// Overflow ? 0 : (LHS - RHS)
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
} else {
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index efa99e80976..55b42e79053 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -404,8 +404,7 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %z
@@ -418,10 +417,8 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-NEXT: add v3.2d, v1.2d, v3.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d
; CHECK-NEXT: cmhi v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: bic v2.16b, v2.16b, v0.16b
-; CHECK-NEXT: bic v3.16b, v3.16b, v1.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: orr v1.16b, v3.16b, v1.16b
; CHECK-NEXT: ret
%z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
@@ -438,14 +435,10 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-NEXT: cmhi v1.2d, v1.2d, v5.2d
; CHECK-NEXT: cmhi v2.2d, v2.2d, v6.2d
; CHECK-NEXT: cmhi v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: bic v4.16b, v4.16b, v0.16b
-; CHECK-NEXT: bic v5.16b, v5.16b, v1.16b
-; CHECK-NEXT: bic v6.16b, v6.16b, v2.16b
-; CHECK-NEXT: bic v7.16b, v7.16b, v3.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v4.16b
-; CHECK-NEXT: orr v1.16b, v1.16b, v5.16b
-; CHECK-NEXT: orr v2.16b, v2.16b, v6.16b
-; CHECK-NEXT: orr v3.16b, v3.16b, v7.16b
+; CHECK-NEXT: orr v0.16b, v4.16b, v0.16b
+; CHECK-NEXT: orr v1.16b, v5.16b, v1.16b
+; CHECK-NEXT: orr v2.16b, v6.16b, v2.16b
+; CHECK-NEXT: orr v3.16b, v7.16b, v3.16b
; CHECK-NEXT: ret
%z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z
OpenPOWER on IntegriCloud