diff options
| -rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/sat-add.ll | 50 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll | 13 | ||||
| -rw-r--r-- | llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll | 9 |
4 files changed, 45 insertions, 46 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 792e4a537ea..bcb899a9e02 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1149,20 +1149,22 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, /// Try to combine the compare into a call to the llvm.uadd.with.overflow /// intrinsic. Return true if any changes were made. -static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI) { - // TODO: Why is this transform limited by this condition? - if (TLI.hasMultipleConditionRegisters()) - return false; - +static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI, + const DataLayout &DL) { Value *A, *B; Instruction *AddI; if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) return false; + // Allow the transform as long as we have an integer type that is not + // obviously illegal and unsupported. Type *Ty = AddI->getType(); if (!isa<IntegerType>(Ty)) return false; + EVT CodegenVT = TLI.getValueType(DL, Ty); + if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT)) + return false; // We don't want to move around uses of condition values this late, so we we // check if it is legal to create the call to the intrinsic in the basic @@ -1263,11 +1265,12 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { return MadeChange; } -static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { +static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, + const DataLayout &DL) { if (sinkCmpExpression(Cmp, TLI)) return true; - if (combineToUAddWithOverflow(Cmp, TLI)) + if (combineToUAddWithOverflow(Cmp, TLI, DL)) return true; return false; @@ -6714,7 +6717,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { } if (CmpInst *CI = dyn_cast<CmpInst>(I)) - if (TLI && optimizeCmpExpression(CI, *TLI)) + if (TLI && optimizeCmpExpression(CI, *TLI, *DL)) return true; if (LoadInst *LI = dyn_cast<LoadInst>(I)) { diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll index 515ddfe976c..7f4e1b8cc2a 100644 --- a/llvm/test/CodeGen/PowerPC/sat-add.ll +++ b/llvm/test/CodeGen/PowerPC/sat-add.ll @@ -24,12 +24,11 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) { define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) { ; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: addi 3, 3, 42 +; CHECK-NEXT: andi. 4, 3, 256 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: clrlwi 6, 5, 24 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i8 %x, 42 %c = icmp ugt i8 %x, %a @@ -70,12 +69,11 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) { define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) { ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-NEXT: addi 3, 3, 42 +; CHECK-NEXT: andis. 4, 3, 1 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: clrlwi 6, 5, 16 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i16 %x, 42 %c = icmp ugt i16 %x, %a @@ -117,8 +115,8 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: cmplw 0, 3, 5 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: cmplw 0, 5, 3 +; CHECK-NEXT: isel 3, 4, 5, 0 ; CHECK-NEXT: blr %a = add i32 %x, 42 %c = icmp ugt i32 %x, %a @@ -160,8 +158,8 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: cmpld 3, 5 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: cmpld 5, 3 +; CHECK-NEXT: isel 3, 4, 5, 0 ; CHECK-NEXT: blr %a = add i64 %x, 42 %c = icmp ugt i64 %x, %a @@ -204,12 +202,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) { define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) { ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: add 4, 3, 4 +; CHECK-NEXT: rlwinm 4, 4, 0, 24, 31 ; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 -; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: clrlwi 6, 4, 24 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: andi. 4, 3, 256 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i8 %x, %y %c = icmp ugt i8 %x, %a @@ -255,12 +253,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) { define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) { ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: add 4, 3, 4 +; CHECK-NEXT: rlwinm 4, 4, 0, 16, 31 ; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 -; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: clrlwi 6, 4, 16 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: andis. 4, 3, 1 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i16 %x, %y %c = icmp ugt i16 %x, %a @@ -306,8 +304,8 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: add 4, 3, 4 ; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: cmplw 0, 3, 4 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: cmplw 0, 4, 3 +; CHECK-NEXT: isel 3, 5, 4, 0 ; CHECK-NEXT: blr %a = add i32 %x, %y %c = icmp ugt i32 %x, %a @@ -351,8 +349,8 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: add 4, 3, 4 ; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: cmpld 4, 3 +; CHECK-NEXT: isel 3, 5, 4, 0 ; CHECK-NEXT: blr %a = add i64 %x, %y %c = icmp ugt i64 %x, %a diff --git a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll index dbf32f0782f..2bc13cc57d2 100644 --- a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll +++ b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll @@ -252,15 +252,14 @@ define void @test_18446744073709551615(i64*, i64*) { define i1 @illegal_type(i17 %x, i17* %p) { ; CHECK-LABEL: illegal_type: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $131071, %edi # imm = 0x1FFFF ; CHECK-NEXT: addl $29, %edi -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: andl $131071, %ecx # imm = 0x1FFFF -; CHECK-NEXT: cmpl %edi, %ecx -; CHECK-NEXT: setne %al ; CHECK-NEXT: movw %di, (%rsi) -; CHECK-NEXT: shrl $16, %ecx -; CHECK-NEXT: movb %cl, 2(%rsi) +; CHECK-NEXT: andl $131071, %edi # imm = 0x1FFFF +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: movb %al, 2(%rsi) +; CHECK-NEXT: cmpl $29, %edi +; CHECK-NEXT: setb %al ; CHECK-NEXT: retq %a = add i17 %x, 29 store i17 %a, i17* %p diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll index 6b91a3b3c18..6be9661cc63 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll @@ -163,11 +163,10 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) { define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) { ; CHECK-LABEL: @uaddo_i42_increment_illegal_type( -; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i42, i1 } @llvm.uadd.with.overflow.i42(i42 [[X:%.*]], i42 1) -; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 0 -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 1 -; CHECK-NEXT: store i42 [[UADD]], i42* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OVERFLOW]] +; CHECK-NEXT: [[A:%.*]] = add i42 [[X:%.*]], 1 +; CHECK-NEXT: [[OV:%.*]] = icmp eq i42 [[A]], 0 +; CHECK-NEXT: store i42 [[A]], i42* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV]] ; %a = add i42 %x, 1 %ov = icmp eq i42 %a, 0 |

