diff options
| -rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/sat-add.ll | 50 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll | 13 | ||||
| -rw-r--r-- | llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll | 9 |
4 files changed, 45 insertions, 46 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 792e4a537ea..bcb899a9e02 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1149,20 +1149,22 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, /// Try to combine the compare into a call to the llvm.uadd.with.overflow /// intrinsic. Return true if any changes were made. -static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI) { - // TODO: Why is this transform limited by this condition? - if (TLI.hasMultipleConditionRegisters()) - return false; - +static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI, + const DataLayout &DL) { Value *A, *B; Instruction *AddI; if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) return false; + // Allow the transform as long as we have an integer type that is not + // obviously illegal and unsupported. Type *Ty = AddI->getType(); if (!isa<IntegerType>(Ty)) return false; + EVT CodegenVT = TLI.getValueType(DL, Ty); + if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT)) + return false; // We don't want to move around uses of condition values this late, so we we // check if it is legal to create the call to the intrinsic in the basic @@ -1263,11 +1265,12 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { return MadeChange; } -static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { +static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, + const DataLayout &DL) { if (sinkCmpExpression(Cmp, TLI)) return true; - if (combineToUAddWithOverflow(Cmp, TLI)) + if (combineToUAddWithOverflow(Cmp, TLI, DL)) return true; return false; @@ -6714,7 +6717,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { } if (CmpInst *CI = dyn_cast<CmpInst>(I)) - if (TLI && optimizeCmpExpression(CI, *TLI)) + if (TLI && optimizeCmpExpression(CI, *TLI, *DL)) return true; if (LoadInst *LI = dyn_cast<LoadInst>(I)) { diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll index 515ddfe976c..7f4e1b8cc2a 100644 --- a/llvm/test/CodeGen/PowerPC/sat-add.ll +++ b/llvm/test/CodeGen/PowerPC/sat-add.ll @@ -24,12 +24,11 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) { define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) { ; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: addi 3, 3, 42 +; CHECK-NEXT: andi. 4, 3, 256 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: clrlwi 6, 5, 24 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i8 %x, 42 %c = icmp ugt i8 %x, %a @@ -70,12 +69,11 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) { define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) { ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-NEXT: addi 3, 3, 42 +; CHECK-NEXT: andis. 4, 3, 1 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: clrlwi 6, 5, 16 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i16 %x, 42 %c = icmp ugt i16 %x, %a @@ -117,8 +115,8 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: cmplw 0, 3, 5 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: cmplw 0, 5, 3 +; CHECK-NEXT: isel 3, 4, 5, 0 ; CHECK-NEXT: blr %a = add i32 %x, 42 %c = icmp ugt i32 %x, %a @@ -160,8 +158,8 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: cmpld 3, 5 -; CHECK-NEXT: isel 3, 4, 5, 1 +; CHECK-NEXT: cmpld 5, 3 +; CHECK-NEXT: isel 3, 4, 5, 0 ; CHECK-NEXT: blr %a = add i64 %x, 42 %c = icmp ugt i64 %x, %a @@ -204,12 +202,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) { define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) { ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: add 4, 3, 4 +; CHECK-NEXT: rlwinm 4, 4, 0, 24, 31 ; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 -; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: clrlwi 6, 4, 24 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: andi. 4, 3, 256 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i8 %x, %y %c = icmp ugt i8 %x, %a @@ -255,12 +253,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) { define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) { ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: add 4, 3, 4 +; CHECK-NEXT: rlwinm 4, 4, 0, 16, 31 ; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 -; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: clrlwi 6, 4, 16 -; CHECK-NEXT: cmplw 3, 6 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: andis. 4, 3, 1 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: isel 3, 3, 4, 2 ; CHECK-NEXT: blr %a = add i16 %x, %y %c = icmp ugt i16 %x, %a @@ -306,8 +304,8 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: add 4, 3, 4 ; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: cmplw 0, 3, 4 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: cmplw 0, 4, 3 +; CHECK-NEXT: isel 3, 5, 4, 0 ; CHECK-NEXT: blr %a = add i32 %x, %y %c = icmp ugt i32 %x, %a @@ -351,8 +349,8 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: add 4, 3, 4 ; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: isel 3, 5, 4, 1 +; CHECK-NEXT: cmpld 4, 3 +; CHECK-NEXT: isel 3, 5, 4, 0 ; CHECK-NEXT: blr %a = add i64 %x, %y %c = icmp ugt i64 %x, %a diff --git a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll index dbf32f0782f..2bc13cc57d2 100644 --- a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll +++ b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll @@ -252,15 +252,14 @@ define void @test_18446744073709551615(i64*, i64*) { define i1 @illegal_type(i17 %x, i17* %p) { ; CHECK-LABEL: illegal_type: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $131071, %edi # imm = 0x1FFFF ; CHECK-NEXT: addl $29, %edi -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: andl $131071, %ecx # imm = 0x1FFFF -; CHECK-NEXT: cmpl %edi, %ecx -; CHECK-NEXT: setne %al ; CHECK-NEXT: movw %di, (%rsi) -; CHECK-NEXT: shrl $16, %ecx -; CHECK-NEXT: movb %cl, 2(%rsi) +; CHECK-NEXT: andl $131071, %edi # imm = 0x1FFFF +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: movb %al, 2(%rsi) +; CHECK-NEXT: cmpl $29, %edi +; CHECK-NEXT: setb %al ; CHECK-NEXT: retq %a = add i17 %x, 29 store i17 %a, i17* %p diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll index 6b91a3b3c18..6be9661cc63 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll @@ -163,11 +163,10 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) { define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) { ; CHECK-LABEL: @uaddo_i42_increment_illegal_type( -; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i42, i1 } @llvm.uadd.with.overflow.i42(i42 [[X:%.*]], i42 1) -; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 0 -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 1 -; CHECK-NEXT: store i42 [[UADD]], i42* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OVERFLOW]] +; CHECK-NEXT: [[A:%.*]] = add i42 [[X:%.*]], 1 +; CHECK-NEXT: [[OV:%.*]] = icmp eq i42 [[A]], 0 +; CHECK-NEXT: store i42 [[A]], i42* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV]] ; %a = add i42 %x, 1 %ov = icmp eq i42 %a, 0 |

