[CodeGenPrepare] limit overflow intrinsic matching to a single basic block

Using/updating a dominator tree to match math overflow patterns may be very expensive in compile-time (because of the way CGP uses a DT), so just handle the single-block case.

Also, we were restarting the iterator loops when doing the overflow intrinsic transforms by marking the dominator tree for update. That was done to prevent iterating over a removed instruction. But we can postpone the deletion using the existing "RemovedInsts" structure, and that means we don't need to update the DT.

See post-commit thread for rL354298 for more details:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20190422/646276.html

Differential Revision: https://reviews.llvm.org/D61075
llvm-svn: 359879
author: Sanjay Patel <spatel@rotateright.com> 2019-05-03 13:09:18 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-05-03 13:09:18 +0000
commit: 8ff072e48eceee35ff105d5d47853a9307302293 (patch)
tree: fd0d688c7b777abc105aed1840931e622a7c4389 /llvm/test/Transforms/CodeGenPrepare/X86
parent: fd75ee9154d25a7ac4b806a193ef7ad846703c0c (diff)
download: bcm5719-llvm-8ff072e48eceee35ff105d5d47853a9307302293.tar.gz
bcm5719-llvm-8ff072e48eceee35ff105d5d47853a9307302293.zip
2 files changed, 14 insertions, 15 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll
index dc638425355..05389bfe8d0 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll
@@ -14,11 +14,10 @@ define i1 @PR41004(i32 %x, i32 %y, i32 %t1) {
 ; CHECK-NEXT:    br label [[SELECT_END]]
 ; CHECK:       select.end:
 ; CHECK-NEXT:    [[MUL:%.*]] = phi i32 [ [[REM]], [[SELECT_TRUE_SINK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[T1:%.*]], i32 1)
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MATH]], [[MUL]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[NEG:%.*]] = add i32 [[T1:%.*]], -1
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[NEG]], [[MUL]]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT:    ret i1 [[TOBOOL]]
 ;
 entry:
   %rem = srem i32 %x, 2
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
index ab636c39ddb..9ba1d7db5ad 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -47,15 +47,16 @@ define i64 @uaddo3(i64 %a, i64 %b) nounwind ssp {
   ret i64 %Q
 }
 
+; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
+
 define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
 ; CHECK-LABEL: @uaddo4(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
 ; CHECK:       next:
-; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]]
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i64 0
@@ -362,7 +363,7 @@ define i1 @usubo_ne_constant0_op1_i32(i32 %x, i32* %p) {
   ret i1 %ov
 }
 
-; Verify insertion point for multi-BB.
+; This used to verify insertion point for multi-BB, but now we just bail out.
 
 declare void @call(i1)
 
@@ -371,15 +372,14 @@ define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; CHECK:       t:
-; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    store i64 [[S]], i64* [[P:%.*]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[END:%.*]], label [[F]]
 ; CHECK:       f:
 ; CHECK-NEXT:    ret i1 [[COND]]
 ; CHECK:       end:
-; CHECK-NEXT:    ret i1 [[OV1]]
+; CHECK-NEXT:    [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
+; CHECK-NEXT:    ret i1 [[OV]]
 ;
 entry:
   br i1 %cond, label %t, label %f
author	Sanjay Patel <spatel@rotateright.com>	2019-05-03 13:09:18 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-05-03 13:09:18 +0000
commit	8ff072e48eceee35ff105d5d47853a9307302293 (patch)
tree	fd0d688c7b777abc105aed1840931e622a7c4389 /llvm/test/Transforms/CodeGenPrepare/X86
parent	fd75ee9154d25a7ac4b806a193ef7ad846703c0c (diff)
download	bcm5719-llvm-8ff072e48eceee35ff105d5d47853a9307302293.tar.gz bcm5719-llvm-8ff072e48eceee35ff105d5d47853a9307302293.zip