summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
diff options
context:
space:
mode:
author Silviu Baranga <silviu.baranga@arm.com> 2016-04-25 09:27:16 +0000
committer Silviu Baranga <silviu.baranga@arm.com> 2016-04-25 09:27:16 +0000
commit 795c629ec93cfa4da558df3231c1309fbe6883be (patch)
tree b41c451865a0d8d0698edc33c905f0d09c9573bf /llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
parent a44d44cb2ea8264c905d65e8bb1c94f3abbfac3d (diff)
download bcm5719-llvm-795c629ec93cfa4da558df3231c1309fbe6883be.tar.gz
bcm5719-llvm-795c629ec93cfa4da558df3231c1309fbe6883be.zip
[SCEV] Improve the run-time checking of the NoWrap predicate
Summary:
This implements a new method of run-time checking the NoWrap SCEV predicates, which should be easier to optimize and nicer for targets that don't correctly handle multiplication/addition of large integer types (like i128).

If the AddRec is {a,+,b} and the backedge taken count is c, the idea is to check that |b| * c doesn't have unsigned overflow, and depending on the sign of b, that:

    a + |b| * c >= a (b >= 0) or
    a - |b| * c <= a (b <= 0)

where the comparisons above are signed or unsigned, depending on the flag that we're checking.

The advantage of doing this is that we avoid extending to a larger type and we avoid the multiplication of large types (multiplying i128 can be expensive).

Reviewers: sanjoy
Subscribers: llvm-commits, mzolotukhin
Differential Revision: http://reviews.llvm.org/D19266
llvm-svn: 267389
Diffstat (limited to 'llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll')
-rw-r--r-- llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll | 139
1 files changed, 127 insertions, 12 deletions
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index ee9078c6607..5f305cebc27 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -38,9 +38,32 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; LV-LABEL: f1
; LV-LABEL: for.body.lver.check
-; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
-; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
-; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
+
+; LV: [[BETrunc:%[^ ]*]] = trunc i64 [[BE:%[^ ]*]] to i32
+; LV-NEXT: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
+; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
+; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
+; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], 0
+; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], 0
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
+; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
+; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
+; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
+
+; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
+
+; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
+; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
+; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
+; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
+
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f1(i16* noalias %a,
@@ -111,9 +134,31 @@ for.end: ; preds = %for.body
; LV-LABEL: f2
; LV-LABEL: for.body.lver.check
-; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
-; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
-; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
+
+; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
+; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
+; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
+; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], [[Start]]
+; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], [[Start]]
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
+; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
+; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
+; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
+
+; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
+
+; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
+; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
+; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
+; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
+
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f2(i16* noalias %a,
@@ -169,9 +214,31 @@ for.end: ; preds = %for.body
; LV-LABEL: f3
; LV-LABEL: for.body.lver.check
-; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
-; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
-; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
+
+; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
+; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
+; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
+; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], 0
+; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], 0
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
+; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
+; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
+; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
+
+; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
+
+; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
+; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
+; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
+; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
+
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f3(i16* noalias %a,
@@ -223,9 +290,31 @@ for.end: ; preds = %for.body
; LV-LABEL: f4
; LV-LABEL: for.body.lver.check
-; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
-; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
-; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
+
+; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
+; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
+; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
+; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
+; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
+; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
+; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
+; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
+
+; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
+
+; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
+; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
+; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
+; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
+
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f4(i16* noalias %a,
@@ -280,6 +369,32 @@ for.end: ; preds = %for.body
; LV-LABEL: f5
; LV-LABEL: for.body.lver.check
+; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
+; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
+; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
+; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
+; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
+; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
+; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
+; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
+
+; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
+
+; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
+; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
+; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
+; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
+; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
+
+; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
+; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f5(i16* noalias %a,
i16* noalias %b, i64 %N) {
entry:
OpenPOWER on IntegriCloud