[IndVars] Eliminate op.with.overflow when possible (re-apply)

Summary: If we can prove that an op.with.overflow intrinsic does not overflow, we can get rid of the intrinsic and replace it with non-wrapping arithmetic.

This was first checked in at r265913 but reverted in r265950 because it exposed some issues around how SCEV handled post-inc add recurrences. Those issues have now been fixed.

Reviewers: atrick, regehr
Subscribers: sanjoy, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D18685
llvm-svn: 271153
author: Sanjoy Das <sanjoy@playingwithpointers.com> 2016-05-29 00:36:25 +0000
committer: Sanjoy Das <sanjoy@playingwithpointers.com> 2016-05-29 00:36:25 +0000
commit: ae09b3cd4c8d647d99dd8467426df8f2846e02f7 (patch)
tree: b077296f696af407d33cd4b49bad95f16c9384a9 /llvm/test/Transforms/IndVarSimplify
parent: f49ca52b9d1ddf3c842a2ee89caa86ec2b059cfc (diff)
download: bcm5719-llvm-ae09b3cd4c8d647d99dd8467426df8f2846e02f7.tar.gz
bcm5719-llvm-ae09b3cd4c8d647d99dd8467426df8f2846e02f7.zip
1 files changed, 137 insertions, 0 deletions
diff --git a/llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll b/llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
new file mode 100644
index 00000000000..7715abc8ada
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
@@ -0,0 +1,137 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f_sadd(i8* %a) {                     ; stores 0 to %a[0..15]; index stepped by a signed checked add
+; CHECK-LABEL: @f_sadd(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]    ; loop index: 0 on entry, then the checked sum from %cont
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)  ; { %i.04 + 1, signed-overflow flag }
+  %1 = extractvalue { i32, i1 } %0, 1             ; overflow flag
+; CHECK: for.body:
+; CHECK-NOT: @llvm.sadd.with.overflow
+; CHECK:  br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}  ; the directives above expect this condition to fold to false
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap() #2, !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0             ; the sum, i.e. next index value
+  %cmp = icmp slt i32 %2, 16                      ; keep looping while next index < 16, so %i.04 never exceeds 15
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_uadd(i8* %a) {                     ; stores 0 to %a[0..15]; index stepped by an unsigned checked add
+; CHECK-LABEL: @f_uadd(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]    ; loop index: 0 on entry, then the checked sum from %cont
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)  ; { %i.04 + 1, unsigned-overflow flag }
+  %1 = extractvalue { i32, i1 } %0, 1             ; overflow flag
+; CHECK: for.body:
+; CHECK-NOT: @llvm.uadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}  ; the directives above expect this condition to fold to false
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0             ; the sum, i.e. next index value
+  %cmp = icmp slt i32 %2, 16                      ; signed exit test although the add is unsigned-checked; loops while next index < 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_ssub(i8* nocapture %a) {           ; stores 0 to %a[15..0]; index stepped down by a signed checked sub
+; CHECK-LABEL: @f_ssub(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]   ; loop index: 15 on entry, then the checked difference from %cont
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)  ; { %i.04 - 1, signed-overflow flag }
+  %1 = extractvalue { i32, i1 } %0, 1             ; overflow flag
+; CHECK: for.body:
+; CHECK-NOT: @llvm.ssub.with.overflow.i32
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}  ; the directives above expect this condition to fold to false
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0             ; the difference, i.e. next index value
+  %cmp = icmp sgt i32 %2, -1                      ; keep looping while next index >= 0; the last difference computed is 0 - 1 = -1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_usub(i8* nocapture %a) {           ; negative case: the directives below expect the overflow branch to stay conditional
+; CHECK-LABEL: @f_usub(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]   ; loop index: 15 on entry, then the checked difference from %cont
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)  ; { %i.04 - 1, unsigned-overflow flag }
+  %1 = extractvalue { i32, i1 } %0, 1             ; overflow flag (i1); the pattern below uses %1 as the struct operand, so it presumably refers to the output's numbering - confirm
+
+; It is theoretically possible to prove this, but SCEV cannot
+; represent non-unsigned-wrapping subtraction operations.
+
+; CHECK: for.body:
+; CHECK:  [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1
+; CHECK-NEXT:  br i1 [[COND]], label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0             ; the difference, i.e. next index value
+  %cmp = icmp sgt i32 %2, -1                      ; NOTE(review): loops while next index >= 0, so an iteration with %i.04 == 0 runs and usub(0, 1) wraps there; the "possible to prove" remark above looks inaccurate for this bound - confirm
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone  ; called by @f_sadd
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone  ; called by @f_uadd
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone  ; called by @f_ssub
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone  ; called by @f_usub
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone  ; not called by any function visible in this file view
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone  ; not called by any function visible in this file view
+
+declare void @llvm.trap() #2                      ; NOTE(review): no "attributes #2" definition is visible in this view - confirm it is intentional
author	Sanjoy Das <sanjoy@playingwithpointers.com>	2016-05-29 00:36:25 +0000
committer	Sanjoy Das <sanjoy@playingwithpointers.com>	2016-05-29 00:36:25 +0000
commit	ae09b3cd4c8d647d99dd8467426df8f2846e02f7 (patch)
tree	b077296f696af407d33cd4b49bad95f16c9384a9 /llvm/test/Transforms/IndVarSimplify
parent	f49ca52b9d1ddf3c842a2ee89caa86ec2b059cfc (diff)
download	bcm5719-llvm-ae09b3cd4c8d647d99dd8467426df8f2846e02f7.tar.gz bcm5719-llvm-ae09b3cd4c8d647d99dd8467426df8f2846e02f7.zip