summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp 27
-rw-r--r-- llvm/test/Transforms/IndVarSimplify/elim-extend.ll 2
-rw-r--r-- llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll 10
-rw-r--r-- llvm/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll 158
-rw-r--r-- llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll 5
5 files changed, 195 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 056886f80e6..aa722dbb286 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1989,7 +1989,34 @@ linearFunctionTestReplace(Loop *L,
DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
} else {
+ // We try to extend the trip count first. If that doesn't work, we truncate
+ // the IV. zext(trunc(IV)) == IV implies that trunc(IV) == ExitCnt and
+ // IV == zext(ExitCnt) are equivalent; similarly for sext. If either identity
+ // holds, extend the trip count accordingly; otherwise truncate the IV.
+ bool Extended = false;
+ const SCEV *IV = SE->getSCEV(CmpIndVar);
+ const SCEV *ZExtTrunc =
+ SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+ ExitCnt->getType()),
+ CmpIndVar->getType());
+
+ if (ZExtTrunc == IV) {
+ Extended = true;
+ ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+ "wide.trip.count");
+ } else {
+ const SCEV *SExtTrunc =
+ SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+ ExitCnt->getType()),
+ CmpIndVar->getType());
+ if (SExtTrunc == IV) {
+ Extended = true;
+ ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
+ "wide.trip.count");
+ }
+ }
+ if (!Extended)
CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
"lftr.wideiv");
}
diff --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
index 8daac23ea74..6b6d5974165 100644
--- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -41,6 +41,8 @@ entry:
br i1 %precond, label %loop, label %return
; CHECK: loop:
; CHECK-NOT: sext
+; CHECK: wide.trip.count = sext
+; CHECK-NOT: sext
; CHECK: exit:
loop:
%iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
index 97635b560d6..763156d16c6 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s --implicit-check-not sext --implicit-check-not zext
target datalayout = "p:64:64:64-n32:64"
@@ -7,8 +7,8 @@ target datalayout = "p:64:64:64-n32:64"
; the IV is considered signed or unsigned.
define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
; CHECK-LABEL: @foo(
-; CHECK-NOT: zext
-; CHECK-NOT: sext
+; CHECK: wide.trip.count = zext
+; CHECK: ret void
entry:
%cmp1 = icmp slt i32 0, %N
br i1 %cmp1, label %for.body.lr.ph, label %for.end
@@ -45,8 +45,8 @@ for.end: ; preds = %for.cond.for.end_cr
define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
; CHECK-LABEL: @foo1(
-; CHECK-NOT: zext
-; CHECK-NOT: sext
+; CHECK: wide.trip.count = zext
+; CHECK: ret void
entry:
%cmp1 = icmp slt i32 0, %N
br i1 %cmp1, label %for.body.lr.ph, label %for.end
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll b/llvm/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll
new file mode 100644
index 00000000000..25c91ddef81
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll
@@ -0,0 +1,158 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+
+define void @test1(float* %autoc,
+ float* %data,
+ float %d, i32 %data_len, i32 %sample) nounwind {
+entry:
+ %sub = sub i32 %data_len, %sample
+ %cmp4 = icmp eq i32 %data_len, %sample
+ br i1 %cmp4, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 68719476736, %entry ]
+ %temp = trunc i64 %indvars.iv to i32
+ %add = add i32 %temp, %sample
+ %idxprom = zext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, float* %data, i64 %idxprom
+ %temp1 = load float, float* %arrayidx, align 4
+ %mul = fmul float %temp1, %d
+ %arrayidx2 = getelementptr inbounds float, float* %autoc, i64 %indvars.iv
+ %temp2 = load float, float* %arrayidx2, align 4
+ %add3 = fadd float %temp2, %mul
+ store float %add3, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %temp3 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp ult i32 %temp3, %sub
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+
+; CHECK-LABEL: @test1(
+
+; With the given initial value for the IV (2^36, which does not fit in
+; i32), it is not legal to widen the trip count to the IV size.
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
+; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
+}
+
+define float @test2(float* %a,
+ float* %b,
+ i32 zeroext %m) local_unnamed_addr #0 {
+entry:
+ %cmp5 = icmp ugt i32 %m, 500
+ br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+ %i.06 = phi i32 [ %inc, %for.body ], [ 500, %for.body.preheader ]
+ %idxprom = zext i32 %i.06 to i64
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+ %temp = load float, float* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+ %temp1 = load float, float* %arrayidx2, align 4
+ %mul = fmul float %temp, %temp1
+ %add = fadd float %sum.07, %mul
+ %inc = add i32 %i.06, 1
+ %cmp = icmp ult i32 %inc, %m
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.end.loopexit ]
+ ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test2(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = zext
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+; CHECK: br i1 %exitcond
+}
+
+define float @test3(float* %b,
+ i32 signext %m) local_unnamed_addr #0 {
+entry:
+ %cmp5 = icmp sgt i32 %m, -10
+ br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+ %i.06 = phi i32 [ %inc, %for.body ], [ -10, %for.body.preheader ]
+ %add = add nsw i32 %i.06, 20
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+ %temp = load float, float* %arrayidx, align 4
+ %conv = sitofp i32 %i.06 to float
+ %mul = fmul float %conv, %temp
+ %add1 = fadd float %sum.07, %mul
+ %inc = add nsw i32 %i.06, 1
+ %cmp = icmp slt i32 %inc, %m
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1, %for.end.loopexit ]
+ ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test3(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = sext
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+; CHECK: br i1 %exitcond
+}
+
+define float @test4(float* %b,
+ i32 signext %m) local_unnamed_addr #0 {
+entry:
+ %cmp5 = icmp sgt i32 %m, 10
+ br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+ %i.06 = phi i32 [ %inc, %for.body ], [ 10, %for.body.preheader ]
+ %add = add nsw i32 %i.06, 20
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+ %temp = load float, float* %arrayidx, align 4
+ %conv = sitofp i32 %i.06 to float
+ %mul = fmul float %conv, %temp
+ %add1 = fadd float %sum.07, %mul
+ %inc = add nsw i32 %i.06, 1
+ %cmp = icmp slt i32 %inc, %m
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ %add1.lcssa = phi float [ %add1, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1.lcssa, %for.end.loopexit ]
+ ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test4(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = zext
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+; CHECK: br i1 %exitcond
+}
+
+
diff --git a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
index 3c8c98bccb9..6a7e5b70ca1 100644
--- a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -33,8 +33,9 @@ for.end: ; preds = %for.body, %entry
; CHECK-LABEL: @test1(
; check that we turn the IV test into an eq.
-; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %su
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %wide.trip.count = zext i32 %sub to i64
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
}
OpenPOWER on IntegriCloud