diff options
10 files changed, 26 insertions, 50 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 045704da842..ecd3854c210 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -46,7 +46,7 @@ STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); static cl::opt<bool> -UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(true), cl::Hidden, +UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog.")); diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll index ebb9444d07a..b800b4ac545 100644 --- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; Tests for unrolling loops with run-time trip counts diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll index d1b5f84661b..b5299bb17f8 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll index a2e2f8811d6..5d7c6482478 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; This tests that setting the unroll count works diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll index 63b386c08d0..3ce8702a946 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; Choose a smaller, power-of-two, unroll count if the loop is too large. diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop4.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop4.ll index 8f1589134f7..4a9104eb00d 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop4.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop4.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -O2 -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -O2 -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; Check runtime unrolling prologue can be promoted by LICM pass. diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll index 78a3eeb1a37..e8d51775ce1 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll @@ -11,6 +11,9 @@ entry: %cmp1 = icmp eq i3 %n, 0 br i1 %cmp1, label %for.end, label %for.body +; UNROLL-16-NOT: for.body.prol: +; UNROLL-4: for.body.prol: + for.body: ; preds = %for.body, %entry ; UNROLL-16-LABEL: for.body: ; UNROLL-4-LABEL: for.body: @@ -36,10 +39,6 @@ for.body: ; preds = %for.body, %entry ; UNROLL-16-LABEL: for.end ; UNROLL-4-LABEL: for.end - -; UNROLL-16-NOT: for.body.epil: -; UNROLL-4: for.body.epil: - for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ] ret i3 %sum.0.lcssa diff --git a/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll b/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll index 7156629af6d..0d5681b539a 100644 --- a/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll +++ b/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll index 163a469661c..1e42203876e 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll @@ -4,14 +4,14 @@ ; RUN: opt < %s -O2 -S | FileCheck %s ; After loop unroll: -; %niter.nsub = add nsw i32 %niter, -1 +; %dec18 = add nsw i32 %dec18.in, -1 ; ... -; %niter.nsub.1 = add nsw i32 %niter.nsub, -1 +; %dec18.1 = add nsw i32 %dec18, -1 ; should be merged to: -; %dec18.1 = add nsw i32 %niter, -2 +; %dec18.1 = add nsw i32 %dec18.in, -2 ; ; CHECK-LABEL: @_Z3fn1v( -; CHECK: %niter.nsub.1 = add i32 %niter, -2 +; CHECK: %dec18.1 = add nsw i32 %dec18.in, -2 ; ModuleID = '<stdin>' target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll index 5b405a030a1..2843e627b3c 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -108,29 +108,6 @@ for.end: ; preds = %for.body !3 = !{!3, !4} !4 = !{!"llvm.loop.unroll.full"} -; #pragma clang loop unroll(full) -; Loop should be fully unrolled, even for optsize. -; -; CHECK-LABEL: @loop64_with_full_optsize( -; CHECK-NOT: br i1 -define void @loop64_with_full_optsize(i32* nocapture %a) optsize { -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32* %arrayidx, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 64 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 - -for.end: ; preds = %for.body - ret void -} - ; #pragma clang loop unroll_count(4) ; Loop should be unrolled 4 times. ; @@ -194,14 +171,14 @@ for.end: ; preds = %for.body, %entry ; should be duplicated (original and 4x unrolled). ; ; CHECK-LABEL: @runtime_loop_with_count4( -; CHECK: for.body +; CHECK: for.body.prol: ; CHECK: store +; CHECK-NOT: store +; CHECK: br i1 +; CHECK: for.body ; CHECK: store ; CHECK: store ; CHECK: store -; CHECK-NOT: store -; CHECK: br i1 -; CHECK: for.body.epil: ; CHECK: store ; CHECK-NOT: store ; CHECK: br i1 @@ -310,6 +287,10 @@ for.end: ; preds = %for.body ; (original and 8x). ; ; CHECK-LABEL: @runtime_loop_with_enable( +; CHECK: for.body.prol: +; CHECK: store +; CHECK-NOT: store +; CHECK: br i1 ; CHECK: for.body: ; CHECK: store i32 ; CHECK: store i32 @@ -321,10 +302,6 @@ for.end: ; preds = %for.body ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: br i1 -; CHECK: for.body.epil: -; CHECK: store -; CHECK-NOT: store -; CHECK: br i1 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 @@ -351,13 +328,13 @@ for.end: ; preds = %for.body, %entry ; should be duplicated (original and 3x unrolled). ; ; CHECK-LABEL: @runtime_loop_with_count3( -; CHECK: for.body -; CHECK: store -; CHECK: store +; CHECK: for.body.prol: ; CHECK: store ; CHECK-NOT: store ; CHECK: br i1 -; CHECK: for.body.epil: +; CHECK: for.body +; CHECK: store +; CHECK: store ; CHECK: store ; CHECK-NOT: store ; CHECK: br i1 |

