From 8c68171fef79b4194ebb639e66eb04ab55f5059f Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Tue, 12 May 2015 17:20:03 +0000 Subject: Reimplement heuristic for estimating complete-unroll optimization effects. Summary: This patch reimplements heuristic that tries to estimate optimization benefits from complete loop unrolling. In this patch I kept the minimal changes - e.g. I removed code handling branches and folding compares. That's a promising area, but now there are too many questions to discuss before we can enable it. Test Plan: Tests are included in the patch. Reviewers: hfinkel, chandlerc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8816 llvm-svn: 237156 --- .../Transforms/LoopUnroll/full-unroll-bad-geps.ll | 34 ++++++++++++++++++++++ .../LoopUnroll/full-unroll-heuristics.ll | 4 +-- 2 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll (limited to 'llvm/test') diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll new file mode 100644 index 00000000000..4c99bc73880 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll @@ -0,0 +1,34 @@ +; Check that we don't crash on corner cases. 
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=10 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=20 -o /dev/null +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo1() { +entry: + br label %for.body + +for.body: + %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %idx = zext i32 undef to i64 + %add.ptr = getelementptr inbounds i64, i64* null, i64 %idx + %inc = add nuw nsw i64 %phi, 1 + %cmp = icmp ult i64 %inc, 999 + br i1 %cmp, label %for.body, label %for.exit + +for.exit: + ret void +} + +define void @foo2() { +entry: + br label %for.body + +for.body: + %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %x = getelementptr i32, <4 x i32*> undef, <4 x i32> + %inc = add nuw nsw i64 %phi, 1 + %cmp = icmp ult i64 %inc, 999 + br i1 %cmp, label %for.body, label %for.exit + +for.exit: + ret void +} diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll index a9104adeb97..2dab2fbf2e4 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll @@ -17,8 +17,8 @@ ; optimizations to remove ~55% of the instructions, the loop body size is 9, ; and unrolled size is 65. 
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=10 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=30 | FileCheck %s -check-prefix=TEST1 -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=30 | FileCheck %s -check-prefix=TEST2 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=10 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=20 | FileCheck %s -check-prefix=TEST1 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=20 | FileCheck %s -check-prefix=TEST2 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST3 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=100 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST4 -- cgit v1.2.3