summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tobias Grosser <tobias@grosser.es> 2015-08-20 13:45:05 +0000
committer: Tobias Grosser <tobias@grosser.es> 2015-08-20 13:45:05 +0000
commit: 42e2489553ff6baafae8a909c123bfadd751fe4a (patch)
tree: 314ff8335a997525d439ff91b099a9fc291ab3df
parent: 048327166298f52948d7e48110a441a694955af8 (diff)
download: bcm5719-llvm-42e2489553ff6baafae8a909c123bfadd751fe4a.tar.gz
bcm5719-llvm-42e2489553ff6baafae8a909c123bfadd751fe4a.zip
Add experimental support for trivial register tiling
Register tiling in Polly is, for now, just an additional level of tiling that is fully unrolled. It is disabled by default. To make this useful for more than experiments, we still need a cost function and possibly further optimizations that teach LLVM to actually place some of the resulting values in scalar registers.

llvm-svn: 245564
-rw-r--r-- polly/lib/Transform/ScheduleOptimizer.cpp 30
-rw-r--r-- polly/test/ScheduleOptimizer/rectangular-tiling.ll 21
2 files changed, 51 insertions, 0 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 8c773d817a1..4fcc4e4e04a 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -142,6 +142,24 @@ static cl::list<int>
cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
cl::cat(PollyCategory));
+static cl::opt<bool> RegisterTiling("polly-register-tiling",
+ cl::desc("Enable register tiling"),
+ cl::init(false), cl::ZeroOrMore,
+ cl::cat(PollyCategory));
+
+static cl::opt<int> RegisterDefaultTileSize(
+ "polly-register-tiling-default-tile-size",
+ cl::desc("The default register tile size (if not enough were provided by"
+ " --polly-register-tile-sizes)"),
+ cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+static cl::list<int>
+ RegisterTileSizes("polly-register-tile-sizes",
+ cl::desc("A tile size for each loop dimension, filled "
+ "with --polly-register-tile-size"),
+ cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
+ cl::cat(PollyCategory));
+
namespace {
class IslScheduleOptimizer : public ScopPass {
@@ -289,6 +307,11 @@ IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
Node = isl_schedule_node_band_tile(Node, Sizes);
Node = isl_schedule_node_child(Node, 0);
Node = isl_schedule_node_band_sink(Node);
+
+ // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
+ // we will have trouble matching it in the backend.
+ Node = isl_schedule_node_band_set_ast_build_options(
+ Node, isl_union_set_read_from_str(Ctx, "{unroll[x]: 1 = 0}"));
Node = isl_schedule_node_child(Node, 0);
return Node;
}
@@ -348,6 +371,13 @@ IslScheduleOptimizer::optimizeBand(__isl_take isl_schedule_node *Node,
if (SecondLevelTiling)
Node = tileNode(Node, SecondLevelTileSizes, SecondLevelDefaultTileSize);
+ if (RegisterTiling) {
+ auto *Ctx = isl_schedule_node_get_ctx(Node);
+ Node = tileNode(Node, RegisterTileSizes, RegisterDefaultTileSize);
+ Node = isl_schedule_node_band_set_ast_build_options(
+ Node, isl_union_set_read_from_str(Ctx, "{unroll[x]}"));
+ }
+
if (PollyVectorizerChoice == VECTORIZER_NONE)
return Node;
diff --git a/polly/test/ScheduleOptimizer/rectangular-tiling.ll b/polly/test/ScheduleOptimizer/rectangular-tiling.ll
index 4d3bbe650ca..617039b0911 100644
--- a/polly/test/ScheduleOptimizer/rectangular-tiling.ll
+++ b/polly/test/ScheduleOptimizer/rectangular-tiling.ll
@@ -7,6 +7,13 @@
; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
; RUN: FileCheck %s --check-prefix=TWOLEVEL
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-opt-isl -analyze \
+; RUN: -polly-2nd-level-tiling -polly-ast \
+; RUN: -polly-tile-sizes=256,16 -polly-no-early-exit \
+; RUN: -polly-register-tiling \
+; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
+; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER
+
; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 31; c1 += 1)
; CHECK: for (int c2 = 0; c2 <= 255; c2 += 1)
@@ -27,6 +34,20 @@
; TWOLEVEL: Stmt_for_body3(256 * c0 + 16 * c2 + c4, 16 * c1 + 8 * c3 + c5);
+; TWO-PLUS-REGISTER: for (int c0 = 0; c0 <= 3; c0 += 1)
+; TWO-PLUS-REGISTER: for (int c1 = 0; c1 <= 31; c1 += 1)
+; TWO-PLUS-REGISTER: for (int c2 = 0; c2 <= 15; c2 += 1)
+; TWO-PLUS-REGISTER: for (int c3 = 0; c3 <= 1; c3 += 1)
+; TWO-PLUS-REGISTER: for (int c4 = 0; c4 <= 7; c4 += 1)
+; TWO-PLUS-REGISTER: for (int c5 = 0; c5 <= 3; c5 += 1) {
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5);
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5 + 1);
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5);
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
+; TWO-PLUS-REGISTER: }
+
+
+
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
; Function Attrs: nounwind
OpenPOWER on IntegriCloud