summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/lib/Transform/ScheduleOptimizer.cpp30
-rw-r--r--polly/test/ScheduleOptimizer/rectangular-tiling.ll21
2 files changed, 51 insertions, 0 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 8c773d817a1..4fcc4e4e04a 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -142,6 +142,24 @@ static cl::list<int>
cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
cl::cat(PollyCategory));
+static cl::opt<bool> RegisterTiling("polly-register-tiling",
+ cl::desc("Enable register tiling"),
+ cl::init(false), cl::ZeroOrMore,
+ cl::cat(PollyCategory));
+
+static cl::opt<int> RegisterDefaultTileSize(
+ "polly-register-tiling-default-tile-size",
+ cl::desc("The default register tile size (if not enough were provided by"
+ " --polly-register-tile-sizes)"),
+ cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+static cl::list<int>
+ RegisterTileSizes("polly-register-tile-sizes",
+ cl::desc("A tile size for each loop dimension, filled "
+ "with --polly-register-tile-size"),
+ cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
+ cl::cat(PollyCategory));
+
namespace {
class IslScheduleOptimizer : public ScopPass {
@@ -289,6 +307,11 @@ IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
Node = isl_schedule_node_band_tile(Node, Sizes);
Node = isl_schedule_node_child(Node, 0);
Node = isl_schedule_node_band_sink(Node);
+
+ // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
+ // we will have troubles to match it in the backend.
+ Node = isl_schedule_node_band_set_ast_build_options(
+ Node, isl_union_set_read_from_str(Ctx, "{unroll[x]: 1 = 0}"));
Node = isl_schedule_node_child(Node, 0);
return Node;
}
@@ -348,6 +371,13 @@ IslScheduleOptimizer::optimizeBand(__isl_take isl_schedule_node *Node,
if (SecondLevelTiling)
Node = tileNode(Node, SecondLevelTileSizes, SecondLevelDefaultTileSize);
+ if (RegisterTiling) {
+ auto *Ctx = isl_schedule_node_get_ctx(Node);
+ Node = tileNode(Node, RegisterTileSizes, RegisterDefaultTileSize);
+ Node = isl_schedule_node_band_set_ast_build_options(
+ Node, isl_union_set_read_from_str(Ctx, "{unroll[x]}"));
+ }
+
if (PollyVectorizerChoice == VECTORIZER_NONE)
return Node;
diff --git a/polly/test/ScheduleOptimizer/rectangular-tiling.ll b/polly/test/ScheduleOptimizer/rectangular-tiling.ll
index 4d3bbe650ca..617039b0911 100644
--- a/polly/test/ScheduleOptimizer/rectangular-tiling.ll
+++ b/polly/test/ScheduleOptimizer/rectangular-tiling.ll
@@ -7,6 +7,13 @@
; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
; RUN: FileCheck %s --check-prefix=TWOLEVEL
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-opt-isl -analyze \
+; RUN: -polly-2nd-level-tiling -polly-ast \
+; RUN: -polly-tile-sizes=256,16 -polly-no-early-exit \
+; RUN: -polly-register-tiling \
+; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
+; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER
+
; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 31; c1 += 1)
; CHECK: for (int c2 = 0; c2 <= 255; c2 += 1)
@@ -27,6 +34,20 @@
; TWOLEVEL: Stmt_for_body3(256 * c0 + 16 * c2 + c4, 16 * c1 + 8 * c3 + c5);
+; TWO-PLUS-REGISTER: for (int c0 = 0; c0 <= 3; c0 += 1)
+; TWO-PLUS-REGISTER: for (int c1 = 0; c1 <= 31; c1 += 1)
+; TWO-PLUS-REGISTER: for (int c2 = 0; c2 <= 15; c2 += 1)
+; TWO-PLUS-REGISTER: for (int c3 = 0; c3 <= 1; c3 += 1)
+; TWO-PLUS-REGISTER: for (int c4 = 0; c4 <= 7; c4 += 1)
+; TWO-PLUS-REGISTER: for (int c5 = 0; c5 <= 3; c5 += 1) {
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5);
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5 + 1);
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5);
+; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
+; TWO-PLUS-REGISTER: }
+
+
+
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
; Function Attrs: nounwind
OpenPOWER on IntegriCloud