diff options
| -rw-r--r-- | polly/lib/Transform/ScheduleOptimizer.cpp | 30 | ||||
| -rw-r--r-- | polly/test/ScheduleOptimizer/rectangular-tiling.ll | 21 |
2 files changed, 51 insertions, 0 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 8c773d817a1..4fcc4e4e04a 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -142,6 +142,24 @@ static cl::list<int> cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); +static cl::opt<bool> RegisterTiling("polly-register-tiling", + cl::desc("Enable register tiling"), + cl::init(false), cl::ZeroOrMore, + cl::cat(PollyCategory)); + +static cl::opt<int> RegisterDefaultTileSize( + "polly-register-tiling-default-tile-size", + cl::desc("The default register tile size (if not enough were provided by" + " --polly-register-tile-sizes)"), + cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory)); + +static cl::list<int> + RegisterTileSizes("polly-register-tile-sizes", + cl::desc("A tile size for each loop dimension, filled " + "with --polly-register-tile-size"), + cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, + cl::cat(PollyCategory)); + namespace { class IslScheduleOptimizer : public ScopPass { @@ -289,6 +307,11 @@ IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node, Node = isl_schedule_node_band_tile(Node, Sizes); Node = isl_schedule_node_child(Node, 0); Node = isl_schedule_node_band_sink(Node); + + // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise, + // we will have trouble matching it in the backend. 
+ Node = isl_schedule_node_band_set_ast_build_options( + Node, isl_union_set_read_from_str(Ctx, "{unroll[x]: 1 = 0}")); Node = isl_schedule_node_child(Node, 0); return Node; } @@ -348,6 +371,13 @@ IslScheduleOptimizer::optimizeBand(__isl_take isl_schedule_node *Node, if (SecondLevelTiling) Node = tileNode(Node, SecondLevelTileSizes, SecondLevelDefaultTileSize); + if (RegisterTiling) { + auto *Ctx = isl_schedule_node_get_ctx(Node); + Node = tileNode(Node, RegisterTileSizes, RegisterDefaultTileSize); + Node = isl_schedule_node_band_set_ast_build_options( + Node, isl_union_set_read_from_str(Ctx, "{unroll[x]}")); + } + if (PollyVectorizerChoice == VECTORIZER_NONE) return Node; diff --git a/polly/test/ScheduleOptimizer/rectangular-tiling.ll b/polly/test/ScheduleOptimizer/rectangular-tiling.ll index 4d3bbe650ca..617039b0911 100644 --- a/polly/test/ScheduleOptimizer/rectangular-tiling.ll +++ b/polly/test/ScheduleOptimizer/rectangular-tiling.ll @@ -7,6 +7,13 @@ ; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \ ; RUN: FileCheck %s --check-prefix=TWOLEVEL +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-opt-isl -analyze \ +; RUN: -polly-2nd-level-tiling -polly-ast \ +; RUN: -polly-tile-sizes=256,16 -polly-no-early-exit \ +; RUN: -polly-register-tiling \ +; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \ +; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER + ; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1) ; CHECK: for (int c1 = 0; c1 <= 31; c1 += 1) ; CHECK: for (int c2 = 0; c2 <= 255; c2 += 1) @@ -27,6 +34,20 @@ ; TWOLEVEL: Stmt_for_body3(256 * c0 + 16 * c2 + c4, 16 * c1 + 8 * c3 + c5); +; TWO-PLUS-REGISTER: for (int c0 = 0; c0 <= 3; c0 += 1) +; TWO-PLUS-REGISTER: for (int c1 = 0; c1 <= 31; c1 += 1) +; TWO-PLUS-REGISTER: for (int c2 = 0; c2 <= 15; c2 += 1) +; TWO-PLUS-REGISTER: for (int c3 = 0; c3 <= 1; c3 += 1) +; TWO-PLUS-REGISTER: for (int c4 = 0; c4 <= 7; c4 += 1) +; TWO-PLUS-REGISTER: for (int c5 = 0; c5 <= 3; c5 += 1) { +; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 
+ 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5); +; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5 + 1); +; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5); +; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1); +; TWO-PLUS-REGISTER: } + + + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" ; Function Attrs: nounwind |

