diff options
-rw-r--r-- | polly/lib/Transform/ScheduleOptimizer.cpp | 5 | ||||
-rw-r--r-- | polly/test/ScheduleOptimizer/rectangular-tiling.ll | 23 |
2 files changed, 24 insertions, 4 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 4fcc4e4e04a..10614478c50 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -306,12 +306,11 @@ IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node, isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth)); Node = isl_schedule_node_band_tile(Node, Sizes); Node = isl_schedule_node_child(Node, 0); - Node = isl_schedule_node_band_sink(Node); - // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise, // we will have troubles to match it in the backend. Node = isl_schedule_node_band_set_ast_build_options( - Node, isl_union_set_read_from_str(Ctx, "{unroll[x]: 1 = 0}")); + Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }")); + Node = isl_schedule_node_band_sink(Node); Node = isl_schedule_node_child(Node, 0); return Node; } diff --git a/polly/test/ScheduleOptimizer/rectangular-tiling.ll b/polly/test/ScheduleOptimizer/rectangular-tiling.ll index 617039b0911..2aa8917f2fd 100644 --- a/polly/test/ScheduleOptimizer/rectangular-tiling.ll +++ b/polly/test/ScheduleOptimizer/rectangular-tiling.ll @@ -14,6 +14,14 @@ ; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \ ; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-opt-isl -analyze \ +; RUN: -polly-2nd-level-tiling -polly-ast \ +; RUN: -polly-tile-sizes=256,16 -polly-no-early-exit \ +; RUN: -polly-register-tiling -polly-register-tile-sizes=2,4 \ +; RUN: -polly-vectorizer=polly \ +; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \ +; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER-PLUS-VECTORIZATION + ; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1) ; CHECK: for (int c1 = 0; c1 <= 31; c1 += 1) ; CHECK: for (int c2 = 0; c2 <= 255; c2 += 1) @@ -46,7 +54,20 @@ ; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1); ; TWO-PLUS-REGISTER: } - +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1) +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c1 = 0; c1 <= 31; c1 += 1) +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c2 = 0; c2 <= 15; c2 += 1) +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c3 = 0; c3 <= 1; c3 += 1) +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c4 = 0; c4 <= 7; c4 += 1) +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c5 = 0; c5 <= 1; c5 += 1) { +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1) +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8); +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1) +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8); +; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: } target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" |