diff options
-rw-r--r--  polly/lib/Transform/ScheduleOptimizer.cpp         | 10
-rw-r--r--  polly/test/ScheduleOptimizer/prevectorization.ll  | 28
2 files changed, 36 insertions, 2 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 9a54c1da67a..7ff8e7c265a 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -107,6 +107,12 @@ static cl::opt<std::string>
                       cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
                       cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));
 
+static cl::opt<int> PrevectorWidth(
+    "polly-prevect-width",
+    cl::desc(
+        "The number of loop iterations to strip-mine for pre-vectorization"),
+    cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory));
+
 static cl::opt<int> DefaultTileSize(
     "polly-default-tile-size",
     cl::desc("The default tile size (if not enough were provided by"
@@ -176,7 +182,7 @@ private:
   /// reason about parallelism.
   static __isl_give isl_schedule_node *
   prevectSchedBand(__isl_take isl_schedule_node *Node, unsigned DimToVectorize,
-                   int VectorWidth = 4);
+                   int VectorWidth);
 
   /// @brief Apply additional optimizations on the bands in the schedule tree.
   ///
@@ -298,7 +304,7 @@ isl_schedule_node *IslScheduleOptimizer::optimizeBand(isl_schedule_node *Node,
 
   for (int i = Dims - 1; i >= 0; i--)
     if (isl_schedule_node_band_member_get_coincident(Node, i)) {
-      Node = IslScheduleOptimizer::prevectSchedBand(Node, i);
+      Node = IslScheduleOptimizer::prevectSchedBand(Node, i, PrevectorWidth);
       break;
     }
 
diff --git a/polly/test/ScheduleOptimizer/prevectorization.ll b/polly/test/ScheduleOptimizer/prevectorization.ll
index 67cab767f1a..fdce15f54c1 100644
--- a/polly/test/ScheduleOptimizer/prevectorization.ll
+++ b/polly/test/ScheduleOptimizer/prevectorization.ll
@@ -1,5 +1,11 @@
 ; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl -polly-vectorizer=polly -polly-ast -analyze < %s | FileCheck %s
 ; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl -polly-vectorizer=stripmine -polly-ast -analyze < %s | FileCheck %s
+
+; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl \
+; RUN: -polly-vectorizer=polly -polly-ast -analyze \
+; RUN: -polly-prevect-width=16 < %s | \
+; RUN: FileCheck %s -check-prefix=VEC16
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 @C = common global [1536 x [1536 x float]] zeroinitializer, align 16
@@ -73,6 +79,28 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 ; CHECK: for (int c6 = 0; c6 <= 3; c6 += 1)
 ; CHECK: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
 
+; VEC16: {
+; VEC16: #pragma known-parallel
+; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
+; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
+; VEC16: for (int c2 = 0; c2 <= 31; c2 += 1)
+; VEC16: for (int c3 = 0; c3 <= 1; c3 += 1)
+; VEC16: #pragma simd
+; VEC16: for (int c4 = 0; c4 <= 15; c4 += 1)
+; VEC16: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 16 * c3 + c4);
+; VEC16: #pragma known-parallel
+; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
+; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
+; VEC16: for (int c2 = 0; c2 <= 47; c2 += 1)
+; VEC16: for (int c3 = 0; c3 <= 31; c3 += 1)
+; VEC16: for (int c4 = 0; c4 <= 1; c4 += 1)
+; VEC16: for (int c5 = 0; c5 <= 31; c5 += 1)
+; VEC16: #pragma simd
+; VEC16: for (int c6 = 0; c6 <= 15; c6 += 1)
+; VEC16: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 16 * c4 + c6, 32 * c2 + c5);
+; VEC16: }
+
+
 !llvm.ident = !{!0}
 
 !0 = !{!"clang version 3.5.0 "}

