summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- polly/lib/Transform/ScheduleOptimizer.cpp 10
-rw-r--r-- polly/test/ScheduleOptimizer/prevectorization.ll 28
2 files changed, 36 insertions, 2 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 9a54c1da67a..7ff8e7c265a 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -107,6 +107,12 @@ static cl::opt<std::string>
cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));
+static cl::opt<int> PrevectorWidth(
+ "polly-prevect-width",
+ cl::desc(
+ "The number of loop iterations to strip-mine for pre-vectorization"),
+ cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory));
+
static cl::opt<int> DefaultTileSize(
"polly-default-tile-size",
cl::desc("The default tile size (if not enough were provided by"
@@ -176,7 +182,7 @@ private:
/// reason about parallelism.
static __isl_give isl_schedule_node *
prevectSchedBand(__isl_take isl_schedule_node *Node, unsigned DimToVectorize,
- int VectorWidth = 4);
+ int VectorWidth);
/// @brief Apply additional optimizations on the bands in the schedule tree.
///
@@ -298,7 +304,7 @@ isl_schedule_node *IslScheduleOptimizer::optimizeBand(isl_schedule_node *Node,
for (int i = Dims - 1; i >= 0; i--)
if (isl_schedule_node_band_member_get_coincident(Node, i)) {
- Node = IslScheduleOptimizer::prevectSchedBand(Node, i);
+ Node = IslScheduleOptimizer::prevectSchedBand(Node, i, PrevectorWidth);
break;
}
diff --git a/polly/test/ScheduleOptimizer/prevectorization.ll b/polly/test/ScheduleOptimizer/prevectorization.ll
index 67cab767f1a..fdce15f54c1 100644
--- a/polly/test/ScheduleOptimizer/prevectorization.ll
+++ b/polly/test/ScheduleOptimizer/prevectorization.ll
@@ -1,5 +1,11 @@
; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl -polly-vectorizer=polly -polly-ast -analyze < %s | FileCheck %s
; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl -polly-vectorizer=stripmine -polly-ast -analyze < %s | FileCheck %s
+
+; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl \
+; RUN: -polly-vectorizer=polly -polly-ast -analyze \
+; RUN: -polly-prevect-width=16 < %s | \
+; RUN: FileCheck %s -check-prefix=VEC16
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
@@ -73,6 +79,28 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
; CHECK: for (int c6 = 0; c6 <= 3; c6 += 1)
; CHECK: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
+; VEC16: {
+; VEC16: #pragma known-parallel
+; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
+; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
+; VEC16: for (int c2 = 0; c2 <= 31; c2 += 1)
+; VEC16: for (int c3 = 0; c3 <= 1; c3 += 1)
+; VEC16: #pragma simd
+; VEC16: for (int c4 = 0; c4 <= 15; c4 += 1)
+; VEC16: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 16 * c3 + c4);
+; VEC16: #pragma known-parallel
+; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
+; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
+; VEC16: for (int c2 = 0; c2 <= 47; c2 += 1)
+; VEC16: for (int c3 = 0; c3 <= 31; c3 += 1)
+; VEC16: for (int c4 = 0; c4 <= 1; c4 += 1)
+; VEC16: for (int c5 = 0; c5 <= 31; c5 += 1)
+; VEC16: #pragma simd
+; VEC16: for (int c6 = 0; c6 <= 15; c6 += 1)
+; VEC16: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 16 * c4 + c6, 32 * c2 + c5);
+; VEC16: }
+
+
!llvm.ident = !{!0}
!0 = !{!"clang version 3.5.0 "}
OpenPOWER on IntegriCloud