summaryrefslogtreecommitdiffstats
path: root/polly/lib/CodeGen/PPCGCodeGeneration.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp19
1 files changed, 19 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 1830be96af2..6b872c35351 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -163,6 +163,12 @@ public:
/// occurred which prevents us from generating valid GPU code.
bool BuildSuccessful = true;
+ /// The maximal number of loops surrounding a sequential kernel.
+ unsigned DeepestSequential = 0;
+
+ /// The maximal number of loops surrounding a parallel kernel.
+ unsigned DeepestParallel = 0;
+
private:
/// A vector of array base pointers for which a new ScopArrayInfo was created.
///
@@ -1179,6 +1185,13 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
isl_id_free(Id);
isl_ast_node_free(KernelStmt);
+ if (Kernel->n_grid > 1)
+ DeepestParallel =
+ std::max(DeepestParallel, isl_space_dim(Kernel->space, isl_dim_set));
+ else
+ DeepestSequential =
+ std::max(DeepestSequential, isl_space_dim(Kernel->space, isl_dim_set));
+
Value *BlockDimX, *BlockDimY, *BlockDimZ;
std::tie(BlockDimX, BlockDimY, BlockDimZ) = getBlockSizes(Kernel);
@@ -2417,6 +2430,12 @@ public:
NodeBuilder.create(Root);
NodeBuilder.finalize();
+ /// In case a sequential kernel has more surrounding loops than any parallel
+ /// kernel, the SCoP is probably mostly sequential. Hence, there is no
+ /// point in running it on a GPU.
+ if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel)
+ SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+
if (!NodeBuilder.BuildSuccessful)
SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
}
OpenPOWER on IntegriCloud