diff options
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 1830be96af2..6b872c35351 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -163,6 +163,12 @@ public: /// occurred which prevents us from generating valid GPU code. bool BuildSuccessful = true; + /// The maximal number of loops surrounding a sequential kernel. + unsigned DeepestSequential = 0; + + /// The maximal number of loops surrounding a parallel kernel. + unsigned DeepestParallel = 0; + private: /// A vector of array base pointers for which a new ScopArrayInfo was created. /// @@ -1179,6 +1185,13 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { isl_id_free(Id); isl_ast_node_free(KernelStmt); + if (Kernel->n_grid > 1) + DeepestParallel = + std::max(DeepestParallel, isl_space_dim(Kernel->space, isl_dim_set)); + else + DeepestSequential = + std::max(DeepestSequential, isl_space_dim(Kernel->space, isl_dim_set)); + Value *BlockDimX, *BlockDimY, *BlockDimZ; std::tie(BlockDimX, BlockDimY, BlockDimZ) = getBlockSizes(Kernel); @@ -2417,6 +2430,12 @@ public: NodeBuilder.create(Root); NodeBuilder.finalize(); + /// In case a sequential kernel has more surrounding loops than any parallel + /// kernel, the SCoP is probably mostly sequential. Hence, there is no + /// point in running it on a GPU. + if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel) + SplitBlock->getTerminator()->setOperand(0, Builder.getFalse()); + if (!NodeBuilder.BuildSuccessful) SplitBlock->getTerminator()->setOperand(0, Builder.getFalse()); } |

