summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSiddharth Bhat <siddu.druid@gmail.com>2017-08-17 21:57:23 +0000
committerSiddharth Bhat <siddu.druid@gmail.com>2017-08-17 21:57:23 +0000
commitb46847c035703f1f3c6a6f8c6833daf7c381b8a1 (patch)
tree899661e9600d24bd33c0b2779b2beb7c9a6e05e1
parente608ef76357e1dcedf7e49e6c4f43468b9431812 (diff)
downloadbcm5719-llvm-b46847c035703f1f3c6a6f8c6833daf7c381b8a1.tar.gz
bcm5719-llvm-b46847c035703f1f3c6a6f8c6833daf7c381b8a1.zip
[ScopInliner] Add a simple Scop-based inliner to polly.
We add a ScopInliner pass which inlines functions based on a simple heuristic: Let `g` call `f`. If we can model all of `f` as a Scop, we inline `f` into `g`. This requires `-polly-detect-full-functions` to be enabled. So, the pass asserts that `-polly-detect-full-functions` is enabled. Differential Revision: https://reviews.llvm.org/D36832 llvm-svn: 311126
-rw-r--r--polly/include/polly/LinkAllPasses.h2
-rw-r--r--polly/include/polly/ScopDetection.h1
-rw-r--r--polly/lib/Analysis/ScopDetection.cpp15
-rw-r--r--polly/lib/CMakeLists.txt1
-rw-r--r--polly/lib/Support/RegisterPasses.cpp1
-rw-r--r--polly/lib/Transform/ScopInliner.cpp119
-rw-r--r--polly/test/ScopInliner/ignore-declares.ll8
-rw-r--r--polly/test/ScopInliner/invariant-load-func.ll76
-rw-r--r--polly/test/ScopInliner/simple-inline-loop.ll62
9 files changed, 278 insertions, 7 deletions
diff --git a/polly/include/polly/LinkAllPasses.h b/polly/include/polly/LinkAllPasses.h
index ee5bb6f4678..56a7330624a 100644
--- a/polly/include/polly/LinkAllPasses.h
+++ b/polly/include/polly/LinkAllPasses.h
@@ -32,6 +32,7 @@ class RegionPass;
namespace polly {
llvm::Pass *createCodePreparationPass();
+llvm::Pass *createScopInlinerPass();
llvm::Pass *createDeadCodeElimPass();
llvm::Pass *createDependenceInfoPass();
llvm::Pass *createDependenceInfoWrapperPassPass();
@@ -108,6 +109,7 @@ struct PollyForcePassLinking {
namespace llvm {
class PassRegistry;
void initializeCodePreparationPass(llvm::PassRegistry &);
+void initializeScopInlinerPass(llvm::PassRegistry &);
void initializeDeadCodeElimPass(llvm::PassRegistry &);
void initializeJSONExporterPass(llvm::PassRegistry &);
void initializeJSONImporterPass(llvm::PassRegistry &);
diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h
index 46921cea703..11b37099dc8 100644
--- a/polly/include/polly/ScopDetection.h
+++ b/polly/include/polly/ScopDetection.h
@@ -113,6 +113,7 @@ extern bool PollyUseRuntimeAliasChecks;
extern bool PollyProcessUnprofitable;
extern bool PollyInvariantLoadHoisting;
extern bool PollyAllowUnsignedOperations;
+extern bool PollyAllowFullFunction;
/// A function attribute which will cause Polly to skip the function
extern llvm::StringRef PollySkipFnAttr;
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index 18496d487b8..6b5ed8408e0 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -1,4 +1,3 @@
-//===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -107,10 +106,12 @@ static cl::list<std::string> IgnoredFunctions(
"ANY of the regexes provided."),
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
-static cl::opt<bool>
- AllowFullFunction("polly-detect-full-functions",
- cl::desc("Allow the detection of full functions"),
- cl::init(false), cl::cat(PollyCategory));
+bool polly::PollyAllowFullFunction;
+static cl::opt<bool, true>
+ XAllowFullFunction("polly-detect-full-functions",
+ cl::desc("Allow the detection of full functions"),
+ cl::location(polly::PollyAllowFullFunction),
+ cl::init(false), cl::cat(PollyCategory));
static cl::opt<std::string> OnlyRegion(
"polly-only-region",
@@ -1541,7 +1542,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
- if (!AllowFullFunction && CurRegion.isTopLevelRegion()) {
+ if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
DEBUG(dbgs() << "Top level region is invalid\n");
return false;
}
@@ -1564,7 +1565,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
// SCoP cannot contain the entry block of the function, because we need
// to insert alloca instruction there when translate scalar to array.
- if (!AllowFullFunction &&
+ if (!PollyAllowFullFunction &&
CurRegion.getEntry() ==
&(CurRegion.getEntry()->getParent()->getEntryBlock()))
return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
index f0f8f3a1581..cba633af2dd 100644
--- a/polly/lib/CMakeLists.txt
+++ b/polly/lib/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(PollyCore OBJECT
Transform/Simplify.cpp
Transform/MaximalStaticExpansion.cpp
Transform/RewriteByReferenceParameters.cpp
+ Transform/ScopInliner.cpp
${POLLY_HEADER_FILES}
)
set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
diff --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp
index fc6ae5430f8..20f7ea64146 100644
--- a/polly/lib/Support/RegisterPasses.cpp
+++ b/polly/lib/Support/RegisterPasses.cpp
@@ -264,6 +264,7 @@ void initializePollyPasses(PassRegistry &Registry) {
initializePollyCanonicalizePass(Registry);
initializePolyhedralInfoPass(Registry);
initializeScopDetectionWrapperPassPass(Registry);
+ initializeScopInlinerPass(Registry);
initializeScopInfoRegionPassPass(Registry);
initializeScopInfoWrapperPassPass(Registry);
initializeRewriteByrefParamsPass(Registry);
diff --git a/polly/lib/Transform/ScopInliner.cpp b/polly/lib/Transform/ScopInliner.cpp
new file mode 100644
index 00000000000..443e544b39f
--- /dev/null
+++ b/polly/lib/Transform/ScopInliner.cpp
@@ -0,0 +1,119 @@
+//===---- ScopInliner.cpp - Polyhedral based inliner ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Take a SCC and:
+// 1. If it has more than one component, bail out (contains cycles)
+// 2. If it has just one component, and if the function is entirely a scop,
+// inline it.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "polly-scop-inliner"
+
+#include "polly/LinkAllPasses.h"
+#include "polly/RegisterPasses.h"
+#include "polly/ScopDetection.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+
+using namespace polly;
+extern bool polly::PollyAllowFullFunction;
+
+namespace {
+/// Call-graph SCC pass that force-inlines functions Polly can model entirely
+/// as a Scop.
+///
+/// For every singleton SCC whose function has a body, scop detection is run
+/// on that function. If the function's top-level region is among the valid
+/// regions, the function is marked `alwaysinline` and the always-inliner is
+/// run over the enclosing module.
+class ScopInliner : public CallGraphSCCPass {
+public:
+  static char ID;
+
+  ScopInliner() : CallGraphSCCPass(ID) {}
+
+  /// Check that full-function scop detection is enabled.
+  ///
+  /// Aborts with a fatal error when PollyAllowFullFunction is off, because
+  /// the top-level region can then never be a valid Scop and the heuristic
+  /// in runOnSCC could never fire.
+  ///
+  /// @param CG The call graph (unused).
+  ///
+  /// @returns false, since the module is not modified here.
+  bool doInitialization(CallGraph &CG) override {
+    if (!polly::PollyAllowFullFunction) {
+      report_fatal_error(
+          "Aborting from ScopInliner because it only makes sense to run with "
+          "-polly-detect-full-functions. "
+          "The heuristic for ScopInliner checks that the full function is a "
+          "Scop, which happens if and only if -polly-detect-full-functions is "
+          "enabled. "
+          "If not, the entry block is not included in the Scop");
+    }
+    return false;
+  }
+
+  /// Inline the single function of @p SCC if it is entirely a Scop.
+  ///
+  /// @param SCC The strongly connected component to process.
+  ///
+  /// @returns true if the function was marked `alwaysinline` (and the
+  ///          always-inliner was run), false if nothing was changed.
+  bool runOnSCC(CallGraphSCC &SCC) override {
+    // We do not try to inline non-trivial SCCs because this would lead to
+    // "infinite" inlining if we are not careful.
+    if (SCC.size() > 1)
+      return false;
+    assert(SCC.size() == 1 && "found empty SCC");
+    Function *F = (*SCC.begin())->getFunction();
+
+    // Nodes without a function and bodiless declarations cannot be analyzed.
+    if (!F)
+      return false;
+    if (F->isDeclaration()) {
+      DEBUG(dbgs() << "Skipping " << F->getName()
+                   << " because it is a declaration.\n");
+      return false;
+    }
+
+    // This is a legacy pass, so the new-pass-manager analyses are queried
+    // through a throw-away analysis manager built for this call only.
+    PassBuilder PB;
+    FunctionAnalysisManager FAM;
+    FAM.registerPass([] { return ScopAnalysis(); });
+    PB.registerFunctionAnalyses(FAM);
+
+    RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
+    ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);
+
+    const bool HasScopAsTopLevelRegion =
+        SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;
+
+    if (!HasScopAsTopLevelRegion) {
+      DEBUG(dbgs() << F->getName()
+                   << " does NOT have scop as top level region\n");
+      return false;
+    }
+
+    DEBUG(dbgs() << "Inlining " << F->getName()
+                 << " because it has a scop as top level region.\n");
+    F->addFnAttr(llvm::Attribute::AlwaysInline);
+
+    // NOTE(review): the always-inliner runs over the whole module and the
+    // CallGraph owned by the pass manager is not updated here -- confirm
+    // that later SCC visits tolerate this.
+    ModuleAnalysisManager MAM;
+    PB.registerModuleAnalyses(MAM);
+    ModulePassManager MPM;
+    MPM.addPass(AlwaysInlinerPass());
+    Module *M = F->getParent();
+    assert(M && "Function has illegal module");
+    MPM.run(*M, MAM);
+
+    // Adding the attribute already modified the IR, independent of whether
+    // any call site was actually inlined.
+    return true;
+  }
+
+  /// Declare the analyses this pass needs; only the base-class requirements.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+};
+
+} // namespace
+// Pass identifier handed to the CallGraphSCCPass constructor.
+char ScopInliner::ID;
+
+/// Create a new ScopInliner pass instance; the caller receives the raw
+/// pointer returned by `new`.
+Pass *polly::createScopInlinerPass() { return new ScopInliner(); }
+
+// Pass registration for ScopInliner under the name "polly-scop-inliner".
+// No dependency macros are listed between BEGIN and END. The matching
+// initializeScopInlinerPass declaration lives in polly/LinkAllPasses.h.
+INITIALIZE_PASS_BEGIN(
+ ScopInliner, "polly-scop-inliner",
+ "inline functions based on how much of the function is a scop.", false,
+ false)
+INITIALIZE_PASS_END(
+ ScopInliner, "polly-scop-inliner",
+ "inline functions based on how much of the function is a scop.", false,
+ false)
diff --git a/polly/test/ScopInliner/ignore-declares.ll b/polly/test/ScopInliner/ignore-declares.ll
new file mode 100644
index 00000000000..4f678588a36
--- /dev/null
+++ b/polly/test/ScopInliner/ignore-declares.ll
@@ -0,0 +1,8 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze < %s
+
+; Check that we do not crash if there are declares. We should skip function
+; declarations and not try to query for domtree.
+
+declare void @foo()
+
diff --git a/polly/test/ScopInliner/invariant-load-func.ll b/polly/test/ScopInliner/invariant-load-func.ll
new file mode 100644
index 00000000000..52387f84288
--- /dev/null
+++ b/polly/test/ScopInliner/invariant-load-func.ll
@@ -0,0 +1,76 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze -polly-invariant-load-hoisting < %s | FileCheck %s
+
+; Check that we inline a function that requires invariant load hoisting
+; correctly.
+; CHECK: Max Loop Depth: 2
+
+; REQUIRES: pollyacc
+
+
+; void to_be_inlined(int A[], int *begin, int *end) {
+; for(int i = *begin; i < *end; i++) {
+; A[i] = 10;
+; }
+; }
+;
+; static const int N = 1000;
+;
+; void inline_site(int A[], int *begin, int *end) {
+; for(int i = 0; i < N; i++)
+; to_be_inlined(A, begin, end);
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+; Callee: a single loop that stores 10 to A[i], with i starting at *begin and
+; running while i < *end. The loads from %begin and %end are presumably what
+; make invariant load hoisting necessary for this test (see header comment).
+define void @to_be_inlined(i32* %A, i32* %begin, i32* %end) {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+; Guard: skip the loop entirely when *begin >= *end.
+ %tmp = load i32, i32* %begin, align 4
+ %tmp21 = load i32, i32* %end, align 4
+ %cmp3 = icmp slt i32 %tmp, %tmp21
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry.split
+ %tmp1 = sext i32 %tmp to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %indvars.iv4 = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv4
+ store i32 10, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv4, 1
+; The upper bound *end is reloaded on every iteration.
+ %tmp2 = load i32, i32* %end, align 4
+ %tmp3 = sext i32 %tmp2 to i64
+ %cmp = icmp slt i64 %indvars.iv.next, %tmp3
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
+ ret void
+}
+
+
+; Caller: a counted loop of 1000 iterations whose body only calls
+; @to_be_inlined. The test expects a loop depth of 2 once the callee's loop
+; has been inlined into this one.
+define void @inline_site(i32* %A, i32* %begin, i32 *%end) {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %entry.split, %for.body
+ %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
+ tail call void @to_be_inlined(i32* %A, i32* %begin, i32* %end)
+ %inc = add nuw nsw i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
diff --git a/polly/test/ScopInliner/simple-inline-loop.ll b/polly/test/ScopInliner/simple-inline-loop.ll
new file mode 100644
index 00000000000..c849e1e48cb
--- /dev/null
+++ b/polly/test/ScopInliner/simple-inline-loop.ll
@@ -0,0 +1,62 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze < %s | FileCheck %s
+
+; Check that we get the 2 nested loops by inlining `to_be_inlined` into
+; `inline_site`.
+; CHECK: Max Loop Depth: 2
+
+; static const int N = 1000;
+;
+; void to_be_inlined(int A[]) {
+; for(int i = 0; i < N; i++)
+; A[i] *= 10;
+; }
+;
+; void inline_site(int A[]) {
+; for(int i = 0; i < N; i++)
+; to_be_inlined(A);
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+
+; Callee: a single loop that multiplies A[0] .. A[999] by 10 in place.
+define void @to_be_inlined(i32* %A) {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %entry.split, %for.body
+ %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
+ %tmp = load i32, i32* %arrayidx, align 4
+ %mul = mul nsw i32 %tmp, 10
+ store i32 %mul, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Caller: a counted loop of 1000 iterations whose body only calls
+; @to_be_inlined. The test expects a loop depth of 2 once the callee's loop
+; has been inlined into this one.
+define void @inline_site(i32* %A) {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %entry.split, %for.body
+ %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
+ tail call void @to_be_inlined(i32* %A)
+ %inc = add nuw nsw i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
OpenPOWER on IntegriCloud