diff options
-rw-r--r-- | polly/include/polly/CodeGen/CodeGeneration.h | 3 | ||||
-rw-r--r-- | polly/include/polly/CodeGen/IRBuilder.h | 38 | ||||
-rw-r--r-- | polly/lib/CodeGen/IRBuilder.cpp | 93 | ||||
-rw-r--r-- | polly/lib/CodeGen/IslCodeGeneration.cpp | 4 | ||||
-rw-r--r-- | polly/lib/Support/RegisterPasses.cpp | 7 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll | 12 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll | 24 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/annotated_alias_scopes.ll | 78 |
8 files changed, 225 insertions, 34 deletions
diff --git a/polly/include/polly/CodeGen/CodeGeneration.h b/polly/include/polly/CodeGen/CodeGeneration.h index f8273c36669..1627987fd08 100644 --- a/polly/include/polly/CodeGen/CodeGeneration.h +++ b/polly/include/polly/CodeGen/CodeGeneration.h @@ -36,6 +36,9 @@ enum CodeGenChoice { }; extern CodeGenChoice PollyCodeGenChoice; +/// @brief Flag to turn on/off annotation of alias scopes. +extern bool PollyAnnotateAliasScopes; + static inline int getNumberOfIterations(__isl_take isl_set *Domain) { int Dim = isl_set_dim(Domain, isl_dim_set); diff --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h index 463f3933863..3c9218f3f3d 100644 --- a/polly/include/polly/CodeGen/IRBuilder.h +++ b/polly/include/polly/CodeGen/IRBuilder.h @@ -18,14 +18,34 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/Analysis/LoopInfo.h" +namespace llvm { +class ScalarEvolution; +} + namespace polly { +class Scop; -/// @brief Helper class to annotate newly generated loops with metadata. +/// @brief Helper class to annotate newly generated SCoPs with metadata. /// -/// This stack-like structure will keep track of all loops, and annotate -/// memory instructions and loop headers according to all parallel loops. +/// The annotations are twofold: +/// 1) Loops are stored in a stack-like structure in the order they are +/// constructed and the LoopID metadata node is added to the backedge. +/// Contained memory instructions and loop headers are annotated according +/// to all parallel surrounding loops. +/// 2) The new SCoP is assumed alias free (either due to the result of +/// AliasAnalysis queries or runtime alias checks). We annotate therefore +/// all memory instruction with alias scopes to indicate that fact to +/// later optimizations. +/// These alias scopes live in a new alias domain only used in this SCoP. +/// Each base pointer has its own alias scope and is annotated to not +/// alias with any access to different base pointers. class LoopAnnotator { public: + LoopAnnotator(); + + /// @brief Build all alias scopes for the given SCoP. + void buildAliasScopes(Scop &S); + /// @brief Add a new loop @p L which is parallel if @p IsParallel is true. void pushLoop(llvm::Loop *L, bool IsParallel); @@ -40,11 +60,23 @@ public: bool IsParallel) const; private: + /// @brief The ScalarEvolution analysis we use to find base pointers. + llvm::ScalarEvolution *SE; + /// @brief All loops currently under construction. llvm::SmallVector<llvm::Loop *, 8> ActiveLoops; /// @brief Metadata pointing to parallel loops currently under construction. llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops; + + /// @brief The alias scope domain for the current SCoP. + llvm::MDNode *AliasScopeDomain; + + /// @brief A map from base pointers to its alias scope. + llvm::DenseMap<llvm::Value *, llvm::MDNode *> AliasScopeMap; + + /// @brief A map from base pointers to an alias scope list of other pointers. + llvm::DenseMap<llvm::Value *, llvm::MDNode *> OtherAliasScopeListMap; }; /// @brief Add Polly specifics when running IRBuilder. diff --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp index 2fc84191774..d01dca0df0a 100644 --- a/polly/lib/CodeGen/IRBuilder.cpp +++ b/polly/lib/CodeGen/IRBuilder.cpp @@ -14,35 +14,83 @@ #include "polly/CodeGen/IRBuilder.h" +#include "polly/ScopInfo.h" +#include "polly/Support/ScopHelper.h" + #include "llvm/IR/Metadata.h" #include "llvm/Support/Debug.h" using namespace llvm; using namespace polly; -/// @brief Get the loop id metadata node. -/// -/// Each loop is identified by a self referencing metadata node of the form: +/// @brief Get a self referencing id metadata node. /// -/// '!n = metadata !{metadata !n}' +/// The MDNode looks like this (if arg0/arg1 are not null): /// -/// This functions creates such metadata on demand if not yet available. +/// '!n = metadata !{metadata !n, arg0, arg1}' /// -/// @return The loop id metadata node. -static MDNode *getLoopID(Loop *L) { - Value *Args[] = {0}; - MDNode *LoopID = MDNode::get(L->getHeader()->getContext(), Args); - LoopID->replaceOperandWith(0, LoopID); - return LoopID; +/// @return The self referencing id metadata node. +static MDNode *getID(LLVMContext &Ctx, Value *arg0 = nullptr, + Value *arg1 = nullptr) { + MDNode *ID; + SmallVector<Value *, 3> Args; + Args.push_back(nullptr); + + if (arg0) + Args.push_back(arg0); + if (arg1) + Args.push_back(arg1); + + ID = MDNode::get(Ctx, Args); + ID->replaceOperandWith(0, ID); + return ID; +} + +LoopAnnotator::LoopAnnotator() : SE(nullptr), AliasScopeDomain(nullptr) {} + +void LoopAnnotator::buildAliasScopes(Scop &S) { + SE = S.getSE(); + + LLVMContext &Ctx = SE->getContext(); + AliasScopeDomain = getID(Ctx, MDString::get(Ctx, "polly.alias.scope.domain")); + + AliasScopeMap.clear(); + OtherAliasScopeListMap.clear(); + + SetVector<Value *> BasePtrs; + for (ScopStmt *Stmt : S) + for (MemoryAccess *MA : *Stmt) + BasePtrs.insert(MA->getBaseAddr()); + + std::string AliasScopeStr = "polly.alias.scope."; + for (Value *BasePtr : BasePtrs) + AliasScopeMap[BasePtr] = getID( + Ctx, AliasScopeDomain, + MDString::get(Ctx, (AliasScopeStr + BasePtr->getName()).str().c_str())); + + for (Value *BasePtr : BasePtrs) { + MDNode *AliasScopeList = MDNode::get(Ctx, {}); + for (const auto &AliasScopePair : AliasScopeMap) { + if (BasePtr == AliasScopePair.first) + continue; + + Value *Args = {AliasScopePair.second}; + AliasScopeList = + MDNode::concatenate(AliasScopeList, MDNode::get(Ctx, Args)); + } + + OtherAliasScopeListMap[BasePtr] = AliasScopeList; + } } void polly::LoopAnnotator::pushLoop(Loop *L, bool IsParallel) { + ActiveLoops.push_back(L); if (!IsParallel) return; BasicBlock *Header = L->getHeader(); - MDNode *Id = getLoopID(L); + MDNode *Id = getID(Header->getContext()); Value *Args[] = {Id}; MDNode *Ids = ParallelLoops.empty() ? MDNode::get(Header->getContext(), Args) @@ -71,7 +119,26 @@ void polly::LoopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, } void polly::LoopAnnotator::annotate(Instruction *Inst) { - if (!Inst->mayReadOrWriteMemory() || ParallelLoops.empty()) + if (!Inst->mayReadOrWriteMemory()) + return; + + // TODO: Use the ScopArrayInfo once available here. + if (AliasScopeDomain) { + Value *BasePtr = nullptr; + if (isa<StoreInst>(Inst) || isa<LoadInst>(Inst)) { + const SCEV *PtrSCEV = SE->getSCEV(getPointerOperand(*Inst)); + const SCEV *BaseSCEV = SE->getPointerBase(PtrSCEV); + if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(BaseSCEV)) + BasePtr = SU->getValue(); + } + + if (BasePtr) { + Inst->setMetadata("alias.scope", AliasScopeMap[BasePtr]); + Inst->setMetadata("noalias", OtherAliasScopeListMap[BasePtr]); + } + } + + if (ParallelLoops.empty()) return; Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back()); diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp index f5599ee3a25..68ed9434462 100644 --- a/polly/lib/CodeGen/IslCodeGeneration.cpp +++ b/polly/lib/CodeGen/IslCodeGeneration.cpp @@ -605,6 +605,10 @@ public: assert(!S.getRegion().isTopLevelRegion() && "Top level regions are not supported"); + // Build the alias scopes for annotations first. + if (PollyAnnotateAliasScopes) + Annotator.buildAliasScopes(S); + BasicBlock *EnteringBB = simplifyRegion(&S, this); PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); diff --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp index 7bc5973ff75..d556885bc95 100644 --- a/polly/lib/Support/RegisterPasses.cpp +++ b/polly/lib/Support/RegisterPasses.cpp @@ -141,6 +141,13 @@ static cl::opt<bool> cl::desc("Show the Polly CFG right after code generation"), cl::Hidden, cl::init(false), cl::cat(PollyCategory)); +bool polly::PollyAnnotateAliasScopes; +static cl::opt<bool, true> XPollyAnnotateAliasScopes( + "polly-annotate-alias-scopes", + cl::desc("Annotate memory instructions with alias scopes"), + cl::location(PollyAnnotateAliasScopes), cl::init(true), cl::ZeroOrMore, + cl::cat(PollyCategory)); + namespace polly { void initializePollyPasses(PassRegistry &Registry) { #ifdef CLOOG_FOUND diff --git a/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll b/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll index e6b0e73ebec..90a654b084b 100644 --- a/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll +++ b/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll @@ -3,16 +3,16 @@ ; Check that we mark multiple parallel loops correctly including the memory instructions. ; ; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 1022 -; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !0 +; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDOuter:[0-9]*]] ; ; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 510 -; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !2 +; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]] ; -; CHECK-DAG: store i32 %p_tmp{{[0-9]*}}, i32* %p_arrayidx{{[0-9]*}}, !llvm.mem.parallel_loop_access !1 +; CHECK-DAG: store i32 %p_tmp{{[0-9]*}}, i32* %p_arrayidx{{[0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4 ; -; CHECK: !0 = metadata !{metadata !0} -; CHECK: !1 = metadata !{metadata !0, metadata !2} -; CHECK: !2 = metadata !{metadata !2} +; CHECK-DAG: ![[IDOuter]] = metadata !{metadata ![[IDOuter]]} +; CHECK-DAG: ![[IDInner]] = metadata !{metadata ![[IDInner]]} +; CHECK-DAG: !4 = metadata !{metadata ![[IDOuter]], metadata ![[IDInner]]} ; ; void jd(int *A) { ; for (int i = 0; i < 1024; i++) diff --git a/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll b/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll index 4ad91582fd5..d1b519c7994 100644 --- a/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll +++ b/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll @@ -38,17 +38,17 @@ ret: ; SEQUENTIAL: @test-one ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access -; SEQUENTIAL-NOT: !llvm.loop !0 +; SEQUENTIAL-NOT: !llvm.loop ; SEQUENTIAL-SCEV: @test-one ; SEQUENTIAL-SCEV-NOT: !llvm.mem.parallel_loop_access ; SEQUENTIAL-SCEV-NOT: !llvm.loop ; PARALLEL: @test-one -; PARALLEL: store i32 1, i32* %p_scevgep, !llvm.mem.parallel_loop_access !0 -; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !0 +; PARALLEL: store i32 1, i32* %p_scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] ; PARALLEL-SCEV: @test-one -; PARALLEL-SCEV: store i32 1, i32* %scevgep1, !llvm.mem.parallel_loop_access !0 -; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !0 +; PARALLEL-SCEV: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] ; This loop has memory dependences that require at least a simple dependence ; analysis to detect the parallelism. @@ -87,16 +87,16 @@ ret: ; SEQUENTIAL: @test-two ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access -; SEQUENTIAL-NOT: !llvm.loop !0 +; SEQUENTIAL-NOT: !llvm.loop ; SEQUENTIAL-SCEV: @test-two ; SEQUENTIAL-SCEV-NOT: !llvm.mem.parallel_loop_access ; SEQUENTIAL-SCEV-NOT: !llvm.loop ; PARALLEL: @test-two -; PARALLEL: %val_p_scalar_ = load i32* %p_scevgepload, !llvm.mem.parallel_loop_access !1 -; PARALLEL: store i32 %val_p_scalar_, i32* %p_scevgepstore, !llvm.mem.parallel_loop_access !1 -; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !1 +; PARALLEL: %val_p_scalar_ = load i32* %p_scevgepload, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL: store i32 %val_p_scalar_, i32* %p_scevgepstore, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]] +; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] ; PARALLEL-SCEV: @test-two -; PARALLEL-SCEV: %val_p_scalar_ = load i32* %scevgep, !llvm.mem.parallel_loop_access !1 -; PARALLEL-SCEV: store i32 %val_p_scalar_, i32* %scevgep1, !llvm.mem.parallel_loop_access !1 -; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !1 +; PARALLEL-SCEV: %val_p_scalar_ = load i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL-SCEV: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]] +; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] diff --git a/polly/test/Isl/CodeGen/annotated_alias_scopes.ll b/polly/test/Isl/CodeGen/annotated_alias_scopes.ll new file mode 100644 index 00000000000..05a8ee99bbc --- /dev/null +++ b/polly/test/Isl/CodeGen/annotated_alias_scopes.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-code-generator=isl -polly-codegen-isl -S < %s | FileCheck %s --check-prefix=SCOPES +; RUN: opt %loadPolly -polly-code-generator=isl -polly-codegen-isl -polly-annotate-alias-scopes=false -S < %s | FileCheck %s --check-prefix=NOSCOPES +; +; Check that we create alias scopes that indicate the accesses to A, B and C cannot alias in any way. +; +; SCOPES: %[[BIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %B, i64 %polly.indvar +; SCOPES: load i32* %[[BIdx]], !alias.scope ![[AliasScopeB:[0-9]*]], !noalias ![[NoAliasB:[0-9]*]] +; SCOPES: %[[CIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds float* %C, i64 %polly.indvar +; SCOPES: load float* %[[CIdx]], !alias.scope ![[AliasScopeC:[0-9]*]], !noalias ![[NoAliasC:[0-9]*]] +; SCOPES: %[[AIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %A, i64 %polly.indvar +; SCOPES: store i32 %{{[._a-zA-Z0-9]*}}, i32* %[[AIdx]], !alias.scope ![[AliasScopeA:[0-9]*]], !noalias ![[NoAliasA:[0-9]*]] +; +; SCOPES: ![[AliasScopeB]] = metadata !{metadata ![[AliasScopeB]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.B"} +; SCOPES: ![[NoAliasB]] = metadata !{ +; SCOPES-DAG: metadata ![[AliasScopeA]] +; SCOPES-DAG: metadata ![[AliasScopeC]] +; SCOPES: } +; SCOPES-DAG: ![[AliasScopeA]] = metadata !{metadata ![[AliasScopeA]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.A"} +; SCOPES-DAG: ![[AliasScopeC]] = metadata !{metadata ![[AliasScopeC]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.C"} +; SCOPES: ![[NoAliasC]] = metadata !{ +; SCOPES-DAG: metadata ![[AliasScopeA]] +; SCOPES-DAG: metadata ![[AliasScopeB]] +; SCOPES: } +; SCOPES: ![[NoAliasA]] = metadata !{ +; SCOPES-DAG: metadata ![[AliasScopeB]] +; SCOPES-DAG: metadata ![[AliasScopeC]] +; SCOPES: } +; +; NOSCOPES: %[[BIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %B, i64 %polly.indvar +; NOSCOPES: load i32* %[[BIdx]] +; NOSCOPES-NOT: alias.scope +; NOSCOPES-NOT: noalias +; NOSCOPES: %[[CIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds float* %C, i64 %polly.indvar +; NOSCOPES: load float* %[[CIdx]] +; NOSCOPES-NOT: alias.scope +; NOSCOPES-NOT: noalias +; NOSCOPES: %[[AIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %A, i64 %polly.indvar +; NOSCOPES: store i32 %{{[._a-zA-Z0-9]*}}, i32* %[[AIdx]] +; NOSCOPES-NOT: alias.scope +; NOSCOPES-NOT: noalias +; +; NOSCOPES-NOT: metadata +; +; void jd(int *A, int *B, float *C) { +; for (int i = 0; i < 1024; i++) +; A[i] = B[i] + C[i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A, i32* %B, float* %C) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv + %tmp = load i32* %arrayidx, align 4 + %conv = sitofp i32 %tmp to float + %arrayidx2 = getelementptr inbounds float* %C, i64 %indvars.iv + %tmp1 = load float* %arrayidx2, align 4 + %add = fadd fast float %conv, %tmp1 + %conv3 = fptosi float %add to i32 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %conv3, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |