diff options
author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-10-02 13:11:27 +0000 |
---|---|---|
committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-10-02 13:11:27 +0000 |
commit | 911951f4f85487df14c9b8e87921e1fb96539b7c (patch) | |
tree | 22348c42c42369861ae21f5874aa61b9cb05b1ab | |
parent | 478a7de18b9a65c47c2c7ae46e873c8638b45a9b (diff) | |
download | bcm5719-llvm-911951f4f85487df14c9b8e87921e1fb96539b7c.tar.gz bcm5719-llvm-911951f4f85487df14c9b8e87921e1fb96539b7c.zip |
Hand down referenced & globally mapped values to the subfunction
If a value is globally mapped (IslNodeBuilder::ValueMap) and
referenced in the code that will be put into a subfunction, we hand
down the new value to the subfunction.
This patch also removes code that handed down all invariant loads to
the subfunction. Instead, only needed invariant loads are given to the
subfunction. There are two possible reasons for an invariant load to
be handed down:
1) The invariant load is used in a block that is placed in the
subfunction but which is not the parent of the load. In this
case, the scalar access that will read the loaded value will
cause its base pointer (the preloaded value) to be handed down to
the subfunction.
2) The invariant load is defined and used in a block that is placed
in the subfunction. With this patch, we will hand down the
preloaded value to the subfunction as the invariant load is
globally mapped to that value.
llvm-svn: 249126
4 files changed, 116 insertions, 9 deletions
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index f10951885e1..8aaf9c2440e 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -175,6 +175,7 @@ struct SubtreeReferences { LoopInfo &LI; ScalarEvolution &SE; Region &R; + ValueMapT &GlobalMap; SetVector<Value *> &Values; SetVector<const SCEV *> &SCEVs; BlockGenerator &BlockGen; @@ -190,7 +191,8 @@ static int findReferencesInBlock(struct SubtreeReferences &References, References.SCEVs.insert( References.SE.getSCEVAtScope(SrcVal, References.LI.getLoopFor(BB))); continue; - } + } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal)) + References.Values.insert(NewVal); return 0; } @@ -282,8 +284,8 @@ void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, SetVector<const Loop *> &Loops) { SetVector<const SCEV *> SCEVs; - struct SubtreeReferences References = {LI, SE, S.getRegion(), - Values, SCEVs, getBlockGenerator()}; + struct SubtreeReferences References = { + LI, SE, S.getRegion(), ValueMap, Values, SCEVs, getBlockGenerator()}; for (const auto &I : IDToValue) Values.insert(I.second); @@ -590,11 +592,6 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { SubtreeValues.insert(V); } - // Values preloaded prior to the SCoP need to be available in the subfunction. 
- const auto &InvariantAccesses = S.getInvariantAccesses(); - for (const InvariantAccessTy &IA : InvariantAccesses) - SubtreeValues.insert(ValueMap[IA.first->getAccessInstruction()]); - ParallelLoopGenerator::ValueToValueMapTy NewValues; ParallelLoopGenerator ParallelLoopGen(Builder, P, LI, DT, DL); diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll index c8a0f68c6dc..921dc1927cd 100644 --- a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll +++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll @@ -8,7 +8,7 @@ ; A[i] += A[0]; ; } ; -; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds { float*, float }, { float*, float }* %polly.par.userContext, i32 0, i32 1 +; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds { float, float* }, { float, float* }* %polly.par.userContext, i32 0 ; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll new file mode 100644 index 00000000000..7e0df742e11 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-codegen -polly-parallel \ +; RUN: -polly-parallel-force -polly-detect-unprofitable -S < %s | FileCheck %s +; +; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction. 
+; +; void f(float *A) { +; for (int i = 1; i < 1000; i++) +; A[i] += /* split bb */ A[0]; +; } +; A[0] tmp (unused) A +; CHECK: %polly.par.userContext = alloca { float, float*, float* } +; +; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds +; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* nocapture %A) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body.split ] + %tmp = load float, float* %A, align 4 + br label %for.body.split + +for.body.split: + %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx1, align 4 + %add = fadd float %tmp, %tmp1 + store float %add, float* %arrayidx1, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll new file mode 100644 index 00000000000..386e1420434 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-codegen -polly-parallel \ +; RUN: -polly-parallel-force -polly-detect-unprofitable -S < %s | FileCheck %s +; +; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction but +; not B[0] as it is not needed +; +; void f(float *A, float *B) { +; // Not parallel +; for (int i = 1; i < 1000; i++) { +; B[i] = B[i+1] + B[0]; +; // Parallel +; for (int j = 1; j < 1000; j++) +; A[j] += A[0]; +; } +; } +; +; i A[0] A +; CHECK: %polly.par.userContext = alloca { i64, float, float* } +; 
+; CHECK: %polly.access.B.load = +; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds +; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load +; CHECK-NOT: store float %polly.access.B.load, float* %polly.subfn.storeaddr.polly.access.B.load +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A, float* %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.9, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc.9 ], [ 1, %entry ] + %exitcond3 = icmp ne i64 %indvars.iv1, 1000 + br i1 %exitcond3, label %for.body, label %for.end.11 + +for.body: ; preds = %for.cond + %tmp = load float, float* %B, align 4 + %arrayidx1 = getelementptr inbounds float, float* %B, i64 %indvars.iv1 + %iv.add = add nsw i64 %indvars.iv1, 1 + %arrayidx2 = getelementptr inbounds float, float* %B, i64 %iv.add + %tmp4 = load float, float* %arrayidx2, align 4 + %add = fadd float %tmp4, %tmp + store float %add, float* %arrayidx1, align 4 + br label %for.cond.2 + +for.cond.2: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 1, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body.4, label %for.end + +for.body.4: ; preds = %for.cond.2 + %tmp5 = load float, float* %A, align 4 + %arrayidx7 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp6 = load float, float* %arrayidx7, align 4 + %add8 = fadd float %tmp6, %tmp5 + store float %add8, float* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.2 + +for.end: ; preds = %for.cond.2 + br label %for.inc.9 + +for.inc.9: ; preds = %for.end + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond + +for.end.11: ; preds = %for.cond + ret void +} |