summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Johannes Doerfert <doerfert@cs.uni-saarland.de> 2015-10-02 13:11:27 +0000
committer: Johannes Doerfert <doerfert@cs.uni-saarland.de> 2015-10-02 13:11:27 +0000
commit: 911951f4f85487df14c9b8e87921e1fb96539b7c (patch)
tree: 22348c42c42369861ae21f5874aa61b9cb05b1ab
parent: 478a7de18b9a65c47c2c7ae46e873c8638b45a9b (diff)
downloadbcm5719-llvm-911951f4f85487df14c9b8e87921e1fb96539b7c.tar.gz
bcm5719-llvm-911951f4f85487df14c9b8e87921e1fb96539b7c.zip
Hand down referenced & globally mapped values to the subfunction
If a value is globally mapped (IslNodeBuilder::ValueMap) and referenced in the code that will be put into a subfunction, we hand down the new value to the subfunction. This patch also removes the code that handed down all invariant loads to the subfunction; instead, only the invariant loads that are actually needed are given to the subfunction. There are two possible reasons for an invariant load to be handed down: 1) The invariant load is used in a block that is placed in the subfunction but is not the parent of the load. In this case, the scalar access that will read the loaded value will cause its base pointer (the preloaded value) to be handed down to the subfunction. 2) The invariant load is defined and used in a block that is placed in the subfunction. With this patch, we hand down the preloaded value to the subfunction because the invariant load is globally mapped to that value. llvm-svn: 249126
-rw-r--r-- polly/lib/CodeGen/IslNodeBuilder.cpp 13
-rw-r--r-- polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll 2
-rw-r--r-- polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll 38
-rw-r--r-- polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll 72
4 files changed, 116 insertions, 9 deletions
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index f10951885e1..8aaf9c2440e 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -175,6 +175,7 @@ struct SubtreeReferences {
LoopInfo &LI;
ScalarEvolution &SE;
Region &R;
+ ValueMapT &GlobalMap;
SetVector<Value *> &Values;
SetVector<const SCEV *> &SCEVs;
BlockGenerator &BlockGen;
@@ -190,7 +191,8 @@ static int findReferencesInBlock(struct SubtreeReferences &References,
References.SCEVs.insert(
References.SE.getSCEVAtScope(SrcVal, References.LI.getLoopFor(BB)));
continue;
- }
+ } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal))
+ References.Values.insert(NewVal);
return 0;
}
@@ -282,8 +284,8 @@ void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
SetVector<const Loop *> &Loops) {
SetVector<const SCEV *> SCEVs;
- struct SubtreeReferences References = {LI, SE, S.getRegion(),
- Values, SCEVs, getBlockGenerator()};
+ struct SubtreeReferences References = {
+ LI, SE, S.getRegion(), ValueMap, Values, SCEVs, getBlockGenerator()};
for (const auto &I : IDToValue)
Values.insert(I.second);
@@ -590,11 +592,6 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
SubtreeValues.insert(V);
}
- // Values preloaded prior to the SCoP need to be available in the subfunction.
- const auto &InvariantAccesses = S.getInvariantAccesses();
- for (const InvariantAccessTy &IA : InvariantAccesses)
- SubtreeValues.insert(ValueMap[IA.first->getAccessInstruction()]);
-
ParallelLoopGenerator::ValueToValueMapTy NewValues;
ParallelLoopGenerator ParallelLoopGen(Builder, P, LI, DT, DL);
diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
index c8a0f68c6dc..921dc1927cd 100644
--- a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
+++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
@@ -8,7 +8,7 @@
; A[i] += A[0];
; }
;
-; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds { float*, float }, { float*, float }* %polly.par.userContext, i32 0, i32 1
+; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds { float, float* }, { float, float* }* %polly.par.userContext, i32 0
; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
new file mode 100644
index 00000000000..7e0df742e11
--- /dev/null
+++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -polly-codegen -polly-parallel \
+; RUN: -polly-parallel-force -polly-detect-unprofitable -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
+;
+; void f(float *A) {
+; for (int i = 1; i < 1000; i++)
+; A[i] += /* split bb */ A[0];
+; }
+; A[0] tmp (unused) A
+; CHECK: %polly.par.userContext = alloca { float, float*, float* }
+;
+; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
+; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* nocapture %A) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body.split ]
+ %tmp = load float, float* %A, align 4
+ br label %for.body.split
+
+for.body.split:
+ %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %tmp1 = load float, float* %arrayidx1, align 4
+ %add = fadd float %tmp, %tmp1
+ store float %add, float* %arrayidx1, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
new file mode 100644
index 00000000000..386e1420434
--- /dev/null
+++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
@@ -0,0 +1,72 @@
+; RUN: opt %loadPolly -polly-codegen -polly-parallel \
+; RUN: -polly-parallel-force -polly-detect-unprofitable -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction but
+; not B[0] as it is not needed
+;
+; void f(float *A, float *B) {
+; // Not parallel
+; for (int i = 1; i < 1000; i++) {
+; B[i] = B[i+1] + B[0];
+; // Parallel
+; for (int j = 1; j < 1000; j++)
+; A[j] += A[0];
+; }
+; }
+;
+; i A[0] A
+; CHECK: %polly.par.userContext = alloca { i64, float, float* }
+;
+; CHECK: %polly.access.B.load =
+; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
+; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+; CHECK-NOT: store float %polly.access.B.load, float* %polly.subfn.storeaddr.polly.access.B.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* %A, float* %B) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc.9, %entry
+ %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc.9 ], [ 1, %entry ]
+ %exitcond3 = icmp ne i64 %indvars.iv1, 1000
+ br i1 %exitcond3, label %for.body, label %for.end.11
+
+for.body: ; preds = %for.cond
+ %tmp = load float, float* %B, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %B, i64 %indvars.iv1
+ %iv.add = add nsw i64 %indvars.iv1, 1
+ %arrayidx2 = getelementptr inbounds float, float* %B, i64 %iv.add
+ %tmp4 = load float, float* %arrayidx2, align 4
+ %add = fadd float %tmp4, %tmp
+ store float %add, float* %arrayidx1, align 4
+ br label %for.cond.2
+
+for.cond.2: ; preds = %for.inc, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 1, %for.body ]
+ %exitcond = icmp ne i64 %indvars.iv, 1000
+ br i1 %exitcond, label %for.body.4, label %for.end
+
+for.body.4: ; preds = %for.cond.2
+ %tmp5 = load float, float* %A, align 4
+ %arrayidx7 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %tmp6 = load float, float* %arrayidx7, align 4
+ %add8 = fadd float %tmp6, %tmp5
+ store float %add8, float* %arrayidx7, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body.4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond.2
+
+for.end: ; preds = %for.cond.2
+ br label %for.inc.9
+
+for.inc.9: ; preds = %for.end
+ %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+ br label %for.cond
+
+for.end.11: ; preds = %for.cond
+ ret void
+}
OpenPOWER on IntegriCloud