summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- polly/lib/CodeGen/IslNodeBuilder.cpp 12
-rw-r--r-- polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll 35
2 files changed, 44 insertions, 3 deletions
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 865d8577e3f..c6479fe0cb9 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -196,8 +196,13 @@ int IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
/// Extract the values and SCEVs needed to generate code for a block.
static int findReferencesInBlock(struct SubtreeReferences &References,
- const ScopStmt *Stmt, const BasicBlock *BB) {
- for (const Instruction &Inst : *BB)
+ const ScopStmt *Stmt, BasicBlock *BB) {
+ for (Instruction &Inst : *BB) {
+ // Include invariant loads
+ if (isa<LoadInst>(Inst))
+ if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst))
+ References.Values.insert(InvariantLoad);
+
for (Value *SrcVal : Inst.operands()) {
auto *Scope = References.LI.getLoopFor(BB);
if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) {
@@ -206,6 +211,7 @@ static int findReferencesInBlock(struct SubtreeReferences &References,
} else if (Value *NewVal = References.GlobalMap.lookup(SrcVal))
References.Values.insert(NewVal);
}
+ }
return 0;
}
@@ -218,7 +224,7 @@ isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
else {
assert(Stmt->isRegionStmt() &&
"Stmt was neither block nor region statement");
- for (const BasicBlock *BB : Stmt->getRegion()->blocks())
+ for (BasicBlock *BB : Stmt->getRegion()->blocks())
findReferencesInBlock(References, Stmt, BB);
}
diff --git a/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll b/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
new file mode 100644
index 00000000000..4816a1b52e0
--- /dev/null
+++ b/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
@@ -0,0 +1,35 @@
+; RUN: opt %loadPolly -disable-output -polly-acc-dump-kernel-ir \
+; RUN: -polly-codegen-ppcg -polly-scops \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s
+
+; Verify that invariant loads used in a kernel statement are correctly forwarded
+; as subtree value to the GPU kernel.
+
+; CHECK: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0({{.*}} float %polly.access.p.load)
+; CHECK: store float %polly.access.p.load, float* %indvar2f.phiops
+
+define void @foo(float* %A, float* %p) {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [0, %entry], [%indvar.next, %loop]
+ %indvar.next = add i64 %indvar, 1
+ %invariant = load float, float* %p
+ %ptr = getelementptr float, float* %A, i64 %indvar
+ store float 42.0, float* %ptr
+ %cmp = icmp sle i64 %indvar, 1024
+ br i1 %cmp, label %loop, label %loop2
+
+loop2:
+ %indvar2 = phi i64 [0, %loop], [%indvar2.next, %loop2]
+ %indvar2f = phi float [%invariant, %loop], [%indvar2f, %loop2]
+ %indvar2.next = add i64 %indvar2, 1
+ store float %indvar2f, float* %A
+ %cmp2 = icmp sle i64 %indvar2, 1024
+ br i1 %cmp2, label %loop2, label %end
+
+end:
+ ret void
+
+}
OpenPOWER on IntegriCloud