diff options
| -rw-r--r-- | polly/lib/CodeGen/IslNodeBuilder.cpp | 12 | ||||
| -rw-r--r-- | polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll | 35 |
2 files changed, 44 insertions, 3 deletions
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 865d8577e3f..c6479fe0cb9 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -196,8 +196,13 @@ int IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) { /// Extract the values and SCEVs needed to generate code for a block. static int findReferencesInBlock(struct SubtreeReferences &References, - const ScopStmt *Stmt, const BasicBlock *BB) { - for (const Instruction &Inst : *BB) + const ScopStmt *Stmt, BasicBlock *BB) { + for (Instruction &Inst : *BB) { + // Include invariant loads + if (isa<LoadInst>(Inst)) + if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst)) + References.Values.insert(InvariantLoad); + for (Value *SrcVal : Inst.operands()) { auto *Scope = References.LI.getLoopFor(BB); if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) { @@ -206,6 +211,7 @@ static int findReferencesInBlock(struct SubtreeReferences &References, } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal)) References.Values.insert(NewVal); } + } return 0; } @@ -218,7 +224,7 @@ isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr, else { assert(Stmt->isRegionStmt() && "Stmt was neither block nor region statement"); - for (const BasicBlock *BB : Stmt->getRegion()->blocks()) + for (BasicBlock *BB : Stmt->getRegion()->blocks()) findReferencesInBlock(References, Stmt, BB); } diff --git a/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll b/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll new file mode 100644 index 00000000000..4816a1b52e0 --- /dev/null +++ b/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -disable-output -polly-acc-dump-kernel-ir \ +; RUN: -polly-codegen-ppcg -polly-scops \ +; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s + +; Verify that invariant loads used in a kernel statement are correctly forwarded +; as subtree value to the GPU kernel. + +; CHECK: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0({{.*}} float %polly.access.p.load) +; CHECK: store float %polly.access.p.load, float* %indvar2f.phiops + +define void @foo(float* %A, float* %p) { +entry: + br label %loop + +loop: + %indvar = phi i64 [0, %entry], [%indvar.next, %loop] + %indvar.next = add i64 %indvar, 1 + %invariant = load float, float* %p + %ptr = getelementptr float, float* %A, i64 %indvar + store float 42.0, float* %ptr + %cmp = icmp sle i64 %indvar, 1024 + br i1 %cmp, label %loop, label %loop2 + +loop2: + %indvar2 = phi i64 [0, %loop], [%indvar2.next, %loop2] + %indvar2f = phi float [%invariant, %loop], [%indvar2f, %loop2] + %indvar2.next = add i64 %indvar2, 1 + store float %indvar2f, float* %A + %cmp2 = icmp sle i64 %indvar2, 1024 + br i1 %cmp2, label %loop2, label %end + +end: + ret void + +} |

