diff options
-rw-r--r-- | polly/include/polly/ScopInfo.h | 3 | ||||
-rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 4 | ||||
-rw-r--r-- | polly/test/GPGPU/host-statement.ll | 12 |
3 files changed, 14 insertions, 5 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index b8b6679a493..aecab88b591 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -1939,6 +1939,9 @@ public: return InvariantEquivClasses; } + /// @brief Check if the scop has any invariant access. + bool hasInvariantAccesses() { return !InvariantEquivClasses.empty(); } + /// @brief Mark the SCoP as optimized by the scheduler. void markAsOptimized() { IsOptimized = true; } diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 050a71e0940..1621252ba10 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -1013,6 +1013,10 @@ public: DL = &S->getRegion().getEntry()->getParent()->getParent()->getDataLayout(); RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); + // We currently do not support scops with invariant loads. + if (S->hasInvariantAccesses()) + return false; + auto PPCGScop = createPPCGScop(); auto PPCGProg = createPPCGProg(PPCGScop); auto PPCGGen = generateGPU(PPCGScop, PPCGProg); diff --git a/polly/test/GPGPU/host-statement.ll b/polly/test/GPGPU/host-statement.ll index ded1d5834a8..73b0ea589e7 100644 --- a/polly/test/GPGPU/host-statement.ll +++ b/polly/test/GPGPU/host-statement.ll @@ -1,8 +1,10 @@ ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \ +; RUN: -polly-invariant-load-hoisting=false \ ; RUN: -disable-output < %s | \ ; RUN: FileCheck -check-prefix=CODE %s ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \ +; RUN: -polly-invariant-load-hoisting=false \ ; RUN: -disable-output < %s | \ ; RUN: FileCheck -check-prefix=KERNEL-IR %s @@ -16,15 +18,16 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; This test case tests that we can correctly handle a ScopStmt that is ; scheduled on the host, instead of within a kernel. -; CODE: Code +; CODE-LABEL: Code ; CODE-NEXT: ==== ; CODE-NEXT: # host ; CODE-NEXT: { ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice)); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice)); ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(16); -; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1); +; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -39,14 +42,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; CODE: { ; CODE-NEXT: dim3 k2_dimBlock(16, 32); ; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16); -; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); +; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } ; CODE: } ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); -; CODE-NEXT: if (p_0 <= 510 && p_1 <= 510) -; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_Q, dev_MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); ; CODE-NEXT: Stmt_for_cond33_preheader(); |