summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/ScopInfo.h3
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp4
-rw-r--r--polly/test/GPGPU/host-statement.ll12
3 files changed, 14 insertions, 5 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index b8b6679a493..aecab88b591 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -1939,6 +1939,9 @@ public:
return InvariantEquivClasses;
}
+ /// @brief Check if the scop has any invariant access.
+ bool hasInvariantAccesses() { return !InvariantEquivClasses.empty(); }
+
/// @brief Mark the SCoP as optimized by the scheduler.
void markAsOptimized() { IsOptimized = true; }
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 050a71e0940..1621252ba10 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1013,6 +1013,10 @@ public:
DL = &S->getRegion().getEntry()->getParent()->getParent()->getDataLayout();
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
+ // We currently do not support scops with invariant loads.
+ if (S->hasInvariantAccesses())
+ return false;
+
auto PPCGScop = createPPCGScop();
auto PPCGProg = createPPCGProg(PPCGScop);
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
diff --git a/polly/test/GPGPU/host-statement.ll b/polly/test/GPGPU/host-statement.ll
index ded1d5834a8..73b0ea589e7 100644
--- a/polly/test/GPGPU/host-statement.ll
+++ b/polly/test/GPGPU/host-statement.ll
@@ -1,8 +1,10 @@
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -polly-invariant-load-hoisting=false \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -polly-invariant-load-hoisting=false \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=KERNEL-IR %s
@@ -16,15 +18,16 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; This test case tests that we can correctly handle a ScopStmt that is
; scheduled on the host, instead of within a kernel.
-; CODE: Code
+; CODE-LABEL: Code
; CODE-NEXT: ====
; CODE-NEXT: # host
; CODE-NEXT: {
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(16);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1);
+; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@@ -39,14 +42,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; CODE: {
; CODE-NEXT: dim3 k2_dimBlock(16, 32);
; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
-; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
+; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: }
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT: if (p_0 <= 510 && p_1 <= 510)
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_Q, dev_MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
; CODE-NEXT: Stmt_for_cond33_preheader();
OpenPOWER on IntegriCloud