diff options
author | Tobias Grosser <tobias@grosser.es> | 2016-07-19 15:56:25 +0000 |
---|---|---|
committer | Tobias Grosser <tobias@grosser.es> | 2016-07-19 15:56:25 +0000 |
commit | 2d58a64e7ffd76a3c8cfd586aa72b171f10a38a9 (patch) | |
tree | c9fe19b974acf76dab5a8187903f897bfc856616 | |
parent | ccf48a2732ca453ed6f849ae8600a2deed15dee2 (diff) | |
download | bcm5719-llvm-2d58a64e7ffd76a3c8cfd586aa72b171f10a38a9.tar.gz bcm5719-llvm-2d58a64e7ffd76a3c8cfd586aa72b171f10a38a9.zip |
GPGPU: Bail out of scops with hoisted invariant loads
This is currently not supported; support will be added later. Also update the
test cases to ensure no invariant code hoisting is applied.
llvm-svn: 275987
-rw-r--r-- | polly/include/polly/ScopInfo.h | 3 | ||||
-rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 4 | ||||
-rw-r--r-- | polly/test/GPGPU/host-statement.ll | 12 |
3 files changed, 14 insertions, 5 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index b8b6679a493..aecab88b591 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -1939,6 +1939,9 @@ public: return InvariantEquivClasses; } + /// @brief Check if the scop has any invariant access. + bool hasInvariantAccesses() { return !InvariantEquivClasses.empty(); } + /// @brief Mark the SCoP as optimized by the scheduler. void markAsOptimized() { IsOptimized = true; } diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 050a71e0940..1621252ba10 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -1013,6 +1013,10 @@ public: DL = &S->getRegion().getEntry()->getParent()->getParent()->getDataLayout(); RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); + // We currently do not support scops with invariant loads. + if (S->hasInvariantAccesses()) + return false; + auto PPCGScop = createPPCGScop(); auto PPCGProg = createPPCGProg(PPCGScop); auto PPCGGen = generateGPU(PPCGScop, PPCGProg); diff --git a/polly/test/GPGPU/host-statement.ll b/polly/test/GPGPU/host-statement.ll index ded1d5834a8..73b0ea589e7 100644 --- a/polly/test/GPGPU/host-statement.ll +++ b/polly/test/GPGPU/host-statement.ll @@ -1,8 +1,10 @@ ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \ +; RUN: -polly-invariant-load-hoisting=false \ ; RUN: -disable-output < %s | \ ; RUN: FileCheck -check-prefix=CODE %s ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \ +; RUN: -polly-invariant-load-hoisting=false \ ; RUN: -disable-output < %s | \ ; RUN: FileCheck -check-prefix=KERNEL-IR %s @@ -16,15 +18,16 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; This test case tests that we can correctly handle a ScopStmt that is ; scheduled on the host, instead of within a kernel. 
-; CODE: Code +; CODE-LABEL: Code ; CODE-NEXT: ==== ; CODE-NEXT: # host ; CODE-NEXT: { ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice)); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice)); ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(16); -; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1); +; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -39,14 +42,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; CODE: { ; CODE-NEXT: dim3 k2_dimBlock(16, 32); ; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16); -; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); +; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } ; CODE: } ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); -; CODE-NEXT: if (p_0 <= 510 && p_1 <= 510) -; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_Q, dev_MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); ; CODE-NEXT: Stmt_for_cond33_preheader(); |