diff options
| author | Tobias Grosser <tobias@grosser.es> | 2016-08-03 12:00:07 +0000 |
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2016-08-03 12:00:07 +0000 |
| commit | 629109b6333083d618d0b2d5759c6f43456639bc (patch) | |
| tree | 9705ed5a4d5b85da486322d32b8b2ce0ad8ebec0 | |
| parent | c59b3a2236646c616ceaea8b1a4f71ac70068e5e (diff) | |
| download | bcm5719-llvm-629109b6333083d618d0b2d5759c6f43456639bc.tar.gz bcm5719-llvm-629109b6333083d618d0b2d5759c6f43456639bc.zip | |
GPGPU: Mark kernel functions as polly.skip
Otherwise, we would try to re-optimize them with Polly-ACC and possibly even
generate kernels that try to offload themselves. This does not work, because
the GPURuntime is not available on the accelerator, and it would not make any
sense either.
llvm-svn: 277589
| -rw-r--r-- | polly/include/polly/ScopDetection.h | 2 | ||||
| -rw-r--r-- | polly/lib/Analysis/ScopDetection.cpp | 2 | ||||
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 3 | ||||
| -rw-r--r-- | polly/test/GPGPU/double-parallel-loop.ll | 3 | ||||
| -rw-r--r-- | polly/test/GPGPU/host-control-flow.ll | 2 | ||||
| -rw-r--r-- | polly/test/GPGPU/kernel-params-only-some-arrays.ll | 4 | ||||
| -rw-r--r-- | polly/test/GPGPU/kernel-params-scop-parameter.ll | 2 |
7 files changed, 11 insertions, 7 deletions
diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h index 0b73681edfc..75aaf30bcad 100644 --- a/polly/include/polly/ScopDetection.h +++ b/polly/include/polly/ScopDetection.h @@ -564,7 +564,7 @@ public: /// the function. /// /// @param F The function to mark as invalid. - void markFunctionAsInvalid(Function *F) const; + static void markFunctionAsInvalid(Function *F); /// @brief Verify if all valid Regions in this Function are still valid /// after some transformations. diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index 3770ae06c4c..433a72f6aea 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -1402,7 +1402,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const { return true; } -void ScopDetection::markFunctionAsInvalid(Function *F) const { +void ScopDetection::markFunctionAsInvalid(Function *F) { F->addFnAttr(PollySkipFnAttr); } diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index db32ad40f8a..0d6e6ca078d 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -17,6 +17,7 @@ #include "polly/DependenceInfo.h" #include "polly/LinkAllPasses.h" #include "polly/Options.h" +#include "polly/ScopDetection.h" #include "polly/ScopInfo.h" #include "polly/Support/SCEVValidator.h" #include "llvm/ADT/PostOrderIterator.h" @@ -1170,6 +1171,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel, Builder.CreateRetVoid(); Builder.SetInsertPoint(EntryBlock, EntryBlock->begin()); + ScopDetection::markFunctionAsInvalid(FN); + insertKernelIntrinsics(Kernel); } diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index f0839ba9c98..994641774dc 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -113,7 +113,7 @@ ; IR: polly.exiting: ; IR-NEXT: br 
label %polly.merge_new_and_old -; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) { +; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) #0 { ; KERNEL-IR-NEXT: entry: ; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64 @@ -171,6 +171,7 @@ ; KERNEL-IR-LABEL: polly.loop_preheader: ; preds = %entry ; KERNEL-IR-NEXT: br label %polly.loop_header +; KERNEL-IR: attributes #0 = { "polly.skip.fn" } ; KERNEL-ASM: .version 3.2 ; KERNEL-ASM-NEXT: .target sm_30 diff --git a/polly/test/GPGPU/host-control-flow.ll b/polly/test/GPGPU/host-control-flow.ll index 19f37b636ea..e7adced3333 100644 --- a/polly/test/GPGPU/host-control-flow.ll +++ b/polly/test/GPGPU/host-control-flow.ll @@ -42,7 +42,7 @@ ; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98 ; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit -; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) { +; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) ; KERNEL-IR-LABEL: entry: ; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64 diff --git a/polly/test/GPGPU/kernel-params-only-some-arrays.ll b/polly/test/GPGPU/kernel-params-only-some-arrays.ll index 5c932b5db1d..171b5a3a952 100644 --- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll +++ b/polly/test/GPGPU/kernel-params-only-some-arrays.ll @@ -21,7 +21,7 @@ ; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) { +; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 @@ -36,7 +36,7 @@ ; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" 
-; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) { +; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 diff --git a/polly/test/GPGPU/kernel-params-scop-parameter.ll b/polly/test/GPGPU/kernel-params-scop-parameter.ll index 8212457d579..5ec5d6579d3 100644 --- a/polly/test/GPGPU/kernel-params-scop-parameter.ll +++ b/polly/test/GPGPU/kernel-params-scop-parameter.ll @@ -9,7 +9,7 @@ ; A[i] += 42; ; } -; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) { +; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |

