summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tobias Grosser <tobias@grosser.es> 2016-08-03 12:00:07 +0000
committer: Tobias Grosser <tobias@grosser.es> 2016-08-03 12:00:07 +0000
commit: 629109b6333083d618d0b2d5759c6f43456639bc (patch)
tree: 9705ed5a4d5b85da486322d32b8b2ce0ad8ebec0
parent: c59b3a2236646c616ceaea8b1a4f71ac70068e5e (diff)
download: bcm5719-llvm-629109b6333083d618d0b2d5759c6f43456639bc.tar.gz
download: bcm5719-llvm-629109b6333083d618d0b2d5759c6f43456639bc.zip
GPGPU: Mark kernel functions as polly.skip
Otherwise, we would try to re-optimize them with Polly-ACC and possibly even generate kernels that try to offload themselves, which does not work as the GPURuntime is not available on the accelerator and also does not make any sense.
llvm-svn: 277589
-rw-r--r-- polly/include/polly/ScopDetection.h | 2
-rw-r--r-- polly/lib/Analysis/ScopDetection.cpp | 2
-rw-r--r-- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 3
-rw-r--r-- polly/test/GPGPU/double-parallel-loop.ll | 3
-rw-r--r-- polly/test/GPGPU/host-control-flow.ll | 2
-rw-r--r-- polly/test/GPGPU/kernel-params-only-some-arrays.ll | 4
-rw-r--r-- polly/test/GPGPU/kernel-params-scop-parameter.ll | 2
7 files changed, 11 insertions, 7 deletions
diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h
index 0b73681edfc..75aaf30bcad 100644
--- a/polly/include/polly/ScopDetection.h
+++ b/polly/include/polly/ScopDetection.h
@@ -564,7 +564,7 @@ public:
/// the function.
///
/// @param F The function to mark as invalid.
- void markFunctionAsInvalid(Function *F) const;
+ static void markFunctionAsInvalid(Function *F);
/// @brief Verify if all valid Regions in this Function are still valid
/// after some transformations.
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index 3770ae06c4c..433a72f6aea 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -1402,7 +1402,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
return true;
}
-void ScopDetection::markFunctionAsInvalid(Function *F) const {
+void ScopDetection::markFunctionAsInvalid(Function *F) {
F->addFnAttr(PollySkipFnAttr);
}
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index db32ad40f8a..0d6e6ca078d 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -17,6 +17,7 @@
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
+#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/SCEVValidator.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -1170,6 +1171,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
Builder.CreateRetVoid();
Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());
+ ScopDetection::markFunctionAsInvalid(FN);
+
insertKernelIntrinsics(Kernel);
}
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll
index f0839ba9c98..994641774dc 100644
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@@ -113,7 +113,7 @@
; IR: polly.exiting:
; IR-NEXT: br label %polly.merge_new_and_old
-; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
+; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) #0 {
; KERNEL-IR-NEXT: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
@@ -171,6 +171,7 @@
; KERNEL-IR-LABEL: polly.loop_preheader: ; preds = %entry
; KERNEL-IR-NEXT: br label %polly.loop_header
+; KERNEL-IR: attributes #0 = { "polly.skip.fn" }
; KERNEL-ASM: .version 3.2
; KERNEL-ASM-NEXT: .target sm_30
diff --git a/polly/test/GPGPU/host-control-flow.ll b/polly/test/GPGPU/host-control-flow.ll
index 19f37b636ea..e7adced3333 100644
--- a/polly/test/GPGPU/host-control-flow.ll
+++ b/polly/test/GPGPU/host-control-flow.ll
@@ -42,7 +42,7 @@
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
-; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) {
+; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0)
; KERNEL-IR-LABEL: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
diff --git a/polly/test/GPGPU/kernel-params-only-some-arrays.ll b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
index 5c932b5db1d..171b5a3a952 100644
--- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll
+++ b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
@@ -21,7 +21,7 @@
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
+; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -36,7 +36,7 @@
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) {
+; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
diff --git a/polly/test/GPGPU/kernel-params-scop-parameter.ll b/polly/test/GPGPU/kernel-params-scop-parameter.ll
index 8212457d579..5ec5d6579d3 100644
--- a/polly/test/GPGPU/kernel-params-scop-parameter.ll
+++ b/polly/test/GPGPU/kernel-params-scop-parameter.ll
@@ -9,7 +9,7 @@
; A[i] += 42;
; }
-; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) {
+; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
OpenPOWER on IntegriCloud