summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- polly/include/polly/ScopDetection.h | 2
-rw-r--r-- polly/lib/Analysis/ScopDetection.cpp | 2
-rw-r--r-- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 3
-rw-r--r-- polly/test/GPGPU/double-parallel-loop.ll | 3
-rw-r--r-- polly/test/GPGPU/host-control-flow.ll | 2
-rw-r--r-- polly/test/GPGPU/kernel-params-only-some-arrays.ll | 4
-rw-r--r-- polly/test/GPGPU/kernel-params-scop-parameter.ll | 2
7 files changed, 11 insertions, 7 deletions
diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h
index 0b73681edfc..75aaf30bcad 100644
--- a/polly/include/polly/ScopDetection.h
+++ b/polly/include/polly/ScopDetection.h
@@ -564,7 +564,7 @@ public:
/// the function.
///
/// @param F The function to mark as invalid.
- void markFunctionAsInvalid(Function *F) const;
+ static void markFunctionAsInvalid(Function *F);
/// @brief Verify if all valid Regions in this Function are still valid
/// after some transformations.
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index 3770ae06c4c..433a72f6aea 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -1402,7 +1402,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
return true;
}
-void ScopDetection::markFunctionAsInvalid(Function *F) const {
+void ScopDetection::markFunctionAsInvalid(Function *F) {
F->addFnAttr(PollySkipFnAttr);
}
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index db32ad40f8a..0d6e6ca078d 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -17,6 +17,7 @@
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
+#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/SCEVValidator.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -1170,6 +1171,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
Builder.CreateRetVoid();
Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());
+ ScopDetection::markFunctionAsInvalid(FN);
+
insertKernelIntrinsics(Kernel);
}
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll
index f0839ba9c98..994641774dc 100644
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@@ -113,7 +113,7 @@
; IR: polly.exiting:
; IR-NEXT: br label %polly.merge_new_and_old
-; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
+; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) #0 {
; KERNEL-IR-NEXT: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
@@ -171,6 +171,7 @@
; KERNEL-IR-LABEL: polly.loop_preheader: ; preds = %entry
; KERNEL-IR-NEXT: br label %polly.loop_header
+; KERNEL-IR: attributes #0 = { "polly.skip.fn" }
; KERNEL-ASM: .version 3.2
; KERNEL-ASM-NEXT: .target sm_30
diff --git a/polly/test/GPGPU/host-control-flow.ll b/polly/test/GPGPU/host-control-flow.ll
index 19f37b636ea..e7adced3333 100644
--- a/polly/test/GPGPU/host-control-flow.ll
+++ b/polly/test/GPGPU/host-control-flow.ll
@@ -42,7 +42,7 @@
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
-; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) {
+; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0)
; KERNEL-IR-LABEL: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
diff --git a/polly/test/GPGPU/kernel-params-only-some-arrays.ll b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
index 5c932b5db1d..171b5a3a952 100644
--- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll
+++ b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
@@ -21,7 +21,7 @@
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
+; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -36,7 +36,7 @@
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) {
+; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
diff --git a/polly/test/GPGPU/kernel-params-scop-parameter.ll b/polly/test/GPGPU/kernel-params-scop-parameter.ll
index 8212457d579..5ec5d6579d3 100644
--- a/polly/test/GPGPU/kernel-params-scop-parameter.ll
+++ b/polly/test/GPGPU/kernel-params-scop-parameter.ll
@@ -9,7 +9,7 @@
; A[i] += 42;
; }
-; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) {
+; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
OpenPOWER on IntegriCloud