summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- polly/include/polly/ScopInfo.h 15
-rw-r--r-- polly/lib/Analysis/ScopInfo.cpp 15
-rw-r--r-- polly/lib/CodeGen/PPCGCodeGeneration.cpp 4
-rw-r--r-- polly/test/GPGPU/cuda-annotations.ll 4
-rw-r--r-- polly/test/GPGPU/cuda-managed-memory-simple.ll 2
-rw-r--r-- polly/test/GPGPU/host-control-flow.ll 2
-rw-r--r-- polly/test/GPGPU/invariant-load-hoisting.ll 2
-rw-r--r-- polly/test/GPGPU/kernel-params-only-some-arrays.ll 12
-rw-r--r-- polly/test/GPGPU/kernel-params-scop-parameter.ll 2
-rw-r--r-- polly/test/GPGPU/kernels-names-across-scops-funcs.ll 124
10 files changed, 167 insertions, 15 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 5898512f893..11c7b161e51 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -1616,6 +1616,12 @@ private:
/// The name of the SCoP (identical to the regions name)
std::string name;
+ /// The ID to be assigned to the next Scop in a function
+ static int NextScopID;
+
+ /// The name of the function currently under consideration
+ static std::string CurrentFunc;
+
// Access functions of the SCoP.
//
// This owns all the MemoryAccess objects of the Scop created in this pass.
@@ -1808,6 +1814,12 @@ private:
/// The smallest statement index not yet assigned.
long StmtIdx = 0;
+ /// A number that uniquely represents a Scop within its function
+ const int ID;
+
+ /// Return the next Scop ID for \p ParentFunc (restarts at 0 on a new name).
+ static int getNextID(std::string ParentFunc);
+
/// Scop constructor; invoked from ScopBuilder::buildScop.
Scop(Region &R, ScalarEvolution &SE, LoopInfo &LI,
ScopDetection::DetectionContext &DC);
@@ -2378,6 +2390,9 @@ public:
/// Check if the SCoP is to be skipped by ScopPass passes.
bool isToBeSkipped() const { return SkipScop; }
+ /// Return the ID of this Scop (unique within its parent function).
+ int getID() const { return ID; }
+
/// Get the name of the entry and exit blocks of this Scop.
///
/// These along with the function name can uniquely identify a Scop.
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index bf6972347b7..49a49f1fe1a 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3499,6 +3499,18 @@ static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
}
+// Bookkeeping for numbering Scops per function. Both values are static members
+// of Scop.
+int Scop::NextScopID = 0;
+
+std::string Scop::CurrentFunc;
+
+/// Hand out consecutive IDs for Scops requested under the same function name.
+///
+/// Whenever \p ParentFunc differs from the name recorded by the previous call,
+/// the new name is recorded and the numbering starts over at 0.
+///
+/// \param ParentFunc Name of the function that contains the new Scop.
+/// \returns The ID for the new Scop; the stored counter is advanced past it.
+int Scop::getNextID(std::string ParentFunc) {
+  const bool SameFunction = (CurrentFunc == ParentFunc);
+  if (!SameFunction) {
+    // Record the new name first, then restart the numbering.
+    CurrentFunc = ParentFunc;
+    NextScopID = 0;
+  }
+
+  const int AssignedID = NextScopID;
+  ++NextScopID;
+  return AssignedID;
+}
+
Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
ScopDetection::DetectionContext &DC)
: SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false),
@@ -3506,7 +3518,8 @@ Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
MaxLoopDepth(0), CopyStmtsNum(0), SkipScop(false), DC(DC),
IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
- Schedule(nullptr) {
+ Schedule(nullptr),
+ ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
if (IslOnErrorAbort)
isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
buildContext();
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index d2e4b2b72e7..93a8417d886 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -686,8 +686,8 @@ private:
};
std::string GPUNodeBuilder::getKernelFuncName(int Kernel_id) {
- return "FUNC_" + S.getFunction().getName().str() + "_KERNEL_" +
- std::to_string(Kernel_id);
+ return "FUNC_" + S.getFunction().getName().str() + "_SCOP_" + // result: FUNC_<function>_SCOP_<scop id>_KERNEL_<kernel id>
+ std::to_string(S.getID()) + "_KERNEL_" + std::to_string(Kernel_id); // the Scop ID keeps kernel names from different Scops of one function distinct
}
void GPUNodeBuilder::initializeAfterRTH() {
diff --git a/polly/test/GPGPU/cuda-annotations.ll b/polly/test/GPGPU/cuda-annotations.ll
index 57b2e160983..9a662797a1b 100644
--- a/polly/test/GPGPU/cuda-annotations.ll
+++ b/polly/test/GPGPU/cuda-annotations.ll
@@ -4,11 +4,11 @@
; REQUIRES: pollyacc
-; KERNEL: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 {
+; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 {
; KERNEL: !nvvm.annotations = !{!0}
-; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @FUNC_foo_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}
+; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @FUNC_foo_SCOP_0_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/polly/test/GPGPU/cuda-managed-memory-simple.ll b/polly/test/GPGPU/cuda-managed-memory-simple.ll
index 9d2af1da897..cc6ec52af84 100644
--- a/polly/test/GPGPU/cuda-managed-memory-simple.ll
+++ b/polly/test/GPGPU/cuda-managed-memory-simple.ll
@@ -54,7 +54,7 @@
; CHECK-NEXT: %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3
; CHECK-NEXT: %23 = bitcast i32* %polly_launch_0_param_size_1 to i8*
; CHECK-NEXT: store i8* %23, i8** %22
-; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([810 x i8], [810 x i8]* @FUNC_copy_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([19 x i8], [19 x i8]* @FUNC_copy_KERNEL_0_name, i32 0, i32 0))
+; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([852 x i8], [852 x i8]* @FUNC_copy_SCOP_0_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([26 x i8], [26 x i8]* @FUNC_copy_SCOP_0_KERNEL_0_name, i32 0, i32 0))
; CHECK-NEXT: call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
; CHECK-NEXT: call void @polly_freeKernel(i8* %24)
; CHECK-NEXT: call void @polly_synchronizeDevice()
diff --git a/polly/test/GPGPU/host-control-flow.ll b/polly/test/GPGPU/host-control-flow.ll
index 15f66bbb160..f01398ee0a7 100644
--- a/polly/test/GPGPU/host-control-flow.ll
+++ b/polly/test/GPGPU/host-control-flow.ll
@@ -42,7 +42,7 @@
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
-; KERNEL-IR: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
+; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
; KERNEL-IR-LABEL: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
diff --git a/polly/test/GPGPU/invariant-load-hoisting.ll b/polly/test/GPGPU/invariant-load-hoisting.ll
index 58a668971df..5b90014abe4 100644
--- a/polly/test/GPGPU/invariant-load-hoisting.ll
+++ b/polly/test/GPGPU/invariant-load-hoisting.ll
@@ -21,7 +21,7 @@
; HOST-IR: call void @polly_launchKernel(i8* %215, i32 %221, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
; HOST-IR-NEXT: call void @polly_freeKernel(i8* %215)
;
-; KERNEL-IR: define ptx_kernel void @FUNC_f_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 {
+; KERNEL-IR: define ptx_kernel void @FUNC_f_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 {
;
; Check that we generate correct GPU code in case of invariant load hoisting.
;
diff --git a/polly/test/GPGPU/kernel-params-only-some-arrays.ll b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
index a2233cc736d..3134e4fc044 100644
--- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll
+++ b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
@@ -16,12 +16,12 @@
; B[i] += 42;
; }
-; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_0'
-; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_0"
+; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0'
+; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0"
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_0(i8 addrspace(1)* %MemRef_A)
+; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -31,12 +31,12 @@
; KERNEL: ret void
; KERNEL-NEXT: }
-; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_1'
-; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_1"
+; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1'
+; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1"
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_1(i8 addrspace(1)* %MemRef_B)
+; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
diff --git a/polly/test/GPGPU/kernel-params-scop-parameter.ll b/polly/test/GPGPU/kernel-params-scop-parameter.ll
index d2d5abaa991..3cbd4043604 100644
--- a/polly/test/GPGPU/kernel-params-scop-parameter.ll
+++ b/polly/test/GPGPU/kernel-params-scop-parameter.ll
@@ -9,7 +9,7 @@
; A[i] += 42;
; }
-; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n)
+; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/polly/test/GPGPU/kernels-names-across-scops-funcs.ll b/polly/test/GPGPU/kernels-names-across-scops-funcs.ll
new file mode 100644
index 00000000000..22ad3f5bdac
--- /dev/null
+++ b/polly/test/GPGPU/kernels-names-across-scops-funcs.ll
@@ -0,0 +1,124 @@
+; RUN: opt %loadPolly -polly-process-unprofitable -polly-codegen-ppcg \
+; RUN: -polly-acc-dump-kernel-ir -disable-output < %s | \
+; RUN: FileCheck -check-prefix=KERNEL %s
+
+; REQUIRES: pollyacc
+
+; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_arg1, i32 %arg) #0 {
+; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_1_KERNEL_0(i8 addrspace(1)* %MemRef_arg1, i32 %arg) #0 {
+; KERNEL: define ptx_kernel void @FUNC_foo2_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_arg1, i32 %arg) #0 {
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32 %arg, i32* %arg1) #0 { ; two loops over %arg1; the FileCheck lines at the top expect SCOP_0 and SCOP_1 kernels for this function
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb
+ %tmp = icmp sgt i32 %arg, 0
+ br i1 %tmp, label %bb3, label %bb13
+
+bb3: ; preds = %bb2
+ br label %bb4
+
+bb4: ; preds = %bb4, %bb3
+ %tmp5 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb4 ]
+ %tmp6 = getelementptr inbounds i32, i32* %arg1, i64 %tmp5
+ %tmp7 = load i32, i32* %tmp6, align 4, !tbaa !2
+ %tmp8 = add nsw i32 %tmp7, 1
+ store i32 %tmp8, i32* %tmp6, align 4, !tbaa !2 ; first loop: increments %arg1[%tmp5] by 1
+ %tmp9 = add nuw nsw i64 %tmp5, 1
+ %tmp10 = zext i32 %arg to i64
+ %tmp11 = icmp ne i64 %tmp9, %tmp10
+ br i1 %tmp11, label %bb4, label %bb12
+
+bb12: ; preds = %bb4
+ br label %bb13
+
+bb13: ; preds = %bb12, %bb2
+ %tmp14 = tail call i64 @clock() #3 ; external call between the loops; presumably what separates the two SCoPs (TODO confirm)
+ %tmp15 = icmp eq i64 %tmp14, 0
+ br i1 %tmp15, label %bb16, label %bb29
+
+bb16: ; preds = %bb13
+ %tmp17 = icmp sgt i32 %arg, 0
+ br i1 %tmp17, label %bb18, label %bb28
+
+bb18: ; preds = %bb16
+ br label %bb19
+
+bb19: ; preds = %bb19, %bb18
+ %tmp20 = phi i64 [ 0, %bb18 ], [ %tmp24, %bb19 ]
+ %tmp21 = getelementptr inbounds i32, i32* %arg1, i64 %tmp20
+ %tmp22 = load i32, i32* %tmp21, align 4, !tbaa !2
+ %tmp23 = add nsw i32 %tmp22, 1
+ store i32 %tmp23, i32* %tmp21, align 4, !tbaa !2 ; second loop: same increment, reached only when the clock result is 0
+ %tmp24 = add nuw nsw i64 %tmp20, 1
+ %tmp25 = zext i32 %arg to i64
+ %tmp26 = icmp ne i64 %tmp24, %tmp25
+ br i1 %tmp26, label %bb19, label %bb27
+
+bb27: ; preds = %bb19
+ br label %bb28
+
+bb28: ; preds = %bb27, %bb16
+ br label %bb29
+
+bb29: ; preds = %bb28, %bb13
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare i64 @clock() #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define void @foo2(i32 %arg, i32* %arg1) #0 { ; single loop; the FileCheck lines at the top expect a SCOP_0 kernel here, i.e. Scop numbering restarts for this function
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb
+ %tmp = icmp sgt i32 %arg, 0
+ br i1 %tmp, label %bb3, label %bb13
+
+bb3: ; preds = %bb2
+ br label %bb4
+
+bb4: ; preds = %bb4, %bb3
+ %tmp5 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb4 ]
+ %tmp6 = getelementptr inbounds i32, i32* %arg1, i64 %tmp5
+ %tmp7 = load i32, i32* %tmp6, align 4, !tbaa !2
+ %tmp8 = add nsw i32 %tmp7, 1
+ store i32 %tmp8, i32* %tmp6, align 4, !tbaa !2 ; increments %arg1[%tmp5] by 1
+ %tmp9 = add nuw nsw i64 %tmp5, 1
+ %tmp10 = zext i32 %arg to i64
+ %tmp11 = icmp ne i64 %tmp9, %tmp10
+ br i1 %tmp11, label %bb4, label %bb12
+
+bb12: ; preds = %bb4
+ br label %bb13
+
+bb13: ; preds = %bb12, %bb2
+ ret void
+}
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (http://llvm.org/git/clang 98cf823022d1d71065c71e9338226ebf8bfa36ba) (http://llvm.org/git/llvm.git 4efa61f12928015bad233274ffa2e60c918e9a10)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
OpenPOWER on IntegriCloud