summary | refs | log | tree | commit | diff | stats
path: root/polly/test/Isl/CodeGen
diff options
context:
space:
mode:
author: Siddharth Bhat <siddu.druid@gmail.com> 2017-06-02 08:01:22 +0000
committer: Siddharth Bhat <siddu.druid@gmail.com> 2017-06-02 08:01:22 +0000
commit: 07bee290de7569b413831c75dface6edb240a8d8 (patch)
tree: 68785f542fe688d058a988c82094b3ff074e641b /polly/test/Isl/CodeGen
parent: af199153ccc5a6b6165da3ab16fe6c8ead523b88 (diff)
download: bcm5719-llvm-07bee290de7569b413831c75dface6edb240a8d8.tar.gz
download: bcm5719-llvm-07bee290de7569b413831c75dface6edb240a8d8.zip
[CodeGen] Extend Performance Counter to track per-scop information.

Previously, we would generate one performance counter for all scops. Now, we generate both the old information and a per-scop performance counter, which gives finer-grained information.

This patch needed a way to generate a unique name for a `Scop`. The start region, end region, and function name combined provide a unique `Scop` name. So, `Scop` has a new public API to provide its start and end region names.

Differential Revision: https://reviews.llvm.org/D33723

llvm-svn: 304528
Diffstat (limited to 'polly/test/Isl/CodeGen')
-rw-r--r-- polly/test/Isl/CodeGen/perf_monitoring.ll | 3
-rw-r--r-- polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll | 100
2 files changed, 100 insertions, 3 deletions
diff --git a/polly/test/Isl/CodeGen/perf_monitoring.ll b/polly/test/Isl/CodeGen/perf_monitoring.ll
index 79505335e07..b63958b7bfa 100644
--- a/polly/test/Isl/CodeGen/perf_monitoring.ll
+++ b/polly/test/Isl/CodeGen/perf_monitoring.ll
@@ -49,7 +49,6 @@ return:
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
; CHECK-NEXT: %9 = add i64 %8, %7
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: br label %return
; CHECK: define weak_odr void @__polly_perf_final() {
@@ -66,8 +65,6 @@ return:
; CHECK-NEXT: %9 = call i32 @fflush(i8* null)
; CHECK-NEXT: %10 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))
; CHECK-NEXT: %11 = call i32 @fflush(i8* null)
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
; CHECK: define weak_odr void @__polly_perf_init() {
diff --git a/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll b/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll
new file mode 100644
index 00000000000..48c9c4c34e0
--- /dev/null
+++ b/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll
@@ -0,0 +1,100 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN: -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+; void g(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+; Declaration of globals
+; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
+; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up counter in f
+; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
+; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
+; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
+; CHECK-NEXT: %7 = sub i64 %6, %5
+; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %9 = add i64 %8, %7
+; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: br label %return
+
+; Bumping up counter in g
+; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
+; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
+; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
+; CHECK-NEXT: %7 = sub i64 %6, %5
+; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %9 = add i64 %8, %7
+; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: br label %return
+
+; Final reporting prints
+; CHECK: %12 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %13 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @10, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @11, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @12, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @13, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @14, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @15, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @16, i32 0, i32 0), i64 %12, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @17, i32 0, i32 0))
+; CHECK-NEXT: %14 = call i32 @fflush(i8* null)
+; CHECK-NEXT: %15 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %16 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @23, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @24, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @25, i32 0, i32 0), i64 %15, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0))
+; CHECK-NEXT: %17 = call i32 @fflush(i8* null)
+; CHECK-NEXT: ret void
OpenPOWER on IntegriCloud