diff options
author | Siddharth Bhat <siddu.druid@gmail.com> | 2017-06-02 08:01:22 +0000 |
---|---|---|
committer | Siddharth Bhat <siddu.druid@gmail.com> | 2017-06-02 08:01:22 +0000 |
commit | 07bee290de7569b413831c75dface6edb240a8d8 (patch) | |
tree | 68785f542fe688d058a988c82094b3ff074e641b /polly/test/Isl/CodeGen | |
parent | af199153ccc5a6b6165da3ab16fe6c8ead523b88 (diff) | |
download | bcm5719-llvm-07bee290de7569b413831c75dface6edb240a8d8.tar.gz bcm5719-llvm-07bee290de7569b413831c75dface6edb240a8d8.zip |
[CodeGen] Extend Performance Counter to track per-scop information.
Previously, we would generate one performance counter for all scops.
Now, we generate the old information as well as a per-scop
performance counter that provides finer-grained information.
This patch needed a way to generate a unique name for a `Scop`.
The start region, end region, and function name together provide a
unique `Scop` name, so `Scop` has a new public API that exposes its start
and end region names.
Differential Revision: https://reviews.llvm.org/D33723
llvm-svn: 304528
Diffstat (limited to 'polly/test/Isl/CodeGen')
-rw-r--r-- | polly/test/Isl/CodeGen/perf_monitoring.ll | 3 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll | 100 |
2 files changed, 100 insertions, 3 deletions
diff --git a/polly/test/Isl/CodeGen/perf_monitoring.ll b/polly/test/Isl/CodeGen/perf_monitoring.ll index 79505335e07..b63958b7bfa 100644 --- a/polly/test/Isl/CodeGen/perf_monitoring.ll +++ b/polly/test/Isl/CodeGen/perf_monitoring.ll @@ -49,7 +49,6 @@ return: ; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops ; CHECK-NEXT: %9 = add i64 %8, %7 ; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops -; CHECK-NEXT: br label %return ; CHECK: define weak_odr void @__polly_perf_final() { @@ -66,8 +65,6 @@ return: ; CHECK-NEXT: %9 = call i32 @fflush(i8* null) ; CHECK-NEXT: %10 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0)) ; CHECK-NEXT: %11 = call i32 @fflush(i8* null) -; CHECK-NEXT: ret void -; CHECK-NEXT: } ; CHECK: define weak_odr void @__polly_perf_init() { diff --git a/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll b/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll new file mode 100644 index 00000000000..48c9c4c34e0 --- /dev/null +++ b/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll @@ -0,0 +1,100 @@ +; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \ +; RUN: -S < %s | FileCheck %s + +; void f(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } +; void g(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, 
%for.i ] + %scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + + +define void @g(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] + %scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + +; Declaration of globals +; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0 +; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0 + +; Bumping up counter in f +; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting +; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start +; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*)) +; CHECK-NEXT: %7 = sub i64 %6, %5 +; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %9 = add i64 %8, %7 +; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %11 = add i64 %10, %7 +; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: br label %return + +; Bumping up counter in g +; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting +; CHECK-NEXT: %5 = load volatile i64, i64* 
@__polly_perf_cycles_in_scop_start +; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*)) +; CHECK-NEXT: %7 = sub i64 %6, %5 +; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %9 = add i64 %8, %7 +; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %11 = add i64 %10, %7 +; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: br label %return + +; Final reporting prints +; CHECK: %12 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %13 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @10, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @11, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @12, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @13, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @14, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @15, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @16, i32 0, i32 0), i64 %12, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @17, i32 0, i32 0)) +; CHECK-NEXT: %14 = call i32 @fflush(i8* null) +; CHECK-NEXT: %15 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %16 = call i32 (...) 
@printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @23, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @24, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @25, i32 0, i32 0), i64 %15, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0)) +; CHECK-NEXT: %17 = call i32 @fflush(i8* null) +; CHECK-NEXT: ret void |