diff options
Diffstat (limited to 'polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll')
-rw-r--r-- | polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll | 100 |
1 files changed, 100 insertions, 0 deletions
diff --git a/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll b/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll new file mode 100644 index 00000000000..48c9c4c34e0 --- /dev/null +++ b/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll @@ -0,0 +1,100 @@ +; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \ +; RUN: -S < %s | FileCheck %s + +; void f(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } +; void g(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] + %scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + + +define void @g(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] + %scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + +; Declaration of globals +; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0 +; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0 + +; Bumping up counter in f +; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting +; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start +; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*)) +; CHECK-NEXT: %7 = sub i64 %6, %5 +; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %9 = add i64 %8, %7 +; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %11 = add i64 %10, %7 +; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: br label %return + +; Bumping up counter in g +; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting +; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start +; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*)) +; CHECK-NEXT: %7 = sub i64 %6, %5 +; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %9 = add i64 %8, %7 +; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops +; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %11 = add i64 %10, %7 +; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: br label %return + +; Final reporting prints +; CHECK: %12 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %13 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @10, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @11, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @12, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @13, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @14, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @15, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @16, i32 0, i32 0), i64 %12, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @17, i32 0, i32 0)) +; CHECK-NEXT: %14 = call i32 @fflush(i8* null) +; CHECK-NEXT: %15 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" +; CHECK-NEXT: %16 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @23, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @24, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @25, i32 0, i32 0), i64 %15, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0)) +; CHECK-NEXT: %17 = call i32 @fflush(i8* null) +; CHECK-NEXT: ret void |