summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Siddharth Bhat <siddu.druid@gmail.com> 2017-06-02 11:36:52 +0000
committer: Siddharth Bhat <siddu.druid@gmail.com> 2017-06-02 11:36:52 +0000
commit: 726c28f8c4951b5852c92afca329bb7b683c1ab3 (patch)
tree: bc24bd8165e0966835baeb7d57fdbb269584c429
parent: 01bf58d6ecd9304a7ab5180ff78f3c78187282f4 (diff)
download: bcm5719-llvm-726c28f8c4951b5852c92afca329bb7b683c1ab3.tar.gz
          bcm5719-llvm-726c28f8c4951b5852c92afca329bb7b683c1ab3.zip
[CodeGen] Track trip counts per-scop for performance measurement.
- Add a counter that is incremented once on exit from a scop.
- Test cases got split into two: one to test the cycles, and another one to test trip counts.
- Sample output:

```name=sample-output.txt
scop function, entry block name, exit block name, total time, trip count
warmup, %entry.split, %polly.merge_new_and_old, 5180, 1
f, %entry.split, %polly.merge_new_and_old, 409944, 500
g, %entry.split, %polly.merge_new_and_old, 1226, 1
```

Differential Revision: https://reviews.llvm.org/D33822
llvm-svn: 304543
-rw-r--r-- polly/include/polly/CodeGen/PerfMonitor.h | 3
-rw-r--r-- polly/lib/CodeGen/PerfMonitor.cpp | 26
-rw-r--r-- polly/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll | 75
-rw-r--r-- polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll | 98
-rw-r--r-- polly/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll | 75
5 files changed, 173 insertions, 104 deletions
diff --git a/polly/include/polly/CodeGen/PerfMonitor.h b/polly/include/polly/CodeGen/PerfMonitor.h
index 8bfa29e7883..e6245cdb3a8 100644
--- a/polly/include/polly/CodeGen/PerfMonitor.h
+++ b/polly/include/polly/CodeGen/PerfMonitor.h
@@ -62,6 +62,9 @@ private:
/// The total number of cycles spent in the current scop S.
llvm::Value *CyclesInCurrentScopPtr;
+ /// The total number of times the current scop S is executed.
+ llvm::Value *TripCountForCurrentScopPtr;
+
/// The total number of cycles spent within scops.
llvm::Value *CyclesInScopsPtr;
diff --git a/polly/lib/CodeGen/PerfMonitor.cpp b/polly/lib/CodeGen/PerfMonitor.cpp
index 5423d5a4fa6..0829934a07d 100644
--- a/polly/lib/CodeGen/PerfMonitor.cpp
+++ b/polly/lib/CodeGen/PerfMonitor.cpp
@@ -87,15 +87,18 @@ static std::string GetScopUniqueVarname(const Scop &S) {
std::string EntryString, ExitString;
std::tie(EntryString, ExitString) = S.getEntryExitStr();
- Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName())
+ Name << "__polly_perf_in_" << std::string(S.getFunction().getName())
<< "_from__" << EntryString << "__to__" << ExitString;
return Name.str();
}
void PerfMonitor::addScopCounter() {
const std::string varname = GetScopUniqueVarname(S);
- TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0),
+ TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
&CyclesInCurrentScopPtr);
+
+ TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
+ &TripCountForCurrentScopPtr);
}
void PerfMonitor::addGlobalVariables() {
@@ -160,7 +163,7 @@ Function *PerfMonitor::insertFinalReporting() {
RuntimeDebugBuilder::createCPUPrinter(
Builder, "scop function, "
- "entry block name, exit block name, total time\n");
+ "entry block name, exit block name, total time, trip count\n");
ReturnFromFinal = Builder.CreateRetVoid();
return ExitFn;
}
@@ -179,13 +182,17 @@ void PerfMonitor::AppendScopReporting() {
Value *CyclesInCurrentScop =
Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
+
+ Value *TripCountForCurrentScop =
+ Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
+
std::string EntryName, ExitName;
std::tie(EntryName, ExitName) = S.getEntryExitStr();
// print in CSV for easy parsing with other tools.
- RuntimeDebugBuilder::createCPUPrinter(Builder, S.getFunction().getName(),
- ", ", EntryName, ", ", ExitName, ", ",
- CyclesInCurrentScop, "\n");
+ RuntimeDebugBuilder::createCPUPrinter(
+ Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
+ CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
ReturnFromFinal = Builder.CreateRetVoid();
}
@@ -288,4 +295,11 @@ void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
+
+ Value *TripCountForCurrentScop =
+ Builder.CreateLoad(TripCountForCurrentScopPtr, true);
+ TripCountForCurrentScop =
+ Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
+ Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
+ true);
}
diff --git a/polly/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll b/polly/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
new file mode 100644
index 00000000000..6aec10c7891
--- /dev/null
+++ b/polly/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
@@ -0,0 +1,75 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN: -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+; void g(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+; Declaration of globals - Check for cycles declaration.
+; @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
+; @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up number of cycles in f
+; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
+
+; Bumping up number of cycles in g
+; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
diff --git a/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll b/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll
deleted file mode 100644
index adffe2eb965..00000000000
--- a/polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll
+++ /dev/null
@@ -1,98 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
-; RUN: -S < %s | FileCheck %s
-
-; void f(long A[], long N) {
-; long i;
-; if (true)
-; for (i = 0; i < N; ++i)
-; A[i] = i;
-; }
-; void g(long A[], long N) {
-; long i;
-; if (true)
-; for (i = 0; i < N; ++i)
-; A[i] = i;
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @f(i64* %A, i64 %N) nounwind {
-entry:
- fence seq_cst
- br label %next
-
-next:
- br i1 true, label %for.i, label %return
-
-for.i:
- %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
- %scevgep = getelementptr i64, i64* %A, i64 %indvar
- store i64 %indvar, i64* %scevgep
- %indvar.next = add nsw i64 %indvar, 1
- %exitcond = icmp eq i64 %indvar.next, %N
- br i1 %exitcond, label %return, label %for.i
-
-return:
- fence seq_cst
- ret void
-}
-
-
-define void @g(i64* %A, i64 %N) nounwind {
-entry:
- fence seq_cst
- br label %next
-
-next:
- br i1 true, label %for.i, label %return
-
-for.i:
- %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
- %scevgep = getelementptr i64, i64* %A, i64 %indvar
- store i64 %indvar, i64* %scevgep
- %indvar.next = add nsw i64 %indvar, 1
- %exitcond = icmp eq i64 %indvar.next, %N
- br i1 %exitcond, label %return, label %for.i
-
-return:
- fence seq_cst
- ret void
-}
-
-; Declaration of globals
-; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
-; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
-
-; Bumping up counter in f
-; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
-; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
-; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
-; CHECK-NEXT: %7 = sub i64 %6, %5
-; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %9 = add i64 %8, %7
-; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %11 = add i64 %10, %7
-; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: br label %return
-
-; Bumping up counter in g
-; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
-; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
-; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
-; CHECK-NEXT: %7 = sub i64 %6, %5
-; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %9 = add i64 %8, %7
-; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %11 = add i64 %10, %7
-; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: br label %return
-
-; Final reporting prints
-; CHECK: %20 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %21 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @25, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @23, i32 0, i32 0), i64 %20, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @24, i32 0, i32 0))
-; CHECK-NEXT: %22 = call i32 @fflush(i8* null)
-; CHECK-NEXT: %23 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %24 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @33, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @28, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @29, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @30, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @31, i32 0, i32 0), i64 %23, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @32, i32 0, i32 0))
diff --git a/polly/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll b/polly/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll
new file mode 100644
index 00000000000..c751913745e
--- /dev/null
+++ b/polly/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll
@@ -0,0 +1,75 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN: -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+; void g(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+; Declaration of globals - Check for trip count declaration.
+; CHECK: @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
+; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up the trip count in f
+; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
+; CHECK-NEXT: %13 = add i64 %12, 1
+; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
+
+; Bumping up the trip count in g
+; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
+; CHECK-NEXT: %13 = add i64 %12, 1
+; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
OpenPOWER on IntegriCloud