summaryrefslogtreecommitdiffstats
path: root/tools/power/x86/turbostat/turbostat.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/power/x86/turbostat/turbostat.c')
-rw-r--r--tools/power/x86/turbostat/turbostat.c1214
1 files changed, 935 insertions, 279 deletions
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 5b1b807265a1..323b65edfc97 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -33,25 +33,32 @@
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
+#include <getopt.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <sched.h>
#include <cpuid.h>
+#include <linux/capability.h>
+#include <errno.h>
char *proc_stat = "/proc/stat";
-unsigned int interval_sec = 5; /* set with -i interval_sec */
-unsigned int verbose; /* set with -v */
-unsigned int rapl_verbose; /* set with -R */
-unsigned int rapl_joules; /* set with -J */
-unsigned int thermal_verbose; /* set with -T */
-unsigned int summary_only; /* set with -S */
-unsigned int dump_only; /* set with -s */
+unsigned int interval_sec = 5;
+unsigned int debug;
+unsigned int rapl_joules;
+unsigned int summary_only;
+unsigned int dump_only;
unsigned int skip_c0;
unsigned int skip_c1;
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
+unsigned int do_knl_cstates;
+unsigned int do_pc2;
+unsigned int do_pc3;
+unsigned int do_pc6;
+unsigned int do_pc7;
unsigned int do_c8_c9_c10;
+unsigned int do_skl_residency;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
@@ -59,9 +66,7 @@ unsigned int has_epb;
unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
-unsigned int do_nehalem_platform_info;
-unsigned int do_nehalem_turbo_ratio_limit;
-unsigned int do_ivt_turbo_ratio_limit;
+unsigned int do_nhm_platform_info;
unsigned int extra_msr_offset32;
unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
@@ -79,8 +84,15 @@ unsigned int do_dts;
unsigned int do_ptm;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
-double rapl_power_units, rapl_energy_units, rapl_time_units;
+double rapl_power_units, rapl_time_units;
+double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
+unsigned int do_core_perf_limit_reasons;
+unsigned int do_gfx_perf_limit_reasons;
+unsigned int do_ring_perf_limit_reasons;
+unsigned int crystal_hz;
+unsigned long long tsc_hz;
+int base_cpu;
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -93,18 +105,18 @@ double rapl_joule_counter_range;
#define RAPL_DRAM (1 << 3)
/* 0x618 MSR_DRAM_POWER_LIMIT */
/* 0x619 MSR_DRAM_ENERGY_STATUS */
- /* 0x61c MSR_DRAM_POWER_INFO */
#define RAPL_DRAM_PERF_STATUS (1 << 4)
/* 0x61b MSR_DRAM_PERF_STATUS */
+#define RAPL_DRAM_POWER_INFO (1 << 5)
+ /* 0x61c MSR_DRAM_POWER_INFO */
-#define RAPL_CORES (1 << 5)
+#define RAPL_CORES (1 << 6)
/* 0x638 MSR_PP0_POWER_LIMIT */
/* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_CORE_POLICY (1 << 6)
+#define RAPL_CORE_POLICY (1 << 7)
/* 0x63a MSR_PP0_POLICY */
-
-#define RAPL_GFX (1 << 7)
+#define RAPL_GFX (1 << 8)
/* 0x640 MSR_PP1_POWER_LIMIT */
/* 0x641 MSR_PP1_ENERGY_STATUS */
/* 0x642 MSR_PP1_POLICY */
@@ -151,6 +163,10 @@ struct pkg_data {
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
+ unsigned long long pkg_wtd_core_c0;
+ unsigned long long pkg_any_core_c0;
+ unsigned long long pkg_any_gfxe_c0;
+ unsigned long long pkg_both_core_gfxe_c0;
unsigned int package_id;
unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
@@ -251,15 +267,13 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
- return -1;
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
retval = pread(fd, msr, sizeof *msr, offset);
close(fd);
- if (retval != sizeof *msr) {
- fprintf(stderr, "%s offset 0x%llx read failed\n", pathname, (unsigned long long)offset);
- return -1;
- }
+ if (retval != sizeof *msr)
+ err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
return 0;
}
@@ -281,13 +295,12 @@ void print_header(void)
outp += sprintf(outp, " CPU");
if (has_aperf)
outp += sprintf(outp, " Avg_MHz");
- if (do_nhm_cstates)
+ if (has_aperf)
outp += sprintf(outp, " %%Busy");
if (has_aperf)
outp += sprintf(outp, " Bzy_MHz");
outp += sprintf(outp, " TSC_MHz");
- if (do_smi)
- outp += sprintf(outp, " SMI");
+
if (extra_delta_offset32)
outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
if (extra_delta_offset64)
@@ -296,9 +309,16 @@ void print_header(void)
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
if (extra_msr_offset64)
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
+
+ if (!debug)
+ goto done;
+
+ if (do_smi)
+ outp += sprintf(outp, " SMI");
+
if (do_nhm_cstates)
outp += sprintf(outp, " CPU%%c1");
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
outp += sprintf(outp, " CPU%%c3");
if (do_nhm_cstates)
outp += sprintf(outp, " CPU%%c6");
@@ -310,13 +330,20 @@ void print_header(void)
if (do_ptm)
outp += sprintf(outp, " PkgTmp");
- if (do_snb_cstates)
+ if (do_skl_residency) {
+ outp += sprintf(outp, " Totl%%C0");
+ outp += sprintf(outp, " Any%%C0");
+ outp += sprintf(outp, " GFX%%C0");
+ outp += sprintf(outp, " CPUGFX%%");
+ }
+
+ if (do_pc2)
outp += sprintf(outp, " Pkg%%pc2");
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_pc3)
outp += sprintf(outp, " Pkg%%pc3");
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_pc6)
outp += sprintf(outp, " Pkg%%pc6");
- if (do_snb_cstates)
+ if (do_pc7)
outp += sprintf(outp, " Pkg%%pc7");
if (do_c8_c9_c10) {
outp += sprintf(outp, " Pkg%%pc8");
@@ -337,7 +364,7 @@ void print_header(void)
outp += sprintf(outp, " PKG_%%");
if (do_rapl & RAPL_DRAM_PERF_STATUS)
outp += sprintf(outp, " RAM_%%");
- } else {
+ } else if (do_rapl && rapl_joules) {
if (do_rapl & RAPL_PKG)
outp += sprintf(outp, " Pkg_J");
if (do_rapl & RAPL_CORES)
@@ -353,6 +380,7 @@ void print_header(void)
outp += sprintf(outp, " time");
}
+ done:
outp += sprintf(outp, "\n");
}
@@ -390,10 +418,19 @@ int dump_counters(struct thread_data *t, struct core_data *c,
if (p) {
outp += sprintf(outp, "package: %d\n", p->package_id);
+
+ outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
+ outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
+ outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
+ outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
+
outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
- outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
- outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
- outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
+ if (do_pc3)
+ outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
+ if (do_pc6)
+ outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
+ if (do_pc7)
+ outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
@@ -457,31 +494,27 @@ int format_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "%8d", t->cpu_id);
}
- /* AvgMHz */
+ /* Avg_MHz */
if (has_aperf)
outp += sprintf(outp, "%8.0f",
1.0 / units * t->aperf / interval_float);
- /* %c0 */
- if (do_nhm_cstates) {
+ /* %Busy */
+ if (has_aperf) {
if (!skip_c0)
outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc);
else
outp += sprintf(outp, "********");
}
- /* BzyMHz */
+ /* Bzy_MHz */
if (has_aperf)
outp += sprintf(outp, "%8.0f",
1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
- /* TSC */
+ /* TSC_MHz */
outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
- /* SMI */
- if (do_smi)
- outp += sprintf(outp, "%8d", t->smi_count);
-
/* delta */
if (extra_delta_offset32)
outp += sprintf(outp, " %11llu", t->extra_delta32);
@@ -497,6 +530,13 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (extra_msr_offset64)
outp += sprintf(outp, " 0x%016llx", t->extra_msr64);
+ if (!debug)
+ goto done;
+
+ /* SMI */
+ if (do_smi)
+ outp += sprintf(outp, "%8d", t->smi_count);
+
if (do_nhm_cstates) {
if (!skip_c1)
outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
@@ -508,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
if (do_nhm_cstates)
outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
@@ -522,16 +562,25 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
goto done;
+ /* PkgTmp */
if (do_ptm)
outp += sprintf(outp, "%8d", p->pkg_temp_c);
- if (do_snb_cstates)
+ /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
+ if (do_skl_residency) {
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
+ outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
+ }
+
+ if (do_pc2)
outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_pc3)
outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc);
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_pc6)
outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc);
- if (do_snb_cstates)
+ if (do_pc7)
outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc);
if (do_c8_c9_c10) {
outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc);
@@ -556,12 +605,12 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_rapl & RAPL_GFX)
outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
if (do_rapl & RAPL_DRAM)
- outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float);
+ outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
if (do_rapl & RAPL_PKG_PERF_STATUS)
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
if (do_rapl & RAPL_DRAM_PERF_STATUS)
outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
- } else {
+ } else if (do_rapl && rapl_joules) {
if (do_rapl & RAPL_PKG)
outp += sprintf(outp, fmt8,
p->energy_pkg * rapl_energy_units);
@@ -573,13 +622,13 @@ int format_counters(struct thread_data *t, struct core_data *c,
p->energy_gfx * rapl_energy_units);
if (do_rapl & RAPL_DRAM)
outp += sprintf(outp, fmt8,
- p->energy_dram * rapl_energy_units);
+ p->energy_dram * rapl_dram_energy_units);
if (do_rapl & RAPL_PKG_PERF_STATUS)
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
if (do_rapl & RAPL_DRAM_PERF_STATUS)
outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
- outp += sprintf(outp, fmt8, interval_float);
+ outp += sprintf(outp, fmt8, interval_float);
}
done:
outp += sprintf(outp, "\n");
@@ -627,10 +676,20 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
void
delta_package(struct pkg_data *new, struct pkg_data *old)
{
+
+ if (do_skl_residency) {
+ old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+ old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+ old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+ old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
+ }
old->pc2 = new->pc2 - old->pc2;
- old->pc3 = new->pc3 - old->pc3;
- old->pc6 = new->pc6 - old->pc6;
- old->pc7 = new->pc7 - old->pc7;
+ if (do_pc3)
+ old->pc3 = new->pc3 - old->pc3;
+ if (do_pc6)
+ old->pc6 = new->pc6 - old->pc6;
+ if (do_pc7)
+ old->pc7 = new->pc7 - old->pc7;
old->pc8 = new->pc8 - old->pc8;
old->pc9 = new->pc9 - old->pc9;
old->pc10 = new->pc10 - old->pc10;
@@ -670,24 +729,26 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->c1 = new->c1 - old->c1;
- if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
- old->aperf = new->aperf - old->aperf;
- old->mperf = new->mperf - old->mperf;
- } else {
+ if (has_aperf) {
+ if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
+ old->aperf = new->aperf - old->aperf;
+ old->mperf = new->mperf - old->mperf;
+ } else {
- if (!aperf_mperf_unstable) {
- fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
- fprintf(stderr, "* Frequency results do not cover entire interval *\n");
- fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
+ if (!aperf_mperf_unstable) {
+ fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
+ fprintf(stderr, "* Frequency results do not cover entire interval *\n");
+ fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
- aperf_mperf_unstable = 1;
+ aperf_mperf_unstable = 1;
+ }
+ /*
+ * mperf delta is likely a huge "positive" number
+ * can not use it for calculating c0 time
+ */
+ skip_c0 = 1;
+ skip_c1 = 1;
}
- /*
- * mperf delta is likely a huge "positive" number
- * can not use it for calculating c0 time
- */
- skip_c0 = 1;
- skip_c1 = 1;
}
@@ -712,7 +773,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
}
if (old->mperf == 0) {
- if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
+ if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
old->mperf = 1; /* divide by 0 protection */
}
@@ -768,10 +829,18 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c7 = 0;
c->core_temp_c = 0;
+ p->pkg_wtd_core_c0 = 0;
+ p->pkg_any_core_c0 = 0;
+ p->pkg_any_gfxe_c0 = 0;
+ p->pkg_both_core_gfxe_c0 = 0;
+
p->pc2 = 0;
- p->pc3 = 0;
- p->pc6 = 0;
- p->pc7 = 0;
+ if (do_pc3)
+ p->pc3 = 0;
+ if (do_pc6)
+ p->pc6 = 0;
+ if (do_pc7)
+ p->pc7 = 0;
p->pc8 = 0;
p->pc9 = 0;
p->pc10 = 0;
@@ -809,10 +878,20 @@ int sum_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
+ if (do_skl_residency) {
+ average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+ average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+ average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+ average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
+ }
+
average.packages.pc2 += p->pc2;
- average.packages.pc3 += p->pc3;
- average.packages.pc6 += p->pc6;
- average.packages.pc7 += p->pc7;
+ if (do_pc3)
+ average.packages.pc3 += p->pc3;
+ if (do_pc6)
+ average.packages.pc6 += p->pc6;
+ if (do_pc7)
+ average.packages.pc7 += p->pc7;
average.packages.pc8 += p->pc8;
average.packages.pc9 += p->pc9;
average.packages.pc10 += p->pc10;
@@ -853,10 +932,20 @@ void compute_average(struct thread_data *t, struct core_data *c,
average.cores.c6 /= topo.num_cores;
average.cores.c7 /= topo.num_cores;
+ if (do_skl_residency) {
+ average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+ average.packages.pkg_any_core_c0 /= topo.num_packages;
+ average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+ average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+ }
+
average.packages.pc2 /= topo.num_packages;
- average.packages.pc3 /= topo.num_packages;
- average.packages.pc6 /= topo.num_packages;
- average.packages.pc7 /= topo.num_packages;
+ if (do_pc3)
+ average.packages.pc3 /= topo.num_packages;
+ if (do_pc6)
+ average.packages.pc6 /= topo.num_packages;
+ if (do_pc7)
+ average.packages.pc7 /= topo.num_packages;
average.packages.pc8 /= topo.num_packages;
average.packages.pc9 /= topo.num_packages;
@@ -931,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
- if (do_nhm_cstates && !do_slm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
}
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
+ } else if (do_knl_cstates) {
+ if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+ return -7;
}
if (do_snb_cstates)
@@ -956,18 +1048,28 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- if (do_nhm_cstates && !do_slm_cstates) {
+ if (do_skl_residency) {
+ if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
+ return -10;
+ if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
+ return -11;
+ if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
+ return -12;
+ if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
+ return -13;
+ }
+ if (do_pc3)
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
+ if (do_pc6)
if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
return -10;
- }
- if (do_snb_cstates) {
+ if (do_pc2)
if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
return -11;
+ if (do_pc7)
if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
return -12;
- }
if (do_c8_c9_c10) {
if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
return -13;
@@ -1014,36 +1116,95 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
}
-void print_verbose_header(void)
+/*
+ * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
+ * If you change the values, note they are used both in comparisons
+ * (>= PCL__7) and to index pkg_cstate_limit_strings[].
+ */
+
+#define PCLUKN 0 /* Unknown */
+#define PCLRSV 1 /* Reserved */
+#define PCL__0 2 /* PC0 */
+#define PCL__1 3 /* PC1 */
+#define PCL__2 4 /* PC2 */
+#define PCL__3 5 /* PC3 */
+#define PCL__4 6 /* PC4 */
+#define PCL__6 7 /* PC6 */
+#define PCL_6N 8 /* PC6 No Retention */
+#define PCL_6R 9 /* PC6 Retention */
+#define PCL__7 10 /* PC7 */
+#define PCL_7S 11 /* PC7 Shrink */
+#define PCL__8 12 /* PC8 */
+#define PCL__9 13 /* PC9 */
+#define PCLUNL 14 /* Unlimited */
+
+int pkg_cstate_limit = PCLUKN;
+char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
+ "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};
+
+int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+
+static void
+dump_nhm_platform_info(void)
{
unsigned long long msr;
unsigned int ratio;
- if (!do_nehalem_platform_info)
- return;
-
- get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
+ get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
ratio = (msr >> 40) & 0xFF;
- fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
+ fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
- fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
+ fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
ratio, bclk, ratio * bclk);
- get_msr(0, MSR_IA32_POWER_CTL, &msr);
+ get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
msr, msr & 0x2 ? "EN" : "DIS");
- if (!do_ivt_turbo_ratio_limit)
- goto print_nhm_turbo_ratio_limits;
+ return;
+}
+
+static void
+dump_hsw_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
- get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
- fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
+
+ ratio = (msr >> 8) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
+ ratio, bclk, ratio * bclk);
+ return;
+}
+
+static void
+dump_ivt_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
+
+ fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@@ -1084,60 +1245,18 @@ void print_verbose_header(void)
if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
ratio, bclk, ratio * bclk);
+ return;
+}
-print_nhm_turbo_ratio_limits:
- get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
-
-#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
-
- fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
-
- fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ",
- (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
- (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
- (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
- (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
- (msr & (1 << 15)) ? "" : "UN",
- (unsigned int)msr & 7);
-
-
- switch(msr & 0x7) {
- case 0:
- fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
- break;
- case 1:
- fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
- break;
- case 2:
- fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
- break;
- case 3:
- fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
- break;
- case 4:
- fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
- break;
- case 5:
- fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
- break;
- case 6:
- fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
- break;
- case 7:
- fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
- break;
- default:
- fprintf(stderr, "invalid");
- }
- fprintf(stderr, ")\n");
-
- if (!do_nehalem_turbo_ratio_limit)
- return;
+static void
+dump_nhm_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
- get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
- fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@@ -1178,6 +1297,91 @@ print_nhm_turbo_ratio_limits:
if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
ratio, bclk, ratio * bclk);
+ return;
+}
+
+static void
+dump_knl_turbo_ratio_limits(void)
+{
+ int cores;
+ unsigned int ratio;
+ unsigned long long msr;
+ int delta_cores;
+ int delta_ratio;
+ int i;
+
+ get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+
+ fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
+ msr);
+
+ /**
+ * Turbo encoding in KNL is as follows:
+ * [7:0] -- Base value of number of active cores of bucket 1.
+ * [15:8] -- Base value of freq ratio of bucket 1.
+ * [20:16] -- +ve delta of number of active cores of bucket 2.
+ * i.e. active cores of bucket 2 =
+ * active cores of bucket 1 + delta
+ * [23:21] -- Negative delta of freq ratio of bucket 2.
+ * i.e. freq ratio of bucket 2 =
+ * freq ratio of bucket 1 - delta
+ * [28:24]-- +ve delta of number of active cores of bucket 3.
+ * [31:29]-- -ve delta of freq ratio of bucket 3.
+ * [36:32]-- +ve delta of number of active cores of bucket 4.
+ * [39:37]-- -ve delta of freq ratio of bucket 4.
+ * [44:40]-- +ve delta of number of active cores of bucket 5.
+ * [47:45]-- -ve delta of freq ratio of bucket 5.
+ * [52:48]-- +ve delta of number of active cores of bucket 6.
+ * [55:53]-- -ve delta of freq ratio of bucket 6.
+ * [60:56]-- +ve delta of number of active cores of bucket 7.
+ * [63:61]-- -ve delta of freq ratio of bucket 7.
+ */
+ cores = msr & 0xFF;
+ ratio = (msr >> 8) && 0xFF;
+ if (ratio > 0)
+ fprintf(stderr,
+ "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, cores);
+
+ for (i = 16; i < 64; i = i + 8) {
+ delta_cores = (msr >> i) & 0x1F;
+ delta_ratio = (msr >> (i + 5)) && 0x7;
+ if (!delta_cores || !delta_ratio)
+ return;
+ cores = cores + delta_cores;
+ ratio = ratio - delta_ratio;
+
+ /** -ve ratios will make successive ratio calculations
+ * negative. Hence return instead of carrying on.
+ */
+ if (ratio > 0)
+ fprintf(stderr,
+ "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, cores);
+ }
+}
+
+static void
+dump_nhm_cst_cfg(void)
+{
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+ fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
+
+ fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
+ (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
+ (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
+ (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
+ (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
+ (msr & (1 << 15)) ? "" : "UN",
+ (unsigned int)msr & 7,
+ pkg_cstate_limit_strings[pkg_cstate_limit]);
+ return;
}
void free_all_buffers(void)
@@ -1243,12 +1447,41 @@ int parse_int_file(const char *fmt, ...)
}
/*
- * cpu_is_first_sibling_in_core(cpu)
- * return 1 if given CPU is 1st HT sibling in the core
+ * get_cpu_position_in_core(cpu)
+ * return the position of the CPU among its HT siblings in the core
+ * return -1 if the sibling is not in list
*/
-int cpu_is_first_sibling_in_core(int cpu)
+int get_cpu_position_in_core(int cpu)
{
- return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
+ char path[64];
+ FILE *filep;
+ int this_cpu;
+ char character;
+ int i;
+
+ sprintf(path,
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
+ cpu);
+ filep = fopen(path, "r");
+ if (filep == NULL) {
+ perror(path);
+ exit(1);
+ }
+
+ for (i = 0; i < topo.num_threads_per_core; i++) {
+ fscanf(filep, "%d", &this_cpu);
+ if (this_cpu == cpu) {
+ fclose(filep);
+ return i;
+ }
+
+ /* Account for no separator after last thread*/
+ if (i != (topo.num_threads_per_core - 1))
+ fscanf(filep, "%c", &character);
+ }
+
+ fclose(filep);
+ return -1;
}
/*
@@ -1274,25 +1507,31 @@ int get_num_ht_siblings(int cpu)
{
char path[80];
FILE *filep;
- int sib1, sib2;
- int matches;
+ int sib1;
+ int matches = 0;
char character;
+ char str[100];
+ char *ch;
sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
filep = fopen_or_die(path, "r");
+
/*
* file format:
- * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4)
- * otherwinse 1 sibling (self).
+ * A ',' separated or '-' separated set of numbers
+ * (eg 1-2 or 1,3,4,5)
*/
- matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2);
+ fscanf(filep, "%d%c\n", &sib1, &character);
+ fseek(filep, 0, SEEK_SET);
+ fgets(str, 100, filep);
+ ch = strchr(str, character);
+ while (ch != NULL) {
+ matches++;
+ ch = strchr(ch+1, character);
+ }
fclose(filep);
-
- if (matches == 3)
- return 2;
- else
- return 1;
+ return matches+1;
}
/*
@@ -1456,20 +1695,73 @@ restart:
void check_dev_msr()
{
struct stat sb;
+ char pathname[32];
- if (stat("/dev/cpu/0/msr", &sb))
- err(-5, "no /dev/cpu/0/msr\n"
- "Try \"# modprobe msr\"");
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (stat(pathname, &sb))
+ if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+ err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
-void check_super_user()
+void check_permissions()
{
- if (getuid() != 0)
- errx(-6, "must be root");
+ struct __user_cap_header_struct cap_header_data;
+ cap_user_header_t cap_header = &cap_header_data;
+ struct __user_cap_data_struct cap_data_data;
+ cap_user_data_t cap_data = &cap_data_data;
+ extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
+ int do_exit = 0;
+ char pathname[32];
+
+ /* check for CAP_SYS_RAWIO */
+ cap_header->pid = getpid();
+ cap_header->version = _LINUX_CAPABILITY_VERSION;
+ if (capget(cap_header, cap_data) < 0)
+ err(-6, "capget(2) failed");
+
+ if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
+ do_exit++;
+ warnx("capget(CAP_SYS_RAWIO) failed,"
+ " try \"# setcap cap_sys_rawio=ep %s\"", progname);
+ }
+
+ /* test file permissions */
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (euidaccess(pathname, R_OK)) {
+ do_exit++;
+ warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+ }
+
+ /* if all else fails, thell them to be root */
+ if (do_exit)
+ if (getuid() != 0)
+ warnx("... or simply run as root");
+
+ if (do_exit)
+ exit(-6);
}
-int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
+/*
+ * NHM adds support for additional MSRs:
+ *
+ * MSR_SMI_COUNT 0x00000034
+ *
+ * MSR_NHM_PLATFORM_INFO 0x000000ce
+ * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
+ *
+ * MSR_PKG_C3_RESIDENCY 0x000003f8
+ * MSR_PKG_C6_RESIDENCY 0x000003f9
+ * MSR_CORE_C3_RESIDENCY 0x000003fc
+ * MSR_CORE_C6_RESIDENCY 0x000003fd
+ *
+ * Side effect:
+ * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL
+ */
+int probe_nhm_msrs(unsigned int family, unsigned int model)
{
+ unsigned long long msr;
+ int *pkg_cstate_limits;
+
if (!genuine_intel)
return 0;
@@ -1482,24 +1774,56 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
case 0x1F: /* Core i7 and i5 Processor - Nehalem */
case 0x25: /* Westmere Client - Clarkdale, Arrandale */
case 0x2C: /* Westmere EP - Gulftown */
+ case 0x2E: /* Nehalem-EX Xeon - Beckton */
+ case 0x2F: /* Westmere-EX Xeon - Eagleton */
+ pkg_cstate_limits = nhm_pkg_cstate_limits;
+ break;
case 0x2A: /* SNB */
case 0x2D: /* SNB Xeon */
case 0x3A: /* IVB */
case 0x3E: /* IVB Xeon */
+ pkg_cstate_limits = snb_pkg_cstate_limits;
+ break;
case 0x3C: /* HSW */
case 0x3F: /* HSX */
case 0x45: /* HSW */
case 0x46: /* HSW */
- case 0x37: /* BYT */
- case 0x4D: /* AVN */
case 0x3D: /* BDW */
+ case 0x47: /* BDW */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
- return 1;
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ pkg_cstate_limits = hsw_pkg_cstate_limits;
+ break;
+ case 0x37: /* BYT */
+ case 0x4D: /* AVN */
+ pkg_cstate_limits = slv_pkg_cstate_limits;
+ break;
+ case 0x4C: /* AMT */
+ pkg_cstate_limits = amt_pkg_cstate_limits;
+ break;
+ case 0x57: /* PHI */
+ pkg_cstate_limits = phi_pkg_cstate_limits;
+ break;
+ default:
+ return 0;
+ }
+ get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+
+ pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+
+ return 1;
+}
+int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ switch (model) {
+ /* Nehalem compatible, but do not include turbo-ratio limit support */
case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */
- default:
return 0;
+ default:
+ return 1;
}
}
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
@@ -1512,11 +1836,66 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
switch (model) {
case 0x3E: /* IVB Xeon */
+ case 0x3F: /* HSW Xeon */
+ return 1;
+ default:
+ return 0;
+ }
+}
+int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case 0x3F: /* HSW Xeon */
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case 0x57: /* Knights Landing */
return 1;
default:
return 0;
}
}
+static void
+dump_cstate_pstate_config_info(family, model)
+{
+ if (!do_nhm_platform_info)
+ return;
+
+ dump_nhm_platform_info();
+
+ if (has_hsw_turbo_ratio_limit(family, model))
+ dump_hsw_turbo_ratio_limits();
+
+ if (has_ivt_turbo_ratio_limit(family, model))
+ dump_ivt_turbo_ratio_limits();
+
+ if (has_nhm_turbo_ratio_limit(family, model))
+ dump_nhm_turbo_ratio_limits();
+
+ if (has_knl_turbo_ratio_limit(family, model))
+ dump_knl_turbo_ratio_limits();
+
+ dump_nhm_cst_cfg();
+}
+
/*
* print_epb()
@@ -1545,7 +1924,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
return 0;
- switch (msr & 0x7) {
+ switch (msr & 0xF) {
case ENERGY_PERF_BIAS_PERFORMANCE:
epb_string = "performance";
break;
@@ -1564,6 +1943,103 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
}
+/*
+ * print_perf_limit()
+ */
+int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ int cpu;
+
+ cpu = t->cpu_id;
+
+ /* per-package */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (do_core_perf_limit_reasons) {
+ get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
+ fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
+ (msr & 1 << 15) ? "bit15, " : "",
+ (msr & 1 << 14) ? "bit14, " : "",
+ (msr & 1 << 13) ? "Transitions, " : "",
+ (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
+ (msr & 1 << 11) ? "PkgPwrL2, " : "",
+ (msr & 1 << 10) ? "PkgPwrL1, " : "",
+ (msr & 1 << 9) ? "CorePwr, " : "",
+ (msr & 1 << 8) ? "Amps, " : "",
+ (msr & 1 << 6) ? "VR-Therm, " : "",
+ (msr & 1 << 5) ? "Auto-HWP, " : "",
+ (msr & 1 << 4) ? "Graphics, " : "",
+ (msr & 1 << 2) ? "bit2, " : "",
+ (msr & 1 << 1) ? "ThermStatus, " : "",
+ (msr & 1 << 0) ? "PROCHOT, " : "");
+ fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+ (msr & 1 << 31) ? "bit31, " : "",
+ (msr & 1 << 30) ? "bit30, " : "",
+ (msr & 1 << 29) ? "Transitions, " : "",
+ (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
+ (msr & 1 << 27) ? "PkgPwrL2, " : "",
+ (msr & 1 << 26) ? "PkgPwrL1, " : "",
+ (msr & 1 << 25) ? "CorePwr, " : "",
+ (msr & 1 << 24) ? "Amps, " : "",
+ (msr & 1 << 22) ? "VR-Therm, " : "",
+ (msr & 1 << 21) ? "Auto-HWP, " : "",
+ (msr & 1 << 20) ? "Graphics, " : "",
+ (msr & 1 << 18) ? "bit18, " : "",
+ (msr & 1 << 17) ? "ThermStatus, " : "",
+ (msr & 1 << 16) ? "PROCHOT, " : "");
+
+ }
+ if (do_gfx_perf_limit_reasons) {
+ get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
+ fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)",
+ (msr & 1 << 0) ? "PROCHOT, " : "",
+ (msr & 1 << 1) ? "ThermStatus, " : "",
+ (msr & 1 << 4) ? "Graphics, " : "",
+ (msr & 1 << 6) ? "VR-Therm, " : "",
+ (msr & 1 << 8) ? "Amps, " : "",
+ (msr & 1 << 9) ? "GFXPwr, " : "",
+ (msr & 1 << 10) ? "PkgPwrL1, " : "",
+ (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n",
+ (msr & 1 << 16) ? "PROCHOT, " : "",
+ (msr & 1 << 17) ? "ThermStatus, " : "",
+ (msr & 1 << 20) ? "Graphics, " : "",
+ (msr & 1 << 22) ? "VR-Therm, " : "",
+ (msr & 1 << 24) ? "Amps, " : "",
+ (msr & 1 << 25) ? "GFXPwr, " : "",
+ (msr & 1 << 26) ? "PkgPwrL1, " : "",
+ (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ }
+ if (do_ring_perf_limit_reasons) {
+ get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
+ fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(stderr, " (Active: %s%s%s%s%s%s)",
+ (msr & 1 << 0) ? "PROCHOT, " : "",
+ (msr & 1 << 1) ? "ThermStatus, " : "",
+ (msr & 1 << 6) ? "VR-Therm, " : "",
+ (msr & 1 << 8) ? "Amps, " : "",
+ (msr & 1 << 10) ? "PkgPwrL1, " : "",
+ (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n",
+ (msr & 1 << 16) ? "PROCHOT, " : "",
+ (msr & 1 << 17) ? "ThermStatus, " : "",
+ (msr & 1 << 22) ? "VR-Therm, " : "",
+ (msr & 1 << 24) ? "Amps, " : "",
+ (msr & 1 << 26) ? "PkgPwrL1, " : "",
+ (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ }
+ return 0;
+}
+
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
@@ -1572,7 +2048,7 @@ double get_tdp(model)
unsigned long long msr;
if (do_rapl & RAPL_PKG_POWER_INFO)
- if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+ if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
switch (model) {
@@ -1584,6 +2060,26 @@ double get_tdp(model)
}
}
+/*
+ * rapl_dram_energy_units_probe()
+ * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
+ */
+static double
+rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+{
+ /* only called for genuine_intel, family 6 */
+
+ switch (model) {
+ case 0x3F: /* HSX */
+ case 0x4F: /* BDX */
+ case 0x56: /* BDX-DE */
+ case 0x57: /* KNL */
+ return (rapl_dram_energy_units = 15.3 / 1000000);
+ default:
+ return (rapl_energy_units);
+ }
+}
+
/*
* rapl_probe()
@@ -1609,16 +2105,22 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x45: /* HSW */
case 0x46: /* HSW */
case 0x3D: /* BDW */
+ case 0x47: /* BDW */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
break;
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ break;
case 0x3F: /* HSX */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
- do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ case 0x57: /* KNL */
+ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
case 0x2D:
case 0x3E:
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
case 0x37: /* BYT */
case 0x4D: /* AVN */
@@ -1629,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model)
}
/* units on package 0, verify later other packages match */
- if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
+ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
return;
rapl_power_units = 1.0 / (1 << (msr & 0xF));
@@ -1638,6 +2140,8 @@ void rapl_probe(unsigned int family, unsigned int model)
else
rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
+ rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+
time_unit = msr >> 16 & 0xF;
if (time_unit == 0)
time_unit = 0xA;
@@ -1647,12 +2151,33 @@ void rapl_probe(unsigned int family, unsigned int model)
tdp = get_tdp(model);
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
- if (verbose)
+ if (debug)
fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
return;
}
+void perf_limit_reasons_probe(family, model)
+{
+ if (!genuine_intel)
+ return;
+
+ if (family != 6)
+ return;
+
+ switch (model) {
+ case 0x3C: /* HSW */
+ case 0x45: /* HSW */
+ case 0x46: /* HSW */
+ do_gfx_perf_limit_reasons = 1;
+ case 0x3F: /* HSX */
+ do_core_perf_limit_reasons = 1;
+ do_ring_perf_limit_reasons = 1;
+ default:
+ return;
+ }
+}
+
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
@@ -1751,7 +2276,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
return -1;
- if (verbose) {
+ if (debug) {
fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
rapl_power_units, rapl_energy_units, rapl_time_units);
@@ -1787,19 +2312,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
((msr >> 48) & 1) ? "EN" : "DIS");
}
- if (do_rapl & RAPL_DRAM) {
+ if (do_rapl & RAPL_DRAM_POWER_INFO) {
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
return -6;
-
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
-
-
+ }
+ if (do_rapl & RAPL_DRAM) {
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -1808,7 +2332,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
if (do_rapl & RAPL_CORE_POLICY) {
- if (verbose) {
+ if (debug) {
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
@@ -1816,7 +2340,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
}
}
if (do_rapl & RAPL_CORES) {
- if (verbose) {
+ if (debug) {
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
@@ -1826,7 +2350,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
}
}
if (do_rapl & RAPL_GFX) {
- if (verbose) {
+ if (debug) {
if (get_msr(cpu, MSR_PP1_POLICY, &msr))
return -8;
@@ -1842,8 +2366,15 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
}
+/*
+ * SNB adds support for additional MSRs:
+ *
+ * MSR_PKG_C7_RESIDENCY 0x000003fa
+ * MSR_CORE_C7_RESIDENCY 0x000003fe
+ * MSR_PKG_C2_RESIDENCY 0x0000060d
+ */
-int is_snb(unsigned int family, unsigned int model)
+int has_snb_msrs(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
@@ -1858,14 +2389,24 @@ int is_snb(unsigned int family, unsigned int model)
case 0x45: /* HSW */
case 0x46: /* HSW */
case 0x3D: /* BDW */
+ case 0x47: /* BDW */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
return 1;
}
return 0;
}
-int has_c8_c9_c10(unsigned int family, unsigned int model)
+/*
+ * HSW adds support for additional MSRs:
+ *
+ * MSR_PKG_C8_RESIDENCY 0x00000630
+ * MSR_PKG_C9_RESIDENCY 0x00000631
+ * MSR_PKG_C10_RESIDENCY 0x00000632
+ */
+int has_hsw_msrs(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
@@ -1873,12 +2414,36 @@ int has_c8_c9_c10(unsigned int family, unsigned int model)
switch (model) {
case 0x45: /* HSW */
case 0x3D: /* BDW */
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * SKL adds support for additional MSRS:
+ *
+ * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+ * MSR_PKG_ANY_CORE_C0_RES 0x00000659
+ * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+ * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+ */
+int has_skl_msrs(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
return 1;
}
return 0;
}
+
int is_slm(unsigned int family, unsigned int model)
{
if (!genuine_intel)
@@ -1891,6 +2456,17 @@ int is_slm(unsigned int family, unsigned int model)
return 0;
}
+int is_knl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case 0x57: /* KNL */
+ return 1;
+ }
+ return 0;
+}
+
#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
@@ -1900,7 +2476,7 @@ double slm_bclk(void)
unsigned int i;
double freq;
- if (get_msr(0, MSR_FSB_FREQ, &msr))
+ if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
fprintf(stderr, "SLM BCLK: unknown\n");
i = msr & 0xf;
@@ -1917,7 +2493,7 @@ double slm_bclk(void)
double discover_bclk(unsigned int family, unsigned int model)
{
- if (is_snb(family, model))
+ if (has_snb_msrs(family, model))
return 100.00;
else if (is_slm(family, model))
return slm_bclk();
@@ -1965,15 +2541,15 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!do_nehalem_platform_info)
+ if (!do_nhm_platform_info)
goto guess;
- if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
+ if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
goto guess;
target_c_local = (msr >> 16) & 0xFF;
- if (verbose)
+ if (debug)
fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
cpu, msr, target_c_local);
@@ -1991,7 +2567,7 @@ guess:
return 0;
}
-void check_cpuid()
+void process_cpuid()
{
unsigned int eax, ebx, ecx, edx, max_level;
unsigned int fms, family, model, stepping;
@@ -2003,7 +2579,7 @@ void check_cpuid()
if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
genuine_intel = 1;
- if (verbose)
+ if (debug)
fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
(char *)&ebx, (char *)&edx, (char *)&ecx);
@@ -2014,7 +2590,7 @@ void check_cpuid()
if (family == 6 || family == 0xf)
model += ((fms >> 16) & 0xf) << 4;
- if (verbose)
+ if (debug)
fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
max_level, family, model, stepping, family, model, stepping);
@@ -2029,18 +2605,15 @@ void check_cpuid()
ebx = ecx = edx = 0;
__get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
- if (max_level < 0x80000007)
- errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level);
-
- /*
- * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
- * this check is valid for both Intel and AMD
- */
- __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
- has_invariant_tsc = edx & (1 << 8);
+ if (max_level >= 0x80000007) {
- if (!has_invariant_tsc)
- errx(1, "No invariant TSC");
+ /*
+ * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+ * this check is valid for both Intel and AMD
+ */
+ __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+ has_invariant_tsc = edx & (1 << 8);
+ }
/*
* APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
@@ -2053,36 +2626,88 @@ void check_cpuid()
do_ptm = eax & (1 << 6);
has_epb = ecx & (1 << 3);
- if (verbose)
- fprintf(stderr, "CPUID(6): %s%s%s%s\n",
- has_aperf ? "APERF" : "No APERF!",
- do_dts ? ", DTS" : "",
- do_ptm ? ", PTM": "",
- has_epb ? ", EPB": "");
-
- if (!has_aperf)
- errx(-1, "No APERF");
-
- do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
- do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
- do_smi = do_nhm_cstates;
- do_snb_cstates = is_snb(family, model);
- do_c8_c9_c10 = has_c8_c9_c10(family, model);
+ if (debug)
+ fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n",
+ has_aperf ? "" : "No ",
+ do_dts ? "" : "No ",
+ do_ptm ? "" : "No ",
+ has_epb ? "" : "No ");
+
+ if (max_level > 0x15) {
+ unsigned int eax_crystal;
+ unsigned int ebx_tsc;
+
+ /*
+ * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
+ */
+ eax_crystal = ebx_tsc = crystal_hz = edx = 0;
+ __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx);
+
+ if (ebx_tsc != 0) {
+
+ if (debug && (ebx != 0))
+ fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
+ eax_crystal, ebx_tsc, crystal_hz);
+
+ if (crystal_hz == 0)
+ switch(model) {
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ crystal_hz = 24000000; /* 24 MHz */
+ break;
+ default:
+ crystal_hz = 0;
+ }
+
+ if (crystal_hz) {
+ tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
+ if (debug)
+ fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
+ tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
+ }
+ }
+ }
+
+ do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
+ do_snb_cstates = has_snb_msrs(family, model);
+ do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
+ do_pc3 = (pkg_cstate_limit >= PCL__3);
+ do_pc6 = (pkg_cstate_limit >= PCL__6);
+ do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
+ do_c8_c9_c10 = has_hsw_msrs(family, model);
+ do_skl_residency = has_skl_msrs(family, model);
do_slm_cstates = is_slm(family, model);
+ do_knl_cstates = is_knl(family, model);
bclk = discover_bclk(family, model);
- do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
- do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
rapl_probe(family, model);
+ perf_limit_reasons_probe(family, model);
+
+ if (debug)
+ dump_cstate_pstate_config_info();
return;
}
-
-void usage()
+void help()
{
- errx(1, "%s: [-v][-R][-T][-p|-P|-S][-c MSR#][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
- progname);
+ fprintf(stderr,
+ "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
+ "\n"
+ "Turbostat forks the specified COMMAND and prints statistics\n"
+ "when COMMAND completes.\n"
+ "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
+ "to print statistics, until interrupted.\n"
+ "--debug run in \"debug\" mode\n"
+ "--interval sec Override default 5-second measurement interval\n"
+ "--help print this help message\n"
+ "--counter msr print 32-bit counter at address \"msr\"\n"
+ "--Counter msr print 64-bit Counter at address \"msr\"\n"
+ "--msr msr print 32-bit value at address \"msr\"\n"
+ "--MSR msr print 64-bit Value at address \"msr\"\n"
+ "--version print version information\n"
+ "\n"
+ "For more help, run \"man turbostat\"\n");
}
@@ -2121,7 +2746,7 @@ void topology_probe()
if (!summary_only && topo.num_cpus > 1)
show_cpu = 1;
- if (verbose > 1)
+ if (debug > 1)
fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
@@ -2156,7 +2781,7 @@ void topology_probe()
int siblings;
if (cpu_is_not_present(i)) {
- if (verbose > 1)
+ if (debug > 1)
fprintf(stderr, "cpu%d NOT PRESENT\n", i);
continue;
}
@@ -2171,26 +2796,26 @@ void topology_probe()
siblings = get_num_ht_siblings(i);
if (siblings > max_siblings)
max_siblings = siblings;
- if (verbose > 1)
+ if (debug > 1)
fprintf(stderr, "cpu %d pkg %d core %d\n",
i, cpus[i].physical_package_id, cpus[i].core_id);
}
topo.num_cores_per_pkg = max_core_id + 1;
- if (verbose > 1)
+ if (debug > 1)
fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
max_core_id, topo.num_cores_per_pkg);
- if (!summary_only && topo.num_cores_per_pkg > 1)
+ if (debug && !summary_only && topo.num_cores_per_pkg > 1)
show_core = 1;
topo.num_packages = max_package_id + 1;
- if (verbose > 1)
+ if (debug > 1)
fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
max_package_id, topo.num_packages);
- if (!summary_only && topo.num_packages > 1)
+ if (debug && !summary_only && topo.num_packages > 1)
show_pkg = 1;
topo.num_threads_per_core = max_siblings;
- if (verbose > 1)
+ if (debug > 1)
fprintf(stderr, "max_siblings %d\n", max_siblings);
free(cpus);
@@ -2267,13 +2892,9 @@ int initialize_counters(int cpu_id)
my_package_id = get_physical_package_id(cpu_id);
my_core_id = get_core_id(cpu_id);
-
- if (cpu_is_first_sibling_in_core(cpu_id)) {
- my_thread_id = 0;
+ my_thread_id = get_cpu_position_in_core(cpu_id);
+ if (!my_thread_id)
topo.num_cores++;
- } else {
- my_thread_id = 1;
- }
init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
@@ -2297,27 +2918,37 @@ void setup_all_buffers(void)
for_all_proc_cpus(initialize_counters);
}
-void turbostat_init()
+void set_base_cpu(void)
{
- check_cpuid();
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(-ENODEV, "No valid cpus found");
- check_dev_msr();
- check_super_user();
+ if (debug > 1)
+ fprintf(stderr, "base_cpu = %d\n", base_cpu);
+}
+void turbostat_init()
+{
setup_all_buffers();
+ set_base_cpu();
+ check_dev_msr();
+ check_permissions();
+ process_cpuid();
- if (verbose)
- print_verbose_header();
- if (verbose)
+ if (debug)
for_all_cpus(print_epb, ODD_COUNTERS);
- if (verbose)
+ if (debug)
+ for_all_cpus(print_perf_limit, ODD_COUNTERS);
+
+ if (debug)
for_all_cpus(print_rapl, ODD_COUNTERS);
for_all_cpus(set_temperature_target, ODD_COUNTERS);
- if (verbose)
+ if (debug)
for_all_cpus(print_thermal, ODD_COUNTERS);
}
@@ -2382,56 +3013,82 @@ int get_and_dump_counters(void)
return status;
}
+void print_version() {
+ fprintf(stderr, "turbostat version 4.7 27-May, 2015"
+ " - Len Brown <lenb@kernel.org>\n");
+}
+
void cmdline(int argc, char **argv)
{
int opt;
+ int option_index = 0;
+ static struct option long_options[] = {
+ {"Counter", required_argument, 0, 'C'},
+ {"counter", required_argument, 0, 'c'},
+ {"Dump", no_argument, 0, 'D'},
+ {"debug", no_argument, 0, 'd'},
+ {"interval", required_argument, 0, 'i'},
+ {"help", no_argument, 0, 'h'},
+ {"Joules", no_argument, 0, 'J'},
+ {"MSR", required_argument, 0, 'M'},
+ {"msr", required_argument, 0, 'm'},
+ {"Package", no_argument, 0, 'p'},
+ {"processor", no_argument, 0, 'p'},
+ {"Summary", no_argument, 0, 'S'},
+ {"TCC", required_argument, 0, 'T'},
+ {"version", no_argument, 0, 'v' },
+ {0, 0, 0, 0 }
+ };
progname = argv[0];
- while ((opt = getopt(argc, argv, "+pPsSvi:c:C:m:M:RJT:")) != -1) {
+ while ((opt = getopt_long_only(argc, argv, "C:c:Ddhi:JM:m:PpST:v",
+ long_options, &option_index)) != -1) {
switch (opt) {
- case 'p':
- show_core_only++;
+ case 'C':
+ sscanf(optarg, "%x", &extra_delta_offset64);
break;
- case 'P':
- show_pkg_only++;
+ case 'c':
+ sscanf(optarg, "%x", &extra_delta_offset32);
break;
- case 's':
+ case 'D':
dump_only++;
break;
- case 'S':
- summary_only++;
- break;
- case 'v':
- verbose++;
+ case 'd':
+ debug++;
break;
+ case 'h':
+ default:
+ help();
+ exit(1);
case 'i':
interval_sec = atoi(optarg);
break;
- case 'c':
- sscanf(optarg, "%x", &extra_delta_offset32);
+ case 'J':
+ rapl_joules++;
break;
- case 'C':
- sscanf(optarg, "%x", &extra_delta_offset64);
+ case 'M':
+ sscanf(optarg, "%x", &extra_msr_offset64);
break;
case 'm':
sscanf(optarg, "%x", &extra_msr_offset32);
break;
- case 'M':
- sscanf(optarg, "%x", &extra_msr_offset64);
+ case 'P':
+ show_pkg_only++;
+ break;
+ case 'p':
+ show_core_only++;
break;
- case 'R':
- rapl_verbose++;
+ case 'S':
+ summary_only++;
break;
case 'T':
tcc_activation_temp_override = atoi(optarg);
break;
- case 'J':
- rapl_joules++;
+ case 'v':
+ print_version();
+ exit(0);
break;
-
- default:
- usage();
}
}
}
@@ -2440,9 +3097,8 @@ int main(int argc, char **argv)
{
cmdline(argc, argv);
- if (verbose)
- fprintf(stderr, "turbostat v3.7 Feb 6, 2014"
- " - Len Brown <lenb@kernel.org>\n");
+ if (debug)
+ print_version();
turbostat_init();
OpenPOWER on IntegriCloud