diff options
Diffstat (limited to 'tools/power/x86/turbostat/turbostat.c')
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 881 |
1 files changed, 701 insertions, 180 deletions
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3fa94e291d16..20a257a12ea5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -38,12 +38,15 @@ #include <string.h> #include <ctype.h> #include <sched.h> +#include <time.h> #include <cpuid.h> #include <linux/capability.h> #include <errno.h> char *proc_stat = "/proc/stat"; -unsigned int interval_sec = 5; +FILE *outf; +int *fd_percpu; +struct timespec interval_ts = {5, 0}; unsigned int debug; unsigned int rapl_joules; unsigned int summary_only; @@ -72,6 +75,7 @@ unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; unsigned int extra_delta_offset64; unsigned int aperf_mperf_multiplier = 1; +int do_irq = 1; int do_smi; double bclk; double base_hz; @@ -86,6 +90,10 @@ char *output_buffer, *outp; unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; +unsigned int do_gfx_rc6_ms; +unsigned long long gfx_cur_rc6_ms; +unsigned int do_gfx_mhz; +unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; double rapl_power_units, rapl_time_units; @@ -98,6 +106,12 @@ unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; double discover_bclk(unsigned int family, unsigned int model); +unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ + /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ +unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ +unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ +unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ +unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -145,6 +159,7 @@ struct thread_data { unsigned long long extra_delta64; unsigned long long extra_msr32; unsigned long long extra_delta32; + unsigned int irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int flags; @@ -172,6 +187,8 @@ struct pkg_data { unsigned long long pkg_any_core_c0; unsigned long long pkg_any_gfxe_c0; unsigned long long pkg_both_core_gfxe_c0; + unsigned long long gfx_rc6_ms; + unsigned int gfx_mhz; unsigned int package_id; unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -212,6 +229,9 @@ struct topo_params { struct timeval tv_even, tv_odd, tv_delta; +int *irq_column_2_cpu; /* /proc/interrupts column numbers */ +int *irqs_per_cpu; /* indexed by cpu_num */ + void setup_all_buffers(void); int cpu_is_not_present(int cpu) @@ -262,23 +282,34 @@ int cpu_migrate(int cpu) else return 0; } - -int get_msr(int cpu, off_t offset, unsigned long long *msr) +int get_msr_fd(int cpu) { - ssize_t retval; char pathname[32]; int fd; + fd = fd_percpu[cpu]; + + if (fd) + return fd; + sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); - retval = pread(fd, msr, sizeof *msr, offset); - close(fd); + fd_percpu[cpu] = fd; + + return fd; +} + +int get_msr(int cpu, off_t offset, unsigned long long *msr) +{ + ssize_t retval; + + retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); if (retval != sizeof *msr) - err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); return 0; } @@ -286,8 +317,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) /* * Example Format w/ field column widths: * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 + * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt + * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 */ void print_header(void) @@ -301,7 +332,7 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " Avg_MHz"); if (has_aperf) - outp += sprintf(outp, " %%Busy"); + outp += sprintf(outp, " Busy%%"); if (has_aperf) outp += sprintf(outp, " Bzy_MHz"); outp += sprintf(outp, " TSC_MHz"); @@ -318,6 +349,8 @@ void print_header(void) if (!debug) goto done; + if (do_irq) + outp += sprintf(outp, " IRQ"); if (do_smi) outp += sprintf(outp, " SMI"); @@ -335,6 +368,12 @@ void print_header(void) if (do_ptm) outp += sprintf(outp, " PkgTmp"); + if (do_gfx_rc6_ms) + outp += sprintf(outp, " GFX%%rc6"); + + if (do_gfx_mhz) + outp += sprintf(outp, " GFXMHz"); + if (do_skl_residency) { outp += sprintf(outp, " Totl%%C0"); outp += sprintf(outp, " Any%%C0"); @@ -409,6 +448,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, extra_msr_offset32, t->extra_msr32); outp += sprintf(outp, "msr0x%x: %016llX\n", extra_msr_offset64, t->extra_msr64); + if (do_irq) + outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); if (do_smi) outp += sprintf(outp, "SMI: %08X\n", t->smi_count); } @@ -504,7 +545,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%8.0f", 1.0 / units * t->aperf / interval_float); - /* %Busy */ + /* Busy% */ if (has_aperf) { if (!skip_c0) outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); @@ -542,6 +583,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!debug) goto done; + /* IRQ */ + if (do_irq) + outp += sprintf(outp, "%8d", t->irq_count); + /* SMI */ if (do_smi) outp += sprintf(outp, "%8d", t->smi_count); @@ -575,6 +620,14 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_ptm) outp += sprintf(outp, "%8d", p->pkg_temp_c); + /* GFXrc6 */ + if (do_gfx_rc6_ms) + outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float); + + /* GFXMHz */ + if (do_gfx_mhz) + outp += sprintf(outp, "%8d", p->gfx_mhz); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); @@ -645,15 +698,24 @@ done: return 0; } -void flush_stdout() +void flush_output_stdout(void) { - fputs(output_buffer, stdout); - fflush(stdout); + FILE *filep; + + if (outf == stderr) + filep = stdout; + else + filep = outf; + + fputs(output_buffer, filep); + fflush(filep); + outp = output_buffer; } -void flush_stderr() +void flush_output_stderr(void) { - fputs(output_buffer, stderr); + fputs(output_buffer, outf); + fflush(outf); outp = output_buffer; } void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -704,6 +766,9 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc10 = new->pc10 - old->pc10; old->pkg_temp_c = new->pkg_temp_c; + old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; + old->gfx_mhz = new->gfx_mhz; + DELTA_WRAP32(new->energy_pkg, old->energy_pkg); DELTA_WRAP32(new->energy_cores, old->energy_cores); DELTA_WRAP32(new->energy_gfx, old->energy_gfx); @@ -745,9 +810,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, } else { if (!aperf_mperf_unstable) { - fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); - fprintf(stderr, "* Frequency results do not cover entire interval *\n"); - fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname); + fprintf(outf, "* Frequency results do not cover entire interval *\n"); + fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n"); aperf_mperf_unstable = 1; } @@ -782,7 +847,8 @@ delta_thread(struct thread_data *new, struct thread_data *old, } if (old->mperf == 0) { - if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); + if (debug > 1) + fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); old->mperf = 1; /* divide by 0 protection */ } @@ -797,6 +863,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->extra_msr32 = new->extra_msr32; old->extra_msr64 = new->extra_msr64; + if (do_irq) + old->irq_count = new->irq_count - old->irq_count; + if (do_smi) old->smi_count = new->smi_count - old->smi_count; } @@ -826,10 +895,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->mperf = 0; t->c1 = 0; - t->smi_count = 0; t->extra_delta32 = 0; t->extra_delta64 = 0; + t->irq_count = 0; + t->smi_count = 0; + /* tells format_counters to dump all fields from this set */ t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; @@ -861,6 +932,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->rapl_pkg_perf_status = 0; p->rapl_dram_perf_status = 0; p->pkg_temp_c = 0; + + p->gfx_rc6_ms = 0; + p->gfx_mhz = 0; } int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -873,6 +947,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.threads.extra_delta32 += t->extra_delta32; average.threads.extra_delta64 += t->extra_delta64; + average.threads.irq_count += t->irq_count; + average.threads.smi_count += t->smi_count; + /* sum per-core values only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -910,6 +987,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.energy_cores += p->energy_cores; average.packages.energy_gfx += p->energy_gfx; + average.packages.gfx_rc6_ms = p->gfx_rc6_ms; + average.packages.gfx_mhz = p->gfx_mhz; + average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; @@ -970,7 +1050,6 @@ static unsigned long long rdtsc(void) return low | ((unsigned long long)high) << 32; } - /* * get_counters(...) * migrate to cpu @@ -980,23 +1059,74 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int cpu = t->cpu_id; unsigned long long msr; + int aperf_mperf_retry_count = 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } +retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ if (has_aperf) { + unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; + + /* + * The TSC, APERF and MPERF must be read together for + * APERF/MPERF and MPERF/TSC to give accurate results. + * + * Unfortunately, APERF and MPERF are read by + * individual system call, so delays may occur + * between them. If the time to read them + * varies by a large amount, we re-read them. + */ + + /* + * This initial dummy APERF read has been seen to + * reduce jitter in the subsequent reads. + */ + + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) + return -3; + + t->tsc = rdtsc(); /* re-read close to APERF */ + + tsc_before = t->tsc; + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) return -3; + + tsc_between = rdtsc(); + if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) return -4; + + tsc_after = rdtsc(); + + aperf_time = tsc_between - tsc_before; + mperf_time = tsc_after - tsc_between; + + /* + * If the system call latency to read APERF and MPERF + * differ by more than 2x, then try again. + */ + if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { + aperf_mperf_retry_count++; + if (aperf_mperf_retry_count < 5) + goto retry; + else + warnx("cpu%d jitter %lld %lld", + cpu, aperf_time, mperf_time); + } + aperf_mperf_retry_count = 0; + t->aperf = t->aperf * aperf_mperf_multiplier; t->mperf = t->mperf * aperf_mperf_multiplier; } + if (do_irq) + t->irq_count = irqs_per_cpu[cpu]; if (do_smi) { if (get_msr(cpu, MSR_SMI_COUNT, &msr)) return -5; @@ -1124,6 +1254,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -17; p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + + if (do_gfx_rc6_ms) + p->gfx_rc6_ms = gfx_cur_rc6_ms; + + if (do_gfx_mhz) + p->gfx_mhz = gfx_cur_mhz; + return 0; } @@ -1175,18 +1312,18 @@ dump_nhm_platform_info(void) get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); - fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", + fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", + fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); - fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", + fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); return; @@ -1200,16 +1337,16 @@ dump_hsw_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1222,46 +1359,46 @@ dump_ivt_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1274,46 +1411,46 @@ dump_nhm_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1321,21 +1458,23 @@ dump_nhm_turbo_ratio_limits(void) static void dump_knl_turbo_ratio_limits(void) { - int cores; - unsigned int ratio; + const unsigned int buckets_no = 7; + unsigned long long msr; - int delta_cores; - int delta_ratio; - int i; + int delta_cores, delta_ratio; + int i, b_nr; + unsigned int cores[buckets_no]; + unsigned int ratio[buckets_no]; get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); /** * Turbo encoding in KNL is as follows: - * [7:0] -- Base value of number of active cores of bucket 1. + * [0] -- Reserved + * [7:1] -- Base value of number of active cores of bucket 1. * [15:8] -- Base value of freq ratio of bucket 1. * [20:16] -- +ve delta of number of active cores of bucket 2. * i.e. active cores of bucket 2 = @@ -1354,29 +1493,25 @@ dump_knl_turbo_ratio_limits(void) * [60:56]-- +ve delta of number of active cores of bucket 7. * [63:61]-- -ve delta of freq ratio of bucket 7. */ - cores = msr & 0xFF; - ratio = (msr >> 8) && 0xFF; - if (ratio > 0) - fprintf(stderr, - "%d * %.0f = %.0f MHz max turbo %d active cores\n", - ratio, bclk, ratio * bclk, cores); - - for (i = 16; i < 64; i = i + 8) { + + b_nr = 0; + cores[b_nr] = (msr & 0xFF) >> 1; + ratio[b_nr] = (msr >> 8) & 0xFF; + + for (i = 16; i < 64; i += 8) { delta_cores = (msr >> i) & 0x1F; - delta_ratio = (msr >> (i + 5)) && 0x7; - if (!delta_cores || !delta_ratio) - return; - cores = cores + delta_cores; - ratio = ratio - delta_ratio; - - /** -ve ratios will make successive ratio calculations - * negative. Hence return instead of carrying on. - */ - if (ratio > 0) - fprintf(stderr, - "%d * %.0f = %.0f MHz max turbo %d active cores\n", - ratio, bclk, ratio * bclk, cores); + delta_ratio = (msr >> (i + 5)) & 0x7; + + cores[b_nr + 1] = cores[b_nr] + delta_cores; + ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; + b_nr++; } + + for (i = buckets_no - 1; i >= 0; i--) + if (i > 0 ? ratio[i] != ratio[i - 1] : 1) + fprintf(outf, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio[i], bclk, ratio[i] * bclk, cores[i]); } static void @@ -1389,15 +1524,15 @@ dump_nhm_cst_cfg(void) #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); - fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", + fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", (msr & (1 << 15)) ? "" : "UN", - (unsigned int)msr & 7, + (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); return; } @@ -1408,48 +1543,59 @@ dump_config_tdp(void) unsigned long long msr; get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); - fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); + fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); if (msr) { - fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); - fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); - fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); - fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); + fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); + fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); + fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); + fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); } - fprintf(stderr, ")\n"); + fprintf(outf, ")\n"); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); if (msr) { - fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); - fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); - fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); - fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); + fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); + fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); + fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); + fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); } - fprintf(stderr, ")\n"); + fprintf(outf, ")\n"); get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); if ((msr) & 0x3) - fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); - fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); - fprintf(stderr, ")\n"); - + fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(outf, ")\n"); + get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); - fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); - fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); - fprintf(stderr, ")\n"); + fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); + fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(outf, ")\n"); +} +void free_fd_percpu(void) +{ + int i; + + for (i = 0; i < topo.max_cpu_num; ++i) { + if (fd_percpu[i] != 0) + close(fd_percpu[i]); + } + + free(fd_percpu); } void free_all_buffers(void) { CPU_FREE(cpu_present_set); cpu_present_set = NULL; - cpu_present_set = 0; + cpu_present_setsize = 0; CPU_FREE(cpu_affinity_set); cpu_affinity_set = NULL; @@ -1474,6 +1620,11 @@ void free_all_buffers(void) free(output_buffer); output_buffer = NULL; outp = NULL; + + free_fd_percpu(); + + free(irq_column_2_cpu); + free(irqs_per_cpu); } /* @@ -1481,7 +1632,7 @@ void free_all_buffers(void) */ FILE *fopen_or_die(const char *path, const char *mode) { - FILE *filep = fopen(path, "r"); + FILE *filep = fopen(path, mode); if (!filep) err(1, "%s: open failed", path); return filep; @@ -1696,6 +1847,136 @@ int mark_cpu_present(int cpu) return 0; } +/* + * snapshot_proc_interrupts() + * + * read and record summary of /proc/interrupts + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_proc_interrupts(void) +{ + static FILE *fp; + int column, retval; + + if (fp == NULL) + fp = fopen_or_die("/proc/interrupts", "r"); + else + rewind(fp); + + /* read 1st line of /proc/interrupts to get cpu* name for each column */ + for (column = 0; column < topo.num_cpus; ++column) { + int cpu_number; + + retval = fscanf(fp, " CPU%d", &cpu_number); + if (retval != 1) + break; + + if (cpu_number > topo.max_cpu_num) { + warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); + return 1; + } + + irq_column_2_cpu[column] = cpu_number; + irqs_per_cpu[cpu_number] = 0; + } + + /* read /proc/interrupt count lines and sum up irqs per cpu */ + while (1) { + int column; + char buf[64]; + + retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ + if (retval != 1) + break; + + /* read the count per cpu */ + for (column = 0; column < topo.num_cpus; ++column) { + + int cpu_number, irq_count; + + retval = fscanf(fp, " %d", &irq_count); + if (retval != 1) + break; + + cpu_number = irq_column_2_cpu[column]; + irqs_per_cpu[cpu_number] += irq_count; + + } + + while (getc(fp) != '\n') + ; /* flush interrupt description */ + + } + return 0; +} +/* + * snapshot_gfx_rc6_ms() + * + * record snapshot of + * /sys/class/drm/card0/power/rc6_residency_ms + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_rc6_ms(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r"); + + retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms); + if (retval != 1) + err(1, "GFX rc6"); + + fclose(fp); + + return 0; +} +/* + * snapshot_gfx_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); + else + rewind(fp); + + retval = fscanf(fp, "%d", &gfx_cur_mhz); + if (retval != 1) + err(1, "GFX MHz"); + + return 0; +} + +/* + * snapshot /proc and /sys files + * + * return 1 if configuration restart needed, else return 0 + */ +int snapshot_proc_sysfs_files(void) +{ + if (snapshot_proc_interrupts()) + return 1; + + if (do_gfx_rc6_ms) + snapshot_gfx_rc6_ms(); + + if (do_gfx_mhz) + snapshot_gfx_mhz(); + + return 0; +} + void turbostat_loop() { int retval; @@ -1704,6 +1985,7 @@ void turbostat_loop() restart: restarted++; + snapshot_proc_sysfs_files(); retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { exit(retval); @@ -1722,7 +2004,9 @@ restart: re_initialize(); goto restart; } - sleep(interval_sec); + nanosleep(&interval_ts, NULL); + if (snapshot_proc_sysfs_files()) + goto restart; retval = for_all_cpus(get_counters, ODD_COUNTERS); if (retval < -1) { exit(retval); @@ -1735,8 +2019,10 @@ restart: for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); - flush_stdout(); - sleep(interval_sec); + flush_output_stdout(); + nanosleep(&interval_ts, NULL); + if (snapshot_proc_sysfs_files()) + goto restart; retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { exit(retval); @@ -1749,7 +2035,7 @@ restart: for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); - flush_stdout(); + flush_output_stdout(); } } @@ -1889,6 +2175,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) /* Nehalem compatible, but do not include turbo-ratio limit support */ case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ + case 0x57: /* PHI - Knights Landing (different MSR definition) */ return 0; default: return 1; @@ -2016,7 +2303,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } @@ -2037,7 +2324,98 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) epb_string = "custom"; break; } - fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); + fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); + + return 0; +} +/* + * print_hwp() + * Decode the MSR_HWP_CAPABILITIES + */ +int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + + if (!has_hwp) + return 0; + + cpu = t->cpu_id; + + /* MSR_HWP_CAPABILITIES is per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(outf, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (get_msr(cpu, MSR_PM_ENABLE, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", + cpu, msr, (msr & (1 << 0)) ? "" : "No-"); + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + if ((msr & (1 << 0)) == 0) + return 0; + + if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " + "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n", + cpu, msr, + (unsigned int)HWP_HIGHEST_PERF(msr), + (unsigned int)HWP_GUARANTEED_PERF(msr), + (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), + (unsigned int)HWP_LOWEST_PERF(msr)); + + if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " + "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n", + cpu, msr, + (unsigned int)(((msr) >> 0) & 0xff), + (unsigned int)(((msr) >> 8) & 0xff), + (unsigned int)(((msr) >> 16) & 0xff), + (unsigned int)(((msr) >> 24) & 0xff), + (unsigned int)(((msr) >> 32) & 0xff3), + (unsigned int)(((msr) >> 42) & 0x1)); + + if (has_hwp_pkg) { + if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " + "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n", + cpu, msr, + (unsigned int)(((msr) >> 0) & 0xff), + (unsigned int)(((msr) >> 8) & 0xff), + (unsigned int)(((msr) >> 16) & 0xff), + (unsigned int)(((msr) >> 24) & 0xff), + (unsigned int)(((msr) >> 32) & 0xff3)); + } + if (has_hwp_notify) { + if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " + "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", + cpu, msr, + ((msr) & 0x1) ? "EN" : "Dis", + ((msr) & 0x2) ? "EN" : "Dis"); + } + if (get_msr(cpu, MSR_HWP_STATUS, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", + cpu, msr, + ((msr) & 0x1) ? "" : "No-", + ((msr) & 0x2) ? "" : "No-"); return 0; } @@ -2057,14 +2435,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } if (do_core_perf_limit_reasons) { get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); - fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); - fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", + fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", (msr & 1 << 15) ? "bit15, " : "", (msr & 1 << 14) ? "bit14, " : "", (msr & 1 << 13) ? "Transitions, " : "", @@ -2079,7 +2457,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); - fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", (msr & 1 << 29) ? "Transitions, " : "", @@ -2098,8 +2476,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data } if (do_gfx_perf_limit_reasons) { get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); - fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); - fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", + fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", (msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 4) ? "Graphics, " : "", @@ -2108,7 +2486,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); - fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 20) ? "Graphics, " : "", @@ -2120,15 +2498,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data } if (do_ring_perf_limit_reasons) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); - fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); - fprintf(stderr, " (Active: %s%s%s%s%s%s)", + fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s)", (msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); - fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", + fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", @@ -2251,7 +2629,7 @@ void rapl_probe(unsigned int family, unsigned int model) rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (debug) - fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); + fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; } @@ -2293,7 +2671,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } @@ -2302,7 +2680,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; dts = (msr >> 16) & 0x7F; - fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tcc_activation_temp - dts); #ifdef THERM_DEBUG @@ -2311,7 +2689,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); #endif } @@ -2325,7 +2703,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; resolution = (msr >> 27) & 0xF; - fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", cpu, msr, tcc_activation_temp - dts, resolution); #ifdef THERM_DEBUG @@ -2334,17 +2712,17 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); #endif } return 0; } - + void print_power_limit_msr(int cpu, unsigned long long msr, char *label) { - fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, @@ -2368,7 +2746,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } @@ -2376,7 +2754,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; if (debug) { - fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " + fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, rapl_power_units, rapl_energy_units, rapl_time_units); } @@ -2386,7 +2764,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -5; - fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, @@ -2399,11 +2777,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 63) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); - fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, @@ -2415,7 +2793,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, @@ -2425,7 +2803,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (do_rapl & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); @@ -2435,7 +2813,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; - fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); + fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } } if (do_rapl & RAPL_CORES) { @@ -2443,7 +2821,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } @@ -2453,11 +2831,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; - fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "GFX Limit"); } @@ -2583,23 +2961,23 @@ double slm_bclk(void) double freq; if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) - fprintf(stderr, "SLM BCLK: unknown\n"); + fprintf(outf, "SLM BCLK: unknown\n"); i = msr & 0xf; if (i >= SLM_BCLK_FREQS) { - fprintf(stderr, "SLM BCLK[%d] invalid\n", i); + fprintf(outf, "SLM BCLK[%d] invalid\n", i); msr = 3; } freq = slm_freq_table[i]; - fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); return freq; } double discover_bclk(unsigned int family, unsigned int model) { - if (has_snb_msrs(family, model)) + if (has_snb_msrs(family, model) || is_knl(family, model)) return 100.00; else if (is_slm(family, model)) return slm_bclk(); @@ -2635,13 +3013,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } if (tcc_activation_temp_override != 0) { tcc_activation_temp = tcc_activation_temp_override; - fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", + fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tcc_activation_temp); return 0; } @@ -2656,7 +3034,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local = (msr >> 16) & 0xFF; if (debug) - fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); if (!target_c_local) @@ -2668,37 +3046,93 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk guess: tcc_activation_temp = TJMAX_DEFAULT; - fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tcc_activation_temp); return 0; } + +void decode_feature_control_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr)) + fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", + base_cpu, msr, + msr & FEATURE_CONTROL_LOCKED ? "" : "UN-", + msr & (1 << 18) ? "SGX" : ""); +} + +void decode_misc_enable_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) + fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", + base_cpu, msr, + msr & (1 << 3) ? "TCC" : "", + msr & (1 << 16) ? "EIST" : "", + msr & (1 << 18) ? "MONITOR" : ""); +} + +/* + * Decode MSR_MISC_PWR_MGMT + * + * Decode the bits according to the Nehalem documentation + * bit[0] seems to continue to have same meaning going forward + * bit[1] less so... + */ +void decode_misc_pwr_mgmt_msr(void) +{ + unsigned long long msr; + + if (!do_nhm_platform_info) + return; + + if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) + fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", + base_cpu, msr, + msr & (1 << 0) ? "DIS" : "EN", + msr & (1 << 1) ? "EN" : "DIS"); +} + void process_cpuid() { - unsigned int eax, ebx, ecx, edx, max_level; + unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; unsigned int fms, family, model, stepping; eax = ebx = ecx = edx = 0; - __get_cpuid(0, &max_level, &ebx, &ecx, &edx); + __cpuid(0, max_level, ebx, ecx, edx); if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; if (debug) - fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", + fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); - __get_cpuid(1, &fms, &ebx, &ecx, &edx); + __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; stepping = fms & 0xf; if (family == 6 || family == 0xf) model += ((fms >> 16) & 0xf) << 4; - if (debug) - fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", + if (debug) { + fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); + fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", + ecx & (1 << 0) ? "SSE3" : "-", + ecx & (1 << 3) ? "MONITOR" : "-", + ecx & (1 << 6) ? "SMX" : "-", + ecx & (1 << 7) ? "EIST" : "-", + ecx & (1 << 8) ? "TM2" : "-", + edx & (1 << 4) ? "TSC" : "-", + edx & (1 << 5) ? "MSR" : "-", + edx & (1 << 22) ? "ACPI-TM" : "-", + edx & (1 << 29) ? "TM" : "-"); + } if (!(edx & (1 << 5))) errx(1, "CPUID: no MSR"); @@ -2709,15 +3143,15 @@ void process_cpuid() * This check is valid for both Intel and AMD. */ ebx = ecx = edx = 0; - __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); + __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); - if (max_level >= 0x80000007) { + if (max_extended_level >= 0x80000007) { /* * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 * this check is valid for both Intel and AMD */ - __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + __cpuid(0x80000007, eax, ebx, ecx, edx); has_invariant_tsc = edx & (1 << 8); } @@ -2726,20 +3160,48 @@ void process_cpuid() * this check is valid for both Intel and AMD */ - __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); do_dts = eax & (1 << 0); do_ptm = eax & (1 << 6); + has_hwp = eax & (1 << 7); + has_hwp_notify = eax & (1 << 8); + has_hwp_activity_window = eax & (1 << 9); + has_hwp_epp = eax & (1 << 10); + has_hwp_pkg = eax & (1 << 11); has_epb = ecx & (1 << 3); if (debug) - fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", - has_aperf ? "" : "No ", - do_dts ? "" : "No ", - do_ptm ? "" : "No ", - has_epb ? "" : "No "); + fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " + "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", + has_aperf ? "" : "No-", + do_dts ? "" : "No-", + do_ptm ? "" : "No-", + has_hwp ? "" : "No-", + has_hwp_notify ? "" : "No-", + has_hwp_activity_window ? "" : "No-", + has_hwp_epp ? "" : "No-", + has_hwp_pkg ? "" : "No-", + has_epb ? "" : "No-"); + + if (debug) + decode_misc_enable_msr(); + + if (max_level >= 0x7) { + int has_sgx; - if (max_level > 0x15) { + ecx = 0; + + __cpuid_count(0x7, 0, eax, ebx, ecx, edx); + + has_sgx = ebx & (1 << 2); + fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-"); + + if (has_sgx) + decode_feature_control_msr(); + } + + if (max_level >= 0x15) { unsigned int eax_crystal; unsigned int ebx_tsc; @@ -2747,12 +3209,12 @@ void process_cpuid() * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz */ eax_crystal = ebx_tsc = crystal_hz = edx = 0; - __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); + __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); if (ebx_tsc != 0) { if (debug && (ebx != 0)) - fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", + fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) @@ -2768,11 +3230,24 @@ void process_cpuid() if (crystal_hz) { tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; if (debug) - fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", + fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); } } } + if (max_level >= 0x16) { + unsigned int base_mhz, max_mhz, bus_mhz, edx; + + /* + * CPUID 16H Base MHz, Max MHz, Bus MHz + */ + base_mhz = max_mhz = bus_mhz = edx = 0; + + __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); + if (debug) + fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", + base_mhz, max_mhz, bus_mhz); + } if (has_aperf) aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); @@ -2788,6 +3263,9 @@ void process_cpuid() do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); + if (debug) + decode_misc_pwr_mgmt_msr(); + rapl_probe(family, model); perf_limit_reasons_probe(family, model); @@ -2797,12 +3275,16 @@ void process_cpuid() if (has_skl_msrs(family, model)) calculate_tsc_tweak(); + do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); + + do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); + return; } void help() { - fprintf(stderr, + fprintf(outf, "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" "\n" "Turbostat forks the specified COMMAND and prints statistics\n" @@ -2814,6 +3296,7 @@ void help() "--help print this help message\n" "--counter msr print 32-bit counter at address \"msr\"\n" "--Counter msr print 64-bit Counter at address \"msr\"\n" + "--out file create or truncate \"file\" for all output\n" "--msr msr print 32-bit value at address \"msr\"\n" "--MSR msr print 64-bit Value at address \"msr\"\n" "--version print version information\n" @@ -2858,7 +3341,7 @@ void topology_probe() show_cpu = 1; if (debug > 1) - fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); + fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); if (cpus == NULL) @@ -2893,7 +3376,7 @@ void topology_probe() if (cpu_is_not_present(i)) { if (debug > 1) - fprintf(stderr, "cpu%d NOT PRESENT\n", i); + fprintf(outf, "cpu%d NOT PRESENT\n", i); continue; } cpus[i].core_id = get_core_id(i); @@ -2908,26 +3391,26 @@ void topology_probe() if (siblings > max_siblings) max_siblings = siblings; if (debug > 1) - fprintf(stderr, "cpu %d pkg %d core %d\n", + fprintf(outf, "cpu %d pkg %d core %d\n", i, cpus[i].physical_package_id, cpus[i].core_id); } topo.num_cores_per_pkg = max_core_id + 1; if (debug > 1) - fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", + fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); if (debug && !summary_only && topo.num_cores_per_pkg > 1) show_core = 1; topo.num_packages = max_package_id + 1; if (debug > 1) - fprintf(stderr, "max_package_id %d, sizing for %d packages\n", + fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); if (debug && !summary_only && topo.num_packages > 1) show_pkg = 1; topo.num_threads_per_core = max_siblings; if (debug > 1) - fprintf(stderr, "max_siblings %d\n", max_siblings); + fprintf(outf, "max_siblings %d\n", max_siblings); free(cpus); } @@ -3019,10 +3502,27 @@ void allocate_output_buffer() if (outp == NULL) err(-1, "calloc output buffer"); } +void allocate_fd_percpu(void) +{ + fd_percpu = calloc(topo.max_cpu_num, sizeof(int)); + if (fd_percpu == NULL) + err(-1, "calloc fd_percpu"); +} +void allocate_irq_buffers(void) +{ + irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); + if (irq_column_2_cpu == NULL) + err(-1, "calloc %d", topo.num_cpus); + irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int)); + if (irqs_per_cpu == NULL) + err(-1, "calloc %d", topo.max_cpu_num); +} void setup_all_buffers(void) { topology_probe(); + allocate_irq_buffers(); + allocate_fd_percpu(); allocate_counters(&thread_even, &core_even, &package_even); allocate_counters(&thread_odd, &core_odd, &package_odd); allocate_output_buffer(); @@ -3036,7 +3536,7 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); if (debug > 1) - fprintf(stderr, "base_cpu = %d\n", base_cpu); + fprintf(outf, "base_cpu = %d\n", base_cpu); } void turbostat_init() @@ -3049,6 +3549,9 @@ void turbostat_init() if (debug) + for_all_cpus(print_hwp, ODD_COUNTERS); + + if (debug) for_all_cpus(print_epb, ODD_COUNTERS); if (debug) @@ -3100,9 +3603,10 @@ int fork_it(char **argv) for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); - flush_stderr(); - fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); + fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); + + flush_output_stderr(); return status; } @@ -3119,13 +3623,13 @@ int get_and_dump_counters(void) if (status) return status; - flush_stdout(); + flush_output_stdout(); return status; } void print_version() { - fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" + fprintf(outf, "turbostat version 4.11 27 Feb 2016" " - Len Brown <lenb@kernel.org>\n"); } @@ -3143,6 +3647,7 @@ void cmdline(int argc, char **argv) {"Joules", no_argument, 0, 'J'}, {"MSR", required_argument, 0, 'M'}, {"msr", required_argument, 0, 'm'}, + {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, {"Summary", no_argument, 0, 'S'}, @@ -3153,7 +3658,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", long_options, &option_index)) != -1) { switch (opt) { case 'C': @@ -3173,7 +3678,18 @@ void cmdline(int argc, char **argv) help(); exit(1); case 'i': - interval_sec = atoi(optarg); + { + double interval = strtod(optarg, NULL); + + if (interval < 0.001) { + fprintf(outf, "interval %f seconds is too small\n", + interval); + exit(2); + } + + interval_ts.tv_sec = interval; + interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; + } break; case 'J': rapl_joules++; @@ -3184,6 +3700,9 @@ void cmdline(int argc, char **argv) case 'm': sscanf(optarg, "%x", &extra_msr_offset32); break; + case 'o': + outf = fopen_or_die(optarg, "w"); + break; case 'P': show_pkg_only++; break; @@ -3206,6 +3725,8 @@ void cmdline(int argc, char **argv) int main(int argc, char **argv) { + outf = stderr; + cmdline(argc, argv); if (debug) |