diff options
Diffstat (limited to 'tools/power')
-rw-r--r-- | tools/power/acpi/common/getopt.c | 4 | ||||
-rw-r--r-- | tools/power/acpi/man/acpidump.8 | 17 | ||||
-rw-r--r-- | tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 97 | ||||
-rw-r--r-- | tools/power/acpi/os_specific/service_layers/osunixmap.c | 4 | ||||
-rw-r--r-- | tools/power/acpi/tools/acpidump/acpidump.h | 2 | ||||
-rw-r--r-- | tools/power/acpi/tools/acpidump/apdump.c | 8 | ||||
-rw-r--r-- | tools/power/acpi/tools/acpidump/apfiles.c | 12 | ||||
-rw-r--r-- | tools/power/acpi/tools/acpidump/apmain.c | 15 | ||||
-rw-r--r-- | tools/power/cpupower/utils/helpers/pci.c | 11 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/mperf_monitor.c | 5 | ||||
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 8 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 138 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 650 |
13 files changed, 700 insertions, 271 deletions
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 5da129e10aa2..326e826a5d20 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -127,7 +127,7 @@ int acpi_getopt(int argc, char **argv, char *opts) argv[acpi_gbl_optind][0] != '-' || argv[acpi_gbl_optind][1] == '\0') { return (ACPI_OPT_END); - } else if (ACPI_STRCMP(argv[acpi_gbl_optind], "--") == 0) { + } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) { acpi_gbl_optind++; return (ACPI_OPT_END); } @@ -140,7 +140,7 @@ int acpi_getopt(int argc, char **argv, char *opts) /* Make sure that the option is legal */ if (current_char == ':' || - (opts_ptr = ACPI_STRCHR(opts, current_char)) == NULL) { + (opts_ptr = strchr(opts, current_char)) == NULL) { ACPI_OPTION_ERROR("Illegal option: -", current_char); if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') { diff --git a/tools/power/acpi/man/acpidump.8 b/tools/power/acpi/man/acpidump.8 index 38f095d86b52..79e2d1d435d1 100644 --- a/tools/power/acpi/man/acpidump.8 +++ b/tools/power/acpi/man/acpidump.8 @@ -22,9 +22,6 @@ acpidump options are as follow: .B \-b Dump tables to binary files .TP -.B \-c -Dump customized tables -.TP .B \-h \-? This help message .TP @@ -48,15 +45,25 @@ Verbose mode .B \-a <Address> Get table via a physical address .TP +.B \-c <on|off> +Turning on/off customized table dumping +.TP .B \-f <BinaryFile> Get table via a binary file .TP .B \-n <Signature> Get table via a name/signature .TP -Invocation without parameters dumps all available tables +.B \-x +Do not use but dump XSDT +.TP +.B \-x \-x +Do not use or dump XSDT +.TP +.fi +Invocation without parameters dumps all available tables. .TP -Multiple mixed instances of -a, -f, and -n are supported +Multiple mixed instances of -a, -f, and -n are supported. 
.SH EXAMPLES diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 92f1fd700344..dd5008b0617a 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -222,7 +222,7 @@ acpi_os_get_table_by_address(acpi_physical_address address, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); exit: osl_unmap_table(mapped_table); @@ -531,7 +531,7 @@ static acpi_status osl_load_rsdp(void) gbl_rsdp_address = rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address); - ACPI_MEMCPY(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); + memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); acpi_os_unmap_memory(rsdp_address, rsdp_size); return (AE_OK); @@ -582,64 +582,67 @@ static acpi_status osl_table_initialize(void) return (AE_OK); } - /* Get RSDP from memory */ + if (!gbl_dump_customized_tables) { - status = osl_load_rsdp(); - if (ACPI_FAILURE(status)) { - return (status); - } + /* Get RSDP from memory */ + + status = osl_load_rsdp(); + if (ACPI_FAILURE(status)) { + return (status); + } - /* Get XSDT from memory */ + /* Get XSDT from memory */ - if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { - if (gbl_xsdt) { - free(gbl_xsdt); - gbl_xsdt = NULL; + if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { + if (gbl_xsdt) { + free(gbl_xsdt); + gbl_xsdt = NULL; + } + + gbl_revision = 2; + status = osl_get_bios_table(ACPI_SIG_XSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_xsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - gbl_revision = 2; - status = osl_get_bios_table(ACPI_SIG_XSDT, 0, - ACPI_CAST_PTR(struct - acpi_table_header *, - &gbl_xsdt), &address); - if (ACPI_FAILURE(status)) { - return (status); + /* Get RSDT from memory */ + + if (gbl_rsdp.rsdt_physical_address) { + if (gbl_rsdt) { + free(gbl_rsdt); + 
gbl_rsdt = NULL; + } + + status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_rsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - } - /* Get RSDT from memory */ + /* Get FADT from memory */ - if (gbl_rsdp.rsdt_physical_address) { - if (gbl_rsdt) { - free(gbl_rsdt); - gbl_rsdt = NULL; + if (gbl_fadt) { + free(gbl_fadt); + gbl_fadt = NULL; } - status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + status = osl_get_bios_table(ACPI_SIG_FADT, 0, ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_rsdt), &address); + &gbl_fadt), + &gbl_fadt_address); if (ACPI_FAILURE(status)) { return (status); } - } - - /* Get FADT from memory */ - - if (gbl_fadt) { - free(gbl_fadt); - gbl_fadt = NULL; - } - - status = osl_get_bios_table(ACPI_SIG_FADT, 0, - ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_fadt), - &gbl_fadt_address); - if (ACPI_FAILURE(status)) { - return (status); - } - - if (!gbl_dump_customized_tables) { /* Add mandatory tables to global table list first */ @@ -961,7 +964,7 @@ osl_get_bios_table(char *signature, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); *address = table_address; *table = local_table; @@ -1156,7 +1159,7 @@ osl_table_name_from_file(char *filename, char *signature, u32 *instance) /* Extract instance number */ if (isdigit((int)filename[ACPI_NAME_SIZE])) { - sscanf(&filename[ACPI_NAME_SIZE], "%d", instance); + sscanf(&filename[ACPI_NAME_SIZE], "%u", instance); } else if (strlen(filename) != ACPI_NAME_SIZE) { return (AE_BAD_SIGNATURE); } else { diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 3853a7350440..44ad4889d468 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap") #ifndef O_BINARY #define O_BINARY 0 #endif 
-#ifdef _free_BSD +#if defined(_dragon_fly) || defined(_free_BSD) #define MMAP_FLAGS MAP_SHARED #else #define MMAP_FLAGS MAP_PRIVATE @@ -146,6 +146,6 @@ void acpi_os_unmap_memory(void *where, acpi_size length) acpi_size page_size; page_size = acpi_os_get_page_size(); - offset = (acpi_physical_address) where % page_size; + offset = ACPI_TO_INTEGER(where) % page_size; munmap((u8 *)where - offset, (length + offset)); } diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 84bdef0136cb..eed534481434 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -66,7 +66,7 @@ EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE); -EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, FALSE); +EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE); EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE); EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL); EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL); diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index c736adf5fb55..61d0de804b70 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -329,7 +329,7 @@ int ap_dump_table_by_name(char *signature) acpi_status status; int table_status; - if (ACPI_STRLEN(signature) != ACPI_NAME_SIZE) { + if (strlen(signature) != ACPI_NAME_SIZE) { acpi_log_error ("Invalid table signature [%s]: must be exactly 4 characters\n", signature); @@ -338,15 +338,15 @@ int ap_dump_table_by_name(char *signature) /* Table signatures are expected to be uppercase */ - ACPI_STRCPY(local_signature, signature); + strcpy(local_signature, signature); acpi_ut_strupr(local_signature); /* To be friendly, handle tables whose signatures do not match the name */ if (ACPI_COMPARE_NAME(local_signature, "FADT")) { - 
ACPI_STRCPY(local_signature, ACPI_SIG_FADT); + strcpy(local_signature, ACPI_SIG_FADT); } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_MADT); + strcpy(local_signature, ACPI_SIG_MADT); } /* Dump all instances of this signature (to handle multiple SSDTs) */ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 8f2fe168228e..a37f9702b2a9 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -136,10 +136,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) } else { ACPI_MOVE_NAME(filename, table->signature); } - filename[0] = (char)ACPI_TOLOWER(filename[0]); - filename[1] = (char)ACPI_TOLOWER(filename[1]); - filename[2] = (char)ACPI_TOLOWER(filename[2]); - filename[3] = (char)ACPI_TOLOWER(filename[3]); + filename[0] = (char)tolower((int)filename[0]); + filename[1] = (char)tolower((int)filename[1]); + filename[2] = (char)tolower((int)filename[2]); + filename[3] = (char)tolower((int)filename[3]); filename[ACPI_NAME_SIZE] = 0; /* Handle multiple SSDts - create different filenames for each */ @@ -147,10 +147,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) if (instance > 0) { acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u", instance); - ACPI_STRCAT(filename, instance_str); + strcat(filename, instance_str); } - ACPI_STRCAT(filename, ACPI_TABLE_FILE_SUFFIX); + strcat(filename, ACPI_TABLE_FILE_SUFFIX); if (gbl_verbose_mode) { acpi_log_error diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index d0ba6535f5af..57620f66ae6c 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -80,7 +80,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS]; u32 current_action = 0; #define AP_UTILITY_NAME "ACPI Binary Table Dump Utility" -#define AP_SUPPORTED_OPTIONS 
"?a:bcf:hn:o:r:svxz" +#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:svxz" /****************************************************************************** * @@ -96,7 +96,6 @@ static void ap_display_usage(void) ACPI_USAGE_HEADER("acpidump [options]"); ACPI_OPTION("-b", "Dump tables to binary files"); - ACPI_OPTION("-c", "Dump customized tables"); ACPI_OPTION("-h -?", "This help message"); ACPI_OPTION("-o <File>", "Redirect output to file"); ACPI_OPTION("-r <Address>", "Dump tables from specified RSDP"); @@ -107,6 +106,7 @@ static void ap_display_usage(void) ACPI_USAGE_TEXT("\nTable Options:\n"); ACPI_OPTION("-a <Address>", "Get table via a physical address"); + ACPI_OPTION("-c <on|off>", "Turning on/off customized table dumping"); ACPI_OPTION("-f <BinaryFile>", "Get table via a binary file"); ACPI_OPTION("-n <Signature>", "Get table via a name/signature"); ACPI_OPTION("-x", "Do not use but dump XSDT"); @@ -181,7 +181,16 @@ static int ap_do_options(int argc, char **argv) case 'c': /* Dump customized tables */ - gbl_dump_customized_tables = TRUE; + if (!strcmp(acpi_gbl_optarg, "on")) { + gbl_dump_customized_tables = TRUE; + } else if (!strcmp(acpi_gbl_optarg, "off")) { + gbl_dump_customized_tables = FALSE; + } else { + acpi_log_error + ("%s: Cannot handle this switch, please use on|off\n", + acpi_gbl_optarg); + return (-1); + } continue; case 'h': diff --git a/tools/power/cpupower/utils/helpers/pci.c b/tools/power/cpupower/utils/helpers/pci.c index 9690798e6446..8b278983cfc5 100644 --- a/tools/power/cpupower/utils/helpers/pci.c +++ b/tools/power/cpupower/utils/helpers/pci.c @@ -25,14 +25,21 @@ struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus, int slot, int func, int vendor, int dev) { - struct pci_filter filter_nb_link = { domain, bus, slot, func, - vendor, dev }; + struct pci_filter filter_nb_link; struct pci_dev *device; *pacc = pci_alloc(); if (*pacc == NULL) return NULL; + pci_filter_init(*pacc, &filter_nb_link); + filter_nb_link.domain = 
domain; + filter_nb_link.bus = bus; + filter_nb_link.slot = slot; + filter_nb_link.func = func; + filter_nb_link.vendor = vendor; + filter_nb_link.device = dev; + pci_init(*pacc); pci_scan_bus(*pacc); diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 90a8c4f071e7..c83f1606970b 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -135,7 +135,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", mperf_cstates[id].name, mperf_diff, tsc_diff); } else if (max_freq_mode == MAX_FREQ_SYSFS) { - timediff = timespec_diff_us(time_start, time_end); + timediff = max_frequency * timespec_diff_us(time_start, time_end); *percent = 100.0 * mperf_diff / timediff; dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", mperf_cstates[id].name, mperf_diff, timediff); @@ -176,7 +176,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, dprint("%s: Average freq based on %s maximum frequency:\n", mperf_cstates[id].name, (max_freq_mode == MAX_FREQ_TSC_REF) ? 
"TSC calculated" : "sysfs read"); - dprint("%max_frequency: %lu", max_frequency); + dprint("max_frequency: %lu\n", max_frequency); dprint("aperf_diff: %llu\n", aperf_diff); dprint("mperf_diff: %llu\n", mperf_diff); dprint("avg freq: %llu\n", *count); @@ -279,6 +279,7 @@ use_sysfs: return -1; } max_freq_mode = MAX_FREQ_SYSFS; + max_frequency /= 1000; /* Default automatically to MHz value */ return 0; } diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d1b3a361e526..e367b1a85d70 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,11 +1,15 @@ CC = $(CROSS_COMPILE)gcc -BUILD_OUTPUT := $(PWD) +BUILD_OUTPUT := $(CURDIR) PREFIX := /usr DESTDIR := +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + turbostat : turbostat.c CFLAGS += -Wall -CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' +CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' %: %.c @mkdir -p $(BUILD_OUTPUT) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index feea7ad9500b..05b8fc38dc8b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -20,9 +20,11 @@ upon its completion. The second method is to omit the command, and turbostat displays statistics every 5 seconds. The 5-second interval can be changed using the --interval option. - +.PP Some information is not available on older processors. .SS Options +Options can be specified with a single or double '-', and only as much of the option +name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive. \fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. .PP \fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. @@ -55,16 +57,20 @@ more than once may also enable internal turbostat debug information. 
The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, displays the statistics gathered since it was forked. .PP -.SH FIELD DESCRIPTIONS +.SH DEFAULT FIELD DESCRIPTIONS .nf -\fBPackage\fP processor package number. -\fBCore\fP processor core number. -\fBCPU\fP Linux CPU (logical processor) number. -Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. +\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. \fBAVG_MHz\fP number of cycles executed divided by time elapsed. \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. +.fi +.PP +.SH DEBUG FIELD DESCRIPTIONS +.nf +\fBPackage\fP processor package number. +\fBCore\fP processor core number. +Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. @@ -81,63 +87,76 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T Without any parameters, turbostat displays statistics ever 5 seconds. (override interval with "-i sec" option, or specify a command for turbostat to fork). 
+.nf +[root@hsw]# ./turbostat + CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + - 488 12.51 3898 3498 + 0 0 0.01 3885 3498 + 4 3897 99.99 3898 3498 + 1 0 0.00 3861 3498 + 5 0 0.00 3882 3498 + 2 1 0.02 3894 3498 + 6 2 0.06 3898 3498 + 3 0 0.00 3849 3498 + 7 0 0.00 3877 3498 + +.fi +.SH DEBUG EXAMPLE +The "--debug" option prints additional system information before measurements: The first row of statistics is a summary for the entire system. For residency % columns, the summary is a weighted average. For Temperature columns, the summary is the column maximum. For Watts columns, the summary is a system total. Subsequent rows show per-CPU statistics. - -.nf -[root@ivy]# ./turbostat - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - - - 6 0.36 1596 3492 0 0.59 0.01 99.04 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 - 0 0 9 0.58 1596 3492 0 0.28 0.01 99.13 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 - 0 4 1 0.07 1596 3492 0 0.79 - 1 1 10 0.65 1596 3492 0 0.59 0.00 98.76 0.00 23 - 1 5 5 0.28 1596 3492 0 0.95 - 2 2 10 0.66 1596 3492 0 0.41 0.01 98.92 0.00 23 - 2 6 2 0.10 1597 3492 0 0.97 - 3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23 - 3 7 5 0.31 1596 3492 0 0.33 -.fi -.SH DEBUG EXAMPLE -The "--debug" option prints additional system information before measurements: - .nf -turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org> -CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) +turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org> +CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3) CPUID(6): APERF, DTS, PTM, EPB -RAPL: 851 sec. Joule Counter Range, at 77 Watts -cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 -16 * 100 = 1600 MHz max efficiency +RAPL: 3121 sec. 
Joule Counter Range, at 84 Watts +cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 +8 * 100 = 800 MHz max efficiency 35 * 100 = 3500 MHz TSC frequency -cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled) -cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n) +cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) +cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 37 * 100 = 3700 MHz max turbo 4 active cores 38 * 100 = 3800 MHz max turbo 3 active cores 39 * 100 = 3900 MHz max turbo 2 active cores 39 * 100 = 3900 MHz max turbo 1 active cores cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) -cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) -cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) -cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked) -cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) -cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled) +cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, ) +cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: ) +cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, ) +cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) +cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.) 
+cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked) +cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled) +cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled) cpu0: MSR_PP0_POLICY: 0 cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) cpu0: MSR_PP1_POLICY: 0 cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) -cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) -cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) -cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1) -cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1) -cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1) -cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) - ... +cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C) +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C) +cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) + Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt + - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 + 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 + 0 4 3897 99.98 3898 3498 0 0.02 + 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32 + 1 5 0 0.00 3885 3498 0 0.21 + 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32 + 2 6 2 0.06 3896 3498 0 0.80 + 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28 + 3 7 0 0.00 3879 3498 0 0.04 +^C + .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency available at the minimum package voltage. The \fBTSC frequency\fP is the base @@ -147,6 +166,9 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling. 
The remaining rows show what maximum turbo frequency is possible depending on the number of idle cores. Note that not all information is available on all processors. +.PP +The --debug option adds additional columns to the measurement output, including CPU idle power-state residency and processor temperature sensor readings. +See the field definitions above. .SH FORK EXAMPLE If turbostat is invoked with a command, it will fork that command and output the statistics gathered when the command exits. @@ -154,27 +176,23 @@ eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds until ^C while the other CPUs are mostly idle: .nf -root@ivy: turbostat cat /dev/zero > /dev/null +root@hsw: turbostat cat /dev/zero > /dev/null ^C - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - - - 496 12.75 3886 3492 0 13.16 0.04 74.04 0.00 36 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 - 0 0 22 0.57 3830 3492 0 0.83 0.02 98.59 0.00 27 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 - 0 4 9 0.24 3829 3492 0 1.15 - 1 1 4 0.09 3783 3492 0 99.91 0.00 0.00 0.00 36 - 1 5 3880 99.82 3888 3492 0 0.18 - 2 2 17 0.44 3813 3492 0 0.77 0.04 98.75 0.00 28 - 2 6 12 0.32 3823 3492 0 0.89 - 3 3 16 0.43 3844 3492 0 0.63 0.11 98.84 0.00 30 - 3 7 4 0.11 3827 3492 0 0.94 -30.372243 sec + CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + - 482 12.51 3854 3498 + 0 0 0.01 1960 3498 + 4 0 0.00 2128 3498 + 1 0 0.00 3003 3498 + 5 3854 99.98 3855 3498 + 2 0 0.01 3504 3498 + 6 3 0.08 3884 3498 + 3 0 0.00 2553 3498 + 7 0 0.00 2126 3498 +10.783983 sec .fi -Above the cycle soaker drives cpu5 up its 3.8 GHz turbo limit -while the other processors are generally in various states of idle. - -Note that cpu1 and cpu5 are HT siblings within core1. -As cpu5 is very busy, it prevents its sibling, cpu1, -from entering a c-state deeper than c1. +Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit. 
+The first row shows the average MHz and %Busy across all the processors in the system. Note that the Avg_MHz column reflects the total number of cycles executed divided by the measurement interval. If the %Busy column is 100%, diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2d089cac8580..323b65edfc97 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,11 +52,13 @@ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; +unsigned int do_knl_cstates; unsigned int do_pc2; unsigned int do_pc3; unsigned int do_pc6; unsigned int do_pc7; unsigned int do_c8_c9_c10; +unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; @@ -65,8 +67,6 @@ unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; -unsigned int do_nhm_turbo_ratio_limit; -unsigned int do_ivt_turbo_ratio_limit; unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; @@ -84,11 +84,15 @@ unsigned int do_dts; unsigned int do_ptm; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; -double rapl_power_units, rapl_energy_units, rapl_time_units; +double rapl_power_units, rapl_time_units; +double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; unsigned int do_core_perf_limit_reasons; unsigned int do_gfx_perf_limit_reasons; unsigned int do_ring_perf_limit_reasons; +unsigned int crystal_hz; +unsigned long long tsc_hz; +int base_cpu; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -101,18 +105,18 @@ unsigned int do_ring_perf_limit_reasons; #define RAPL_DRAM (1 << 3) /* 0x618 MSR_DRAM_POWER_LIMIT */ /* 0x619 MSR_DRAM_ENERGY_STATUS */ - /* 0x61c MSR_DRAM_POWER_INFO */ #define RAPL_DRAM_PERF_STATUS (1 << 4) /* 0x61b 
MSR_DRAM_PERF_STATUS */ +#define RAPL_DRAM_POWER_INFO (1 << 5) + /* 0x61c MSR_DRAM_POWER_INFO */ -#define RAPL_CORES (1 << 5) +#define RAPL_CORES (1 << 6) /* 0x638 MSR_PP0_POWER_LIMIT */ /* 0x639 MSR_PP0_ENERGY_STATUS */ -#define RAPL_CORE_POLICY (1 << 6) +#define RAPL_CORE_POLICY (1 << 7) /* 0x63a MSR_PP0_POLICY */ - -#define RAPL_GFX (1 << 7) +#define RAPL_GFX (1 << 8) /* 0x640 MSR_PP1_POWER_LIMIT */ /* 0x641 MSR_PP1_ENERGY_STATUS */ /* 0x642 MSR_PP1_POLICY */ @@ -159,6 +163,10 @@ struct pkg_data { unsigned long long pc8; unsigned long long pc9; unsigned long long pc10; + unsigned long long pkg_wtd_core_c0; + unsigned long long pkg_any_core_c0; + unsigned long long pkg_any_gfxe_c0; + unsigned long long pkg_both_core_gfxe_c0; unsigned int package_id; unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -292,8 +300,7 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " Bzy_MHz"); outp += sprintf(outp, " TSC_MHz"); - if (do_smi) - outp += sprintf(outp, " SMI"); + if (extra_delta_offset32) outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); if (extra_delta_offset64) @@ -302,9 +309,16 @@ void print_header(void) outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); if (extra_msr_offset64) outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); + + if (!debug) + goto done; + + if (do_smi) + outp += sprintf(outp, " SMI"); + if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c1"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, " CPU%%c3"); if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c6"); @@ -316,6 +330,13 @@ void print_header(void) if (do_ptm) outp += sprintf(outp, " PkgTmp"); + if (do_skl_residency) { + outp += sprintf(outp, " Totl%%C0"); + outp += sprintf(outp, " Any%%C0"); + outp += sprintf(outp, " GFX%%C0"); + outp += sprintf(outp, " CPUGFX%%"); + } + if (do_pc2) outp += sprintf(outp, " Pkg%%pc2"); if 
(do_pc3) @@ -359,6 +380,7 @@ void print_header(void) outp += sprintf(outp, " time"); } + done: outp += sprintf(outp, "\n"); } @@ -396,6 +418,12 @@ int dump_counters(struct thread_data *t, struct core_data *c, if (p) { outp += sprintf(outp, "package: %d\n", p->package_id); + + outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); + outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); + outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); + outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); + outp += sprintf(outp, "pc2: %016llX\n", p->pc2); if (do_pc3) outp += sprintf(outp, "pc3: %016llX\n", p->pc3); @@ -487,10 +515,6 @@ int format_counters(struct thread_data *t, struct core_data *c, /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); - /* SMI */ - if (do_smi) - outp += sprintf(outp, "%8d", t->smi_count); - /* delta */ if (extra_delta_offset32) outp += sprintf(outp, " %11llu", t->extra_delta32); @@ -506,6 +530,13 @@ int format_counters(struct thread_data *t, struct core_data *c, if (extra_msr_offset64) outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + if (!debug) + goto done; + + /* SMI */ + if (do_smi) + outp += sprintf(outp, "%8d", t->smi_count); + if (do_nhm_cstates) { if (!skip_c1) outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); @@ -517,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); @@ -531,9 +562,18 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; + /* PkgTmp */ if (do_ptm) outp += sprintf(outp, "%8d", p->pkg_temp_c); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ + if 
(do_skl_residency) { + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); + } + if (do_pc2) outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); if (do_pc3) @@ -565,7 +605,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_rapl & RAPL_GFX) outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); if (do_rapl & RAPL_PKG_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) @@ -582,7 +622,7 @@ int format_counters(struct thread_data *t, struct core_data *c, p->energy_gfx * rapl_energy_units); if (do_rapl & RAPL_DRAM) outp += sprintf(outp, fmt8, - p->energy_dram * rapl_energy_units); + p->energy_dram * rapl_dram_energy_units); if (do_rapl & RAPL_PKG_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) @@ -636,6 +676,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ void delta_package(struct pkg_data *new, struct pkg_data *old) { + + if (do_skl_residency) { + old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; + old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; + old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; + old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; + } old->pc2 = new->pc2 - old->pc2; if (do_pc3) old->pc3 = new->pc3 - old->pc3; @@ -782,6 +829,11 @@ void clear_counters(struct 
thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->core_temp_c = 0; + p->pkg_wtd_core_c0 = 0; + p->pkg_any_core_c0 = 0; + p->pkg_any_gfxe_c0 = 0; + p->pkg_both_core_gfxe_c0 = 0; + p->pc2 = 0; if (do_pc3) p->pc3 = 0; @@ -826,6 +878,13 @@ int sum_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + if (do_skl_residency) { + average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; + average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; + average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; + average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; + } + average.packages.pc2 += p->pc2; if (do_pc3) average.packages.pc3 += p->pc3; @@ -873,6 +932,13 @@ void compute_average(struct thread_data *t, struct core_data *c, average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; + if (do_skl_residency) { + average.packages.pkg_wtd_core_c0 /= topo.num_packages; + average.packages.pkg_any_core_c0 /= topo.num_packages; + average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; + } + average.packages.pc2 /= topo.num_packages; if (do_pc3) average.packages.pc3 /= topo.num_packages; @@ -954,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates && !do_slm_cstates) { + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (do_nhm_cstates) { + if (do_nhm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; + } else if (do_knl_cstates) { + if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) + return -7; } if (do_snb_cstates) @@ -979,6 +1048,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + if 
(do_skl_residency) { + if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) + return -10; + if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) + return -11; + if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) + return -12; + if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) + return -13; + } if (do_pc3) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; @@ -1055,49 +1134,77 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) #define PCL_6R 9 /* PC6 Retention */ #define PCL__7 10 /* PC7 */ #define PCL_7S 11 /* PC7 Shrink */ -#define PCLUNL 12 /* Unlimited */ +#define PCL__8 12 /* PC8 */ +#define PCL__9 13 /* PC9 */ +#define PCLUNL 14 /* Unlimited */ int pkg_cstate_limit = PCLUKN; char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", - "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "unlimited"}; + "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"}; -int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL}; -int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL}; -int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL}; -int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7}; -int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; -int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL}; +int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, 
PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -void print_verbose_header(void) +static void +dump_nhm_platform_info(void) { unsigned long long msr; unsigned int ratio; - if (!do_nhm_platform_info) - return; - - get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); ratio = (msr >> 40) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", + fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", + fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", ratio, bclk, ratio * bclk); - get_msr(0, MSR_IA32_POWER_CTL, &msr); + get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", msr, msr & 0x2 ? 
"EN" : "DIS"); - if (!do_ivt_turbo_ratio_limit) - goto print_nhm_turbo_ratio_limits; + return; +} + +static void +dump_hsw_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); + + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + ratio, bclk, ratio * bclk); + return; +} + +static void +dump_ivt_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; - get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); - fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1138,30 +1245,18 @@ void print_verbose_header(void) if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); + return; +} -print_nhm_turbo_ratio_limits: - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - - fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); - - fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", - (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", - (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", - (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", - (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", - (msr & (1 << 15)) ? 
"" : "UN", - (unsigned int)msr & 7, - pkg_cstate_limit_strings[pkg_cstate_limit]); - - if (!do_nhm_turbo_ratio_limit) - return; +static void +dump_nhm_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; - get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1202,7 +1297,91 @@ print_nhm_turbo_ratio_limits: if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); + return; +} + +static void +dump_knl_turbo_ratio_limits(void) +{ + int cores; + unsigned int ratio; + unsigned long long msr; + int delta_cores; + int delta_ratio; + int i; + + get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", + msr); + + /** + * Turbo encoding in KNL is as follows: + * [7:0] -- Base value of number of active cores of bucket 1. + * [15:8] -- Base value of freq ratio of bucket 1. + * [20:16] -- +ve delta of number of active cores of bucket 2. + * i.e. active cores of bucket 2 = + * active cores of bucket 1 + delta + * [23:21] -- Negative delta of freq ratio of bucket 2. + * i.e. freq ratio of bucket 2 = + * freq ratio of bucket 1 - delta + * [28:24]-- +ve delta of number of active cores of bucket 3. + * [31:29]-- -ve delta of freq ratio of bucket 3. + * [36:32]-- +ve delta of number of active cores of bucket 4. + * [39:37]-- -ve delta of freq ratio of bucket 4. + * [44:40]-- +ve delta of number of active cores of bucket 5. + * [47:45]-- -ve delta of freq ratio of bucket 5. + * [52:48]-- +ve delta of number of active cores of bucket 6. + * [55:53]-- -ve delta of freq ratio of bucket 6. + * [60:56]-- +ve delta of number of active cores of bucket 7. + * [63:61]-- -ve delta of freq ratio of bucket 7. 
+ */ + cores = msr & 0xFF; + ratio = (msr >> 8) && 0xFF; + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + + for (i = 16; i < 64; i = i + 8) { + delta_cores = (msr >> i) & 0x1F; + delta_ratio = (msr >> (i + 5)) && 0x7; + if (!delta_cores || !delta_ratio) + return; + cores = cores + delta_cores; + ratio = ratio - delta_ratio; + + /** -ve ratios will make successive ratio calculations + * negative. Hence return instead of carrying on. + */ + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + } +} + +static void +dump_nhm_cst_cfg(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + + fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); + fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", + (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", + (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", + (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", + (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", + (msr & (1 << 15)) ? "" : "UN", + (unsigned int)msr & 7, + pkg_cstate_limit_strings[pkg_cstate_limit]); + return; } void free_all_buffers(void) @@ -1268,12 +1447,41 @@ int parse_int_file(const char *fmt, ...) 
} /* - * cpu_is_first_sibling_in_core(cpu) - * return 1 if given CPU is 1st HT sibling in the core + * get_cpu_position_in_core(cpu) + * return the position of the CPU among its HT siblings in the core + * return -1 if the sibling is not in list */ -int cpu_is_first_sibling_in_core(int cpu) +int get_cpu_position_in_core(int cpu) { - return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); + char path[64]; + FILE *filep; + int this_cpu; + char character; + int i; + + sprintf(path, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", + cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + + for (i = 0; i < topo.num_threads_per_core; i++) { + fscanf(filep, "%d", &this_cpu); + if (this_cpu == cpu) { + fclose(filep); + return i; + } + + /* Account for no separator after last thread*/ + if (i != (topo.num_threads_per_core - 1)) + fscanf(filep, "%c", &character); + } + + fclose(filep); + return -1; } /* @@ -1299,25 +1507,31 @@ int get_num_ht_siblings(int cpu) { char path[80]; FILE *filep; - int sib1, sib2; - int matches; + int sib1; + int matches = 0; char character; + char str[100]; + char *ch; sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); filep = fopen_or_die(path, "r"); + /* * file format: - * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) - * otherwinse 1 sibling (self). 
+ * A ',' separated or '-' separated set of numbers + * (eg 1-2 or 1,3,4,5) */ - matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); + fscanf(filep, "%d%c\n", &sib1, &character); + fseek(filep, 0, SEEK_SET); + fgets(str, 100, filep); + ch = strchr(str, character); + while (ch != NULL) { + matches++; + ch = strchr(ch+1, character); + } fclose(filep); - - if (matches == 3) - return 2; - else - return 1; + return matches+1; } /* @@ -1481,9 +1695,12 @@ restart: void check_dev_msr() { struct stat sb; + char pathname[32]; - if (stat("/dev/cpu/0/msr", &sb)) - err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } void check_permissions() @@ -1494,6 +1711,7 @@ void check_permissions() cap_user_data_t cap_data = &cap_data_data; extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); int do_exit = 0; + char pathname[32]; /* check for CAP_SYS_RAWIO */ cap_header->pid = getpid(); @@ -1508,7 +1726,8 @@ void check_permissions() } /* test file permissions */ - if (euidaccess("/dev/cpu/0/msr", R_OK)) { + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (euidaccess(pathname, R_OK)) { do_exit++; warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); } @@ -1573,6 +1792,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ pkg_cstate_limits = hsw_pkg_cstate_limits; break; case 0x37: /* BYT */ @@ -1588,9 +1809,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) default: return 0; } - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - pkg_cstate_limit = pkg_cstate_limits[msr & 0x7]; + pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; return 1; } @@ -1615,11 +1836,66 @@ int 
has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) switch (model) { case 0x3E: /* IVB Xeon */ + case 0x3F: /* HSW Xeon */ return 1; default: return 0; } } +int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x3F: /* HSW Xeon */ + return 1; + default: + return 0; + } +} + +int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x57: /* Knights Landing */ + return 1; + default: + return 0; + } +} +static void +dump_cstate_pstate_config_info(family, model) +{ + if (!do_nhm_platform_info) + return; + + dump_nhm_platform_info(); + + if (has_hsw_turbo_ratio_limit(family, model)) + dump_hsw_turbo_ratio_limits(); + + if (has_ivt_turbo_ratio_limit(family, model)) + dump_ivt_turbo_ratio_limits(); + + if (has_nhm_turbo_ratio_limit(family, model)) + dump_nhm_turbo_ratio_limits(); + + if (has_knl_turbo_ratio_limit(family, model)) + dump_knl_turbo_ratio_limits(); + + dump_nhm_cst_cfg(); +} + /* * print_epb() @@ -1648,7 +1924,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) return 0; - switch (msr & 0x7) { + switch (msr & 0xF) { case ENERGY_PERF_BIAS_PERFORMANCE: epb_string = "performance"; break; @@ -1690,35 +1966,35 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", - (msr & 1 << 0) ? "PROCHOT, " : "", - (msr & 1 << 1) ? "ThermStatus, " : "", - (msr & 1 << 2) ? "bit2, " : "", - (msr & 1 << 4) ? "Graphics, " : "", - (msr & 1 << 5) ? "Auto-HWP, " : "", - (msr & 1 << 6) ? "VR-Therm, " : "", - (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 9) ? 
"CorePwr, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", - (msr & 1 << 11) ? "PkgPwrL2, " : "", - (msr & 1 << 12) ? "MultiCoreTurbo, " : "", - (msr & 1 << 13) ? "Transitions, " : "", + (msr & 1 << 15) ? "bit15, " : "", (msr & 1 << 14) ? "bit14, " : "", - (msr & 1 << 15) ? "bit15, " : ""); + (msr & 1 << 13) ? "Transitions, " : "", + (msr & 1 << 12) ? "MultiCoreTurbo, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 9) ? "CorePwr, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 5) ? "Auto-HWP, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 2) ? "bit2, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 0) ? "PROCHOT, " : ""); fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", - (msr & 1 << 16) ? "PROCHOT, " : "", - (msr & 1 << 17) ? "ThermStatus, " : "", - (msr & 1 << 18) ? "bit18, " : "", - (msr & 1 << 20) ? "Graphics, " : "", - (msr & 1 << 21) ? "Auto-HWP, " : "", - (msr & 1 << 22) ? "VR-Therm, " : "", - (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 25) ? "CorePwr, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", - (msr & 1 << 27) ? "PkgPwrL2, " : "", - (msr & 1 << 28) ? "MultiCoreTurbo, " : "", - (msr & 1 << 29) ? "Transitions, " : "", + (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", - (msr & 1 << 31) ? "bit31, " : ""); + (msr & 1 << 29) ? "Transitions, " : "", + (msr & 1 << 28) ? "MultiCoreTurbo, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 25) ? "CorePwr, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 21) ? "Auto-HWP, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 18) ? "bit18, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 16) ? 
"PROCHOT, " : ""); } if (do_gfx_perf_limit_reasons) { @@ -1772,7 +2048,7 @@ double get_tdp(model) unsigned long long msr; if (do_rapl & RAPL_PKG_POWER_INFO) - if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; switch (model) { @@ -1784,6 +2060,26 @@ double get_tdp(model) } } +/* + * rapl_dram_energy_units_probe() + * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. + */ +static double +rapl_dram_energy_units_probe(int model, double rapl_energy_units) +{ + /* only called for genuine_intel, family 6 */ + + switch (model) { + case 0x3F: /* HSX */ + case 0x4F: /* BDX */ + case 0x56: /* BDX-DE */ + case 0x57: /* KNL */ + return (rapl_dram_energy_units = 15.3 / 1000000); + default: + return (rapl_energy_units); + } +} + /* * rapl_probe() @@ -1812,14 +2108,19 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x47: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; break; + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + break; case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ - do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + case 0x57: /* KNL */ + do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x2D: case 0x3E: - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x37: /* BYT */ case 0x4D: /* AVN */ @@ -1830,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model) } /* 
units on package 0, verify later other packages match */ - if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) + if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); @@ -1839,6 +2140,8 @@ void rapl_probe(unsigned int family, unsigned int model) else rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units); + time_unit = msr >> 16 & 0xF; if (time_unit == 0) time_unit = 0xA; @@ -2009,19 +2312,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 48) & 1) ? "EN" : "DIS"); } - if (do_rapl & RAPL_DRAM) { + if (do_rapl & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); - - + } + if (do_rapl & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", @@ -2090,6 +2392,8 @@ int has_snb_msrs(unsigned int family, unsigned int model) case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ return 1; } return 0; @@ -2110,12 +2414,36 @@ int has_hsw_msrs(unsigned int family, unsigned int model) switch (model) { case 0x45: /* HSW */ case 0x3D: /* BDW */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + return 1; + } + return 0; +} + +/* + * SKL adds support for additional MSRS: + * + * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 + * MSR_PKG_ANY_CORE_C0_RES 0x00000659 + * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A + * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + */ +int has_skl_msrs(unsigned int family, unsigned int model) +{ + if 
(!genuine_intel) + return 0; + + switch (model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ return 1; } return 0; } + int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2128,6 +2456,17 @@ int is_slm(unsigned int family, unsigned int model) return 0; } +int is_knl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + switch (model) { + case 0x57: /* KNL */ + return 1; + } + return 0; +} + #define SLM_BCLK_FREQS 5 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; @@ -2137,7 +2476,7 @@ double slm_bclk(void) unsigned int i; double freq; - if (get_msr(0, MSR_FSB_FREQ, &msr)) + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) fprintf(stderr, "SLM BCLK: unknown\n"); i = msr & 0xf; @@ -2205,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!do_nhm_platform_info) goto guess; - if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) goto guess; target_c_local = (msr >> 16) & 0xFF; @@ -2228,7 +2567,7 @@ guess: return 0; } -void check_cpuid() +void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level; unsigned int fms, family, model, stepping; @@ -2294,6 +2633,41 @@ void check_cpuid() do_ptm ? "" : "No ", has_epb ? 
"" : "No "); + if (max_level > 0x15) { + unsigned int eax_crystal; + unsigned int ebx_tsc; + + /* + * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz + */ + eax_crystal = ebx_tsc = crystal_hz = edx = 0; + __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); + + if (ebx_tsc != 0) { + + if (debug && (ebx != 0)) + fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", + eax_crystal, ebx_tsc, crystal_hz); + + if (crystal_hz == 0) + switch(model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + crystal_hz = 24000000; /* 24 MHz */ + break; + default: + crystal_hz = 0; + } + + if (crystal_hz) { + tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; + if (debug) + fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", + tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); + } + } + } + do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); do_snb_cstates = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); @@ -2301,18 +2675,20 @@ void check_cpuid() do_pc6 = (pkg_cstate_limit >= PCL__6); do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); do_c8_c9_c10 = has_hsw_msrs(family, model); + do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); + do_knl_cstates = is_knl(family, model); bclk = discover_bclk(family, model); - do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model); - do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); rapl_probe(family, model); perf_limit_reasons_probe(family, model); + if (debug) + dump_cstate_pstate_config_info(); + return; } - void help() { fprintf(stderr, @@ -2428,14 +2804,14 @@ void topology_probe() if (debug > 1) fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); - if (!summary_only && topo.num_cores_per_pkg > 1) + if (debug && !summary_only && topo.num_cores_per_pkg > 1) show_core = 
1; topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(stderr, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); - if (!summary_only && topo.num_packages > 1) + if (debug && !summary_only && topo.num_packages > 1) show_pkg = 1; topo.num_threads_per_core = max_siblings; @@ -2516,13 +2892,9 @@ int initialize_counters(int cpu_id) my_package_id = get_physical_package_id(cpu_id); my_core_id = get_core_id(cpu_id); - - if (cpu_is_first_sibling_in_core(cpu_id)) { - my_thread_id = 0; + my_thread_id = get_cpu_position_in_core(cpu_id); + if (!my_thread_id) topo.num_cores++; - } else { - my_thread_id = 1; - } init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); @@ -2546,16 +2918,24 @@ void setup_all_buffers(void) for_all_proc_cpus(initialize_counters); } +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); + + if (debug > 1) + fprintf(stderr, "base_cpu = %d\n", base_cpu); +} + void turbostat_init() { + setup_all_buffers(); + set_base_cpu(); check_dev_msr(); check_permissions(); - check_cpuid(); - - setup_all_buffers(); + process_cpuid(); - if (debug) - print_verbose_header(); if (debug) for_all_cpus(print_epb, ODD_COUNTERS); @@ -2634,7 +3014,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.1 10-Feb, 2015" + fprintf(stderr, "turbostat version 4.7 27-May, 2015" " - Len Brown <lenb@kernel.org>\n"); } |