summaryrefslogtreecommitdiffstats
path: root/tools/power/x86
diff options
context:
space:
mode:
Diffstat (limited to 'tools/power/x86')
-rwxr-xr-xtools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py586
-rw-r--r--tools/power/x86/turbostat/turbostat.8255
-rw-r--r--tools/power/x86/turbostat/turbostat.c2000
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile27
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8241
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c1504
6 files changed, 3729 insertions, 884 deletions
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
new file mode 100755
index 000000000000..0b24dd9d01ff
--- /dev/null
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -0,0 +1,586 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+""" This utility can be used to debug and tune the performance of the
+intel_pstate driver. This utility can be used in two ways:
+- If there is Linux trace file with pstate_sample events enabled, then
+this utility can parse the trace file and generate performance plots.
+- If user has not specified a trace file as input via command line parameters,
+then this utility enables and collects trace data for a user specified interval
+and generates performance plots.
+
+Prerequisites:
+ Python version 2.7.x
+ gnuplot 5.0 or higher
+ gnuplot-py 1.8
+ (Most of the distributions have these required packages. They may be called
+ gnuplot-py, phython-gnuplot. )
+
+ HWP (Hardware P-States are disabled)
+ Kernel config for Linux trace is enabled
+
+ see print_help(): for Usage and Output details
+
+"""
+from __future__ import print_function
+from datetime import datetime
+import subprocess
+import os
+import time
+import re
+import sys
+import getopt
+import Gnuplot
+from numpy import *
+from decimal import *
+
+__author__ = "Srinivas Pandruvada"
+__copyright__ = " Copyright (c) 2017, Intel Corporation. "
+__license__ = "GPL version 2"
+
+
+MAX_CPUS = 256
+
+# Define the csv file columns
+C_COMM = 18
+C_GHZ = 17
+C_ELAPSED = 16
+C_SAMPLE = 15
+C_DURATION = 14
+C_LOAD = 13
+C_BOOST = 12
+C_FREQ = 11
+C_TSC = 10
+C_APERF = 9
+C_MPERF = 8
+C_TO = 7
+C_FROM = 6
+C_SCALED = 5
+C_CORE = 4
+C_USEC = 3
+C_SEC = 2
+C_CPU = 1
+
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname
+
+# 11 digits covers uptime to 115 days
+getcontext().prec = 11
+
+sample_num =0
+last_sec_cpu = [0] * MAX_CPUS
+last_usec_cpu = [0] * MAX_CPUS
+
+def print_help():
+ print('intel_pstate_tracer.py:')
+ print(' Usage:')
+ print(' If the trace file is available, then to simply parse and plot, use (sudo not required):')
+ print(' ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>')
+ print(' Or')
+ print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
+ print(' To generate trace file, parse and plot, use (sudo required):')
+ print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name>')
+ print(' Or')
+ print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name>')
+ print(' Optional argument:')
+ print(' cpus: comma separated list of CPUs')
+ print(' Output:')
+ print(' If not already present, creates a "results/test_name" folder in the current working directory with:')
+ print(' cpu.csv - comma seperated values file with trace contents and some additional calculations.')
+ print(' cpu???.csv - comma seperated values file for CPU number ???.')
+ print(' *.png - a variety of PNG format plot files created from the trace contents and the additional calculations.')
+ print(' Notes:')
+ print(' Avoid the use of _ (underscore) in test names, because in gnuplot it is a subscript directive.')
+ print(' Maximum number of CPUs is {0:d}. If there are more the script will abort with an error.'.format(MAX_CPUS))
+ print(' Off-line CPUs cause the script to list some warnings, and create some empty files. Use the CPU mask feature for a clean run.')
+ print(' Empty y range warnings for autoscaled plots can occur and can be ignored.')
+
+def plot_perf_busy_with_sample(cpu_index):
+ """ Plot method to per cpu information """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_perf_busy_vs_samples.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+ g_plot('set y2range [0:200]')
+ g_plot('set y2tics 0, 10')
+ g_plot('set title "{} : cpu perf busy vs. sample : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+# Override common
+ g_plot('set xlabel "Samples"')
+ g_plot('set ylabel "P-State"')
+ g_plot('set y2label "Scaled Busy/performance/io-busy(%)"')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y2 title "performance",\\'.format(C_SAMPLE, C_CORE))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 2 axis x1y2 title "scaled-busy",\\'.format(C_SAMPLE, C_SCALED))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 3 axis x1y2 title "io-boost",\\'.format(C_SAMPLE, C_BOOST))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 4 axis x1y1 title "P-State"'.format(C_SAMPLE, C_TO))
+
+def plot_perf_busy(cpu_index):
+ """ Plot some per cpu information """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_perf_busy.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+ g_plot('set y2range [0:200]')
+ g_plot('set y2tics 0, 10')
+ g_plot('set title "{} : perf busy : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+ g_plot('set ylabel "P-State"')
+ g_plot('set y2label "Scaled Busy/performance/io-busy(%)"')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y2 title "performance",\\'.format(C_ELAPSED, C_CORE))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 2 axis x1y2 title "scaled-busy",\\'.format(C_ELAPSED, C_SCALED))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 3 axis x1y2 title "io-boost",\\'.format(C_ELAPSED, C_BOOST))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 4 axis x1y1 title "P-State"'.format(C_ELAPSED, C_TO))
+
+def plot_durations(cpu_index):
+ """ Plot per cpu durations """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_durations.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+# Should autoscale be used here? Should seconds be used here?
+ g_plot('set yrange [0:5000]')
+ g_plot('set ytics 0, 500')
+ g_plot('set title "{} : durations : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+ g_plot('set ylabel "Timer Duration (MilliSeconds)"')
+# override common
+ g_plot('set key off')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DURATION))
+
+def plot_loads(cpu_index):
+ """ Plot per cpu loads """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_loads.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:100]')
+ g_plot('set ytics 0, 10')
+ g_plot('set title "{} : loads : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+ g_plot('set ylabel "CPU load (percent)"')
+# override common
+ g_plot('set key off')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD))
+
+def plot_pstate_cpu_with_sample():
+ """ Plot all cpu information """
+
+ if os.path.exists('cpu.csv'):
+ output_png = 'all_cpu_pstates_vs_samples.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+# override common
+ g_plot('set xlabel "Samples"')
+ g_plot('set ylabel "P-State"')
+ g_plot('set title "{} : cpu pstate vs. sample : {:%F %H:%M}"'.format(testname, datetime.now()))
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_SAMPLE, C_TO)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_pstate_cpu():
+ """ Plot all cpu information from csv files """
+
+ output_png = 'all_cpu_pstates.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+ g_plot('set ylabel "P-State"')
+ g_plot('set title "{} : cpu pstates : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+# the following command is really cool, but doesn't work with the CPU masking option because it aborts on the first missing file.
+# plot_str = 'plot for [i=0:*] file=sprintf("cpu%03d.csv",i) title_s=sprintf("cpu%03d",i) file using 16:7 pt 7 ps 1 title title_s'
+#
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_TO)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_load_cpu():
+ """ Plot all cpu loads """
+
+ output_png = 'all_cpu_loads.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:100]')
+ g_plot('set ylabel "CPU load (percent)"')
+ g_plot('set title "{} : cpu loads : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_LOAD)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_frequency_cpu():
+ """ Plot all cpu frequencies """
+
+ output_png = 'all_cpu_frequencies.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:4]')
+ g_plot('set ylabel "CPU Frequency (GHz)"')
+ g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_duration_cpu():
+ """ Plot all cpu durations """
+
+ output_png = 'all_cpu_durations.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:5000]')
+ g_plot('set ytics 0, 500')
+ g_plot('set ylabel "Timer Duration (MilliSeconds)"')
+ g_plot('set title "{} : cpu durations : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_DURATION)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_scaled_cpu():
+ """ Plot all cpu scaled busy """
+
+ output_png = 'all_cpu_scaled.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+# autoscale this one, no set y range
+ g_plot('set ylabel "Scaled Busy (Unitless)"')
+ g_plot('set title "{} : cpu scaled busy : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_SCALED)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_boost_cpu():
+ """ Plot all cpu IO Boosts """
+
+ output_png = 'all_cpu_boost.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:100]')
+ g_plot('set ylabel "CPU IO Boost (percent)"')
+ g_plot('set title "{} : cpu io boost : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_BOOST)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_ghz_cpu():
+ """ Plot all cpu tsc ghz """
+
+ output_png = 'all_cpu_ghz.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+# autoscale this one, no set y range
+ g_plot('set ylabel "TSC Frequency (GHz)"')
+ g_plot('set title "{} : cpu TSC Frequencies (Sanity check calculation) : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_GHZ)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def common_all_gnuplot_settings(output_png):
+ """ common gnuplot settings for multiple CPUs one one graph. """
+
+ g_plot = common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ return(g_plot)
+
+def common_gnuplot_settings():
+ """ common gnuplot settings. """
+
+ g_plot = Gnuplot.Gnuplot(persist=1)
+# The following line is for rigor only. It seems to be assumed for .csv files
+ g_plot('set datafile separator \",\"')
+ g_plot('set ytics nomirror')
+ g_plot('set xtics nomirror')
+ g_plot('set xtics font ", 10"')
+ g_plot('set ytics font ", 10"')
+ g_plot('set tics out scale 1.0')
+ g_plot('set grid')
+ g_plot('set key out horiz')
+ g_plot('set key bot center')
+ g_plot('set key samplen 2 spacing .8 font ", 9"')
+ g_plot('set term png size 1200, 600')
+ g_plot('set title font ", 11"')
+ g_plot('set ylabel font ", 10"')
+ g_plot('set xlabel font ", 10"')
+ g_plot('set xlabel offset 0, 0.5')
+ g_plot('set xlabel "Elapsed Time (Seconds)"')
+ return(g_plot)
+
+def set_4_plot_linestyles(g_plot):
+ """ set the linestyles used for 4 plots in 1 graphs. """
+
+ g_plot('set style line 1 linetype 1 linecolor rgb "green" pointtype -1')
+ g_plot('set style line 2 linetype 1 linecolor rgb "red" pointtype -1')
+ g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1')
+ g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1')
+
+def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz):
+ """ Store master csv file information """
+
+ global graph_data_present
+
+ if cpu_mask[cpu_int] == 0:
+ return
+
+ try:
+ f_handle = open('cpu.csv', 'a')
+ string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %u, %u, %u, %u, %.4f, %u, %.2f, %.3f, %u, %.3f, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(core_busy), int(scaled), int(_from), int(_to), int(mperf), int(aperf), int(tsc), freq_ghz, int(io_boost), load, duration_ms, sample_num, elapsed_time, tsc_ghz, common_comm)
+ f_handle.write(string_buffer);
+ f_handle.close()
+ except:
+ print('IO error cpu.csv')
+ return
+
+ graph_data_present = True;
+
+def split_csv():
+ """ seperate the all csv file into per CPU csv files. """
+
+ global current_max_cpu
+
+ if os.path.exists('cpu.csv'):
+ for index in range(0, current_max_cpu + 1):
+ if cpu_mask[int(index)] != 0:
+ os.system('grep -m 1 common_cpu cpu.csv > cpu{:0>3}.csv'.format(index))
+ os.system('grep CPU_{:0>3} cpu.csv >> cpu{:0>3}.csv'.format(index, index))
+
+def fix_ownership(path):
+ """Change the owner of the file to SUDO_UID, if required"""
+
+ uid = os.environ.get('SUDO_UID')
+ gid = os.environ.get('SUDO_GID')
+ if uid is not None:
+ os.chown(path, int(uid), int(gid))
+
+def cleanup_data_files():
+ """ clean up existing data files """
+
+ if os.path.exists('cpu.csv'):
+ os.remove('cpu.csv')
+ f_handle = open('cpu.csv', 'a')
+ f_handle.write('common_cpu, common_secs, common_usecs, core_busy, scaled_busy, from, to, mperf, aperf, tsc, freq, boost, load, duration_ms, sample_num, elapsed_time, tsc_ghz, common_comm')
+ f_handle.write('\n')
+ f_handle.close()
+
+def clear_trace_file():
+ """ Clear trace file """
+
+ try:
+ f_handle = open('/sys/kernel/debug/tracing/trace', 'w')
+ f_handle.close()
+ except:
+ print('IO error clearing trace file ')
+ quit()
+
+def enable_trace():
+ """ Enable trace """
+
+ try:
+ open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
+ , 'w').write("1")
+ except:
+ print('IO error enabling trace ')
+ quit()
+
+def disable_trace():
+ """ Disable trace """
+
+ try:
+ open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
+ , 'w').write("0")
+ except:
+ print('IO error disabling trace ')
+ quit()
+
+def set_trace_buffer_size():
+ """ Set trace buffer size """
+
+ try:
+ open('/sys/kernel/debug/tracing/buffer_size_kb'
+ , 'w').write("10240")
+ except:
+ print('IO error setting trace buffer size ')
+ quit()
+
+def read_trace_data(filename):
+ """ Read and parse trace data """
+
+ global current_max_cpu
+ global sample_num, last_sec_cpu, last_usec_cpu, start_time
+
+ try:
+ data = open(filename, 'r').read()
+ except:
+ print('Error opening ', filename)
+ quit()
+
+ for line in data.splitlines():
+ search_obj = \
+ re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?core_busy=)(\d+)(.*?scaled=)(\d+)(.*?from=)(\d+)(.*?to=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)(.*?freq=)(\d+)'
+ , line)
+
+ if search_obj:
+ cpu = search_obj.group(3)
+ cpu_int = int(cpu)
+ cpu = str(cpu_int)
+
+ time_pre_dec = search_obj.group(6)
+ time_post_dec = search_obj.group(8)
+ core_busy = search_obj.group(10)
+ scaled = search_obj.group(12)
+ _from = search_obj.group(14)
+ _to = search_obj.group(16)
+ mperf = search_obj.group(18)
+ aperf = search_obj.group(20)
+ tsc = search_obj.group(22)
+ freq = search_obj.group(24)
+ common_comm = search_obj.group(2).replace(' ', '')
+
+ # Not all kernel versions have io_boost field
+ io_boost = '0'
+ search_obj = re.search(r'.*?io_boost=(\d+)', line)
+ if search_obj:
+ io_boost = search_obj.group(1)
+
+ if sample_num == 0 :
+ start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000)
+ sample_num += 1
+
+ if last_sec_cpu[cpu_int] == 0 :
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ else :
+ duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int]))
+ duration_ms = Decimal(duration_us) / Decimal(1000)
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time
+ load = Decimal(int(mperf)*100)/ Decimal(tsc)
+ freq_ghz = Decimal(freq)/Decimal(1000000)
+# Sanity check calculation, typically anomalies indicate missed samples
+# However, check for 0 (should never occur)
+ tsc_ghz = Decimal(0)
+ if duration_ms != Decimal(0) :
+ tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000)
+ store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz)
+
+ if cpu_int > current_max_cpu:
+ current_max_cpu = cpu_int
+# End of for each trace line loop
+# Now seperate the main overall csv file into per CPU csv files.
+ split_csv()
+
+interval = ""
+filename = ""
+cpu_list = ""
+testname = ""
+graph_data_present = False;
+
+valid1 = False
+valid2 = False
+
+cpu_mask = zeros((MAX_CPUS,), dtype=int)
+
+try:
+ opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:",["help","trace_file=","interval=","cpu=","name="])
+except getopt.GetoptError:
+ print_help()
+ sys.exit(2)
+for opt, arg in opts:
+ if opt == '-h':
+ print()
+ sys.exit()
+ elif opt in ("-t", "--trace_file"):
+ valid1 = True
+ location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+ filename = os.path.join(location, arg)
+ elif opt in ("-i", "--interval"):
+ valid1 = True
+ interval = arg
+ elif opt in ("-c", "--cpu"):
+ cpu_list = arg
+ elif opt in ("-n", "--name"):
+ valid2 = True
+ testname = arg
+
+if not (valid1 and valid2):
+ print_help()
+ sys.exit()
+
+if cpu_list:
+ for p in re.split("[,]", cpu_list):
+ if int(p) < MAX_CPUS :
+ cpu_mask[int(p)] = 1
+else:
+ for i in range (0, MAX_CPUS):
+ cpu_mask[i] = 1
+
+if not os.path.exists('results'):
+ os.mkdir('results')
+ # The regular user needs to own the directory, not root.
+ fix_ownership('results')
+
+os.chdir('results')
+if os.path.exists(testname):
+ print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+ sys.exit()
+os.mkdir(testname)
+# The regular user needs to own the directory, not root.
+fix_ownership(testname)
+os.chdir(testname)
+
+# Temporary (or perhaps not)
+cur_version = sys.version_info
+print('python version (should be >= 2.7):')
+print(cur_version)
+
+# Left as "cleanup" for potential future re-run ability.
+cleanup_data_files()
+
+if interval:
+ filename = "/sys/kernel/debug/tracing/trace"
+ clear_trace_file()
+ set_trace_buffer_size()
+ enable_trace()
+ print('Sleeping for ', interval, 'seconds')
+ time.sleep(int(interval))
+ disable_trace()
+
+current_max_cpu = 0
+
+read_trace_data(filename)
+
+if graph_data_present == False:
+ print('No valid data to plot')
+ sys.exit(2)
+
+for cpu_no in range(0, current_max_cpu + 1):
+ plot_perf_busy_with_sample(cpu_no)
+ plot_perf_busy(cpu_no)
+ plot_durations(cpu_no)
+ plot_loads(cpu_no)
+
+plot_pstate_cpu_with_sample()
+plot_pstate_cpu()
+plot_load_cpu()
+plot_frequency_cpu()
+plot_duration_cpu()
+plot_scaled_cpu()
+plot_boost_cpu()
+plot_ghz_cpu()
+
+# It is preferrable, but not necessary, that the regular user owns the files, not root.
+for root, dirs, files in os.walk('.'):
+ for f in files:
+ fix_ownership(f)
+
+os.chdir('../../')
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 03cb639b292e..ccf2a69365cc 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -16,9 +16,9 @@ idle power-state statistics, temperature and power on X86 processors.
There are two ways to invoke turbostat.
The first method is to supply a
\fBcommand\fP, which is forked and statistics are printed
-upon its completion.
+in one-shot upon its completion.
The second method is to omit the command,
-and turbostat displays statistics every 5 seconds.
+and turbostat displays statistics every 5 seconds interval.
The 5-second interval can be changed using the --interval option.
.PP
Some information is not available on older processors.
@@ -28,9 +28,10 @@ name as necessary to disambiguate it from others is necessary. Note that option
.PP
\fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional.
.nf
- location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP}
+ location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP}
msrDDD is a decimal offset, eg. msr16
msr0xXXX is a hex offset, eg. msr0x10
+ /sys/path... is an absolute path to a sysfs attribute
scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP}
sample and print the counter for every cpu, core, or package.
@@ -45,12 +46,21 @@ name as necessary to disambiguate it from others is necessary. Note that option
'delta' shows the difference in values during the measurement interval.
'percent' shows the delta as a percentage of the cycles elapsed.
default: delta
+
+ name: "name_string"
+ Any string that does not match a key-word above is used
+ as the column header.
.fi
.PP
+\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44
+.PP
+\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group.
+.PP
+\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group.
+.PP
\fB--Dump\fP displays the raw counter values.
.PP
-\fB--debug\fP displays additional system configuration information. Invoking this parameter
-more than once may also enable internal turbostat debug information.
+\fB--quiet\fP Do not decode and print the system configuration header information.
.PP
\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
.PP
@@ -61,9 +71,7 @@ The file is truncated if it already exists, and it is created if it does not exi
.PP
\fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts.
.PP
-\fB--Package\fP limits output to the system summary plus the 1st thread in each Package.
-.PP
-\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings.
+\fB--list\fP display column header names available for use by --show and --hide, then exit.
.PP
\fB--Summary\fP limits output to a 1-line System Summary for each interval.
.PP
@@ -74,24 +82,27 @@ The file is truncated if it already exists, and it is created if it does not exi
The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit,
displays the statistics gathered since it was forked.
.PP
-.SH DEFAULT FIELD DESCRIPTIONS
+.SH ROW DESCRIPTIONS
+The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
+.SH COLUMN DESCRIPTIONS
.nf
+\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
-\fBAVG_MHz\fP number of cycles executed divided by time elapsed.
-\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
-\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
+\fBPackage\fP processor package number -- not present on systems with a single processor package.
+\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed.
+\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state.
+\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state).
\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
-.fi
-.PP
-.SH DEBUG FIELD DESCRIPTIONS
-.nf
-\fBPackage\fP processor package number.
-\fBCore\fP processor core number.
-Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
-\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.
+\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary.
+\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
+\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system.
+\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
+\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
\fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
-\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
\fBPkgWatt\fP Watts consumed by the whole package.
\fBCorWatt\fP Watts consumed by the core part of the package.
\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors.
@@ -99,51 +110,110 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.
\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
.fi
+.SH TOO MUCH INFORMATION EXAMPLE
+By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
+This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
.PP
-.SH PERIODIC EXAMPLE
-Without any parameters, turbostat displays statistics ever 5 seconds.
-Periodic output goes to stdout, by default, unless --out is used to specify an output file.
-The 5-second interval can be changed with th "-i sec" option.
-Or a command may be specified as in "FORK EXAMPLE" below.
+When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list.
+.PP
+To find out what columns are available for --show and --hide, the "--list" option is available. For convenience, the special strings "sysfs" can be used to refer to all of the sysfs C-state counters at once:
+.nf
+sudo ./turbostat --show sysfs --quiet sleep 10
+10.003837 sec
+ C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s%
+ 4 21 2 2 459 0.14 0.82 0.00 0.00 98.93
+ 1 17 2 2 130 0.00 0.02 0.00 0.00 99.80
+ 0 0 0 0 31 0.00 0.00 0.00 0.00 99.95
+ 2 1 0 0 52 1.14 6.49 0.00 0.00 92.21
+ 1 2 0 0 52 0.00 0.08 0.00 0.00 99.86
+ 0 0 0 0 71 0.00 0.00 0.00 0.00 99.89
+ 0 0 0 0 25 0.00 0.00 0.00 0.00 99.96
+ 0 0 0 0 74 0.00 0.00 0.00 0.00 99.94
+ 0 1 0 0 24 0.00 0.00 0.00 0.00 99.84
+.fi
+.PP
+.SH ONE SHOT COMMAND EXAMPLE
+If turbostat is invoked with a command, it will fork that command
+and output the statistics gathered after the command exits.
+In this case, turbostat output goes to stderr, by default.
+Output can instead be saved to a file using the --out option.
+In this example, the "sleep 10" command is forked, and turbostat waits for it to complete before saving all statistics into "ts.out". Note that "sleep 10" is not part of turbostat, but is simply an example of a command that turbostat can fork. The "ts.out" file is what you want to edit in a very wide window, paste into a spreadsheet, or attach to a bugzilla entry.
+
.nf
-[root@hsw]# ./turbostat
- CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
- - 488 12.51 3898 3498
- 0 0 0.01 3885 3498
- 4 3897 99.99 3898 3498
- 1 0 0.00 3861 3498
- 5 0 0.00 3882 3498
- 2 1 0.02 3894 3498
- 6 2 0.06 3898 3498
- 3 0 0.00 3849 3498
- 7 0 0.00 3877 3498
+[root@hsw]# ./turbostat -o ts.out sleep 10
+[root@hsw]#
+.fi
+.SH PERIODIC INTERVAL EXAMPLE
+Without a command to fork, turbostat displays statistics ever 5 seconds.
+Periodic output goes to stdout, by default, unless --out is used to specify an output file.
+The 5-second interval can be changed with the "-i sec" option.
+.nf
+sudo ./turbostat --quiet --hide sysfs,IRQ,SMI,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
+ - - 488 12.52 3900 3498 12.50 0.00 0.00 74.98
+ 0 0 5 0.13 3900 3498 99.87 0.00 0.00 0.00
+ 0 4 3897 99.99 3900 3498 0.01
+ 1 1 0 0.00 3856 3498 0.01 0.00 0.00 99.98
+ 1 5 0 0.00 3861 3498 0.01
+ 2 2 1 0.02 3889 3498 0.03 0.00 0.00 99.95
+ 2 6 0 0.00 3863 3498 0.05
+ 3 3 0 0.01 3869 3498 0.02 0.00 0.00 99.97
+ 3 7 0 0.00 3878 3498 0.03
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
+ - - 491 12.59 3900 3498 12.42 0.00 0.00 74.99
+ 0 0 27 0.69 3900 3498 99.31 0.00 0.00 0.00
+ 0 4 3898 99.99 3900 3498 0.01
+ 1 1 0 0.00 3883 3498 0.01 0.00 0.00 99.99
+ 1 5 0 0.00 3898 3498 0.01
+ 2 2 0 0.01 3889 3498 0.02 0.00 0.00 99.98
+ 2 6 0 0.00 3889 3498 0.02
+ 3 3 0 0.00 3856 3498 0.01 0.00 0.00 99.99
+ 3 7 0 0.00 3897 3498 0.01
.fi
-.SH DEBUG EXAMPLE
-The "--debug" option prints additional system information before measurements:
+This example also shows the use of the --hide option to skip columns that are not wanted.
+Note that cpu4 in this example is 99.99% busy, while the other CPUs are all under 1% busy.
+Notice that cpu4's HT sibling is cpu0, which is under 1% busy, but can get into CPU%c1 only,
+because its cpu4's activity on shared hardware keeps it from entering a deeper C-state.
-The first row of statistics is a summary for the entire system.
-For residency % columns, the summary is a weighted average.
-For Temperature columns, the summary is the column maximum.
-For Watts columns, the summary is a system total.
-Subsequent rows show per-CPU statistics.
+.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE
+
+By default, turbostat always dumps system configuration information
+before taking measurements. In the example above, "--quiet" is used
+to suppress that output. Here is an example of the configuration information:
.nf
-turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org>
+turbostat version 2017.02.15 - Len Brown <lenb@kernel.org>
CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
-CPUID(6): APERF, DTS, PTM, EPB
+CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM TM
+CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB
+cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO)
+CPUID(7): No-SGX
+cpu4: MSR_MISC_PWR_MGMT: 0x00400000 (ENable-EIST_Coordination DISable-EPB DISable-OOB)
RAPL: 3121 sec. Joule Counter Range, at 84 Watts
-cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300
-8 * 100 = 800 MHz max efficiency
-35 * 100 = 3500 MHz TSC frequency
-cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
-cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
-cpu0: MSR_TURBO_RATIO_LIMIT: 0x25262727
-37 * 100 = 3700 MHz max turbo 4 active cores
-38 * 100 = 3800 MHz max turbo 3 active cores
-39 * 100 = 3900 MHz max turbo 2 active cores
-39 * 100 = 3900 MHz max turbo 1 active cores
+cpu4: MSR_PLATFORM_INFO: 0x80838f3012300
+8 * 100.0 = 800.0 MHz max efficiency frequency
+35 * 100.0 = 3500.0 MHz base frequency
+cpu4: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
+cpu4: MSR_TURBO_RATIO_LIMIT: 0x25262727
+37 * 100.0 = 3700.0 MHz max turbo 4 active cores
+38 * 100.0 = 3800.0 MHz max turbo 3 active cores
+39 * 100.0 = 3900.0 MHz max turbo 2 active cores
+39 * 100.0 = 3900.0 MHz max turbo 1 active cores
+cpu4: MSR_CONFIG_TDP_NOMINAL: 0x00000023 (base_ratio=35)
+cpu4: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 ()
+cpu4: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 ()
+cpu4: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1)
+cpu4: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0)
+cpu4: MSR_PKG_CST_CONFIG_CONTROL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
+cpu4: POLL: CPUIDLE CORE POLL IDLE
+cpu4: C1: MWAIT 0x00
+cpu4: C1E: MWAIT 0x01
+cpu4: C3: MWAIT 0x10
+cpu4: C6: MWAIT 0x20
+cpu4: C7s: MWAIT 0x32
+cpu4: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch)
cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
-cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, )
+cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Transitions, MultiCoreTurbo, Amps, Auto-HWP, )
cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
@@ -158,23 +228,14 @@ cpu0: MSR_PP1_POLICY: 0
cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
-cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
-cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
- Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
- - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
- 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
- 0 4 3897 99.98 3898 3498 0 0.02
- 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32
- 1 5 0 0.00 3885 3498 0 0.21
- 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32
- 2 6 2 0.06 3896 3498 0 0.80
- 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28
- 3 7 0 0.00 3879 3498 0 0.04
-^C
-
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884c0800 (24 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x884c0000 (24 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
+cpu2: MSR_IA32_THERM_STATUS: 0x884e0000 (22 C +/- 1)
+cpu3: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
+cpu4: MSR_PKGC3_IRTL: 0x00008842 (valid, 67584 ns)
+cpu4: MSR_PKGC6_IRTL: 0x00008873 (valid, 117760 ns)
+cpu4: MSR_PKGC7_IRTL: 0x00008891 (valid, 148480 ns)
.fi
The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
available at the minimum package voltage. The \fBTSC frequency\fP is the base
@@ -184,42 +245,22 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling.
The remaining rows show what maximum turbo frequency is possible
depending on the number of idle cores. Note that not all information is
available on all processors.
-.PP
-The --debug option adds additional columns to the measurement ouput, including CPU idle power-state residency processor temperature sensor readinds.
-See the field definitions above.
-.SH FORK EXAMPLE
-If turbostat is invoked with a command, it will fork that command
-and output the statistics gathered after the command exits.
-In this case, turbostat output goes to stderr, by default.
-Output can instead be saved to a file using the --out option.
-eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
-until ^C while the other CPUs are mostly idle:
-
+.SH ADD COUNTER EXAMPLE
+Here we limit turbostat to showing just the CPU number for cpu0 - cpu3.
+We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL),
+labeling it with the column header, "PRF_CTRL", and display it only once,
+afte the conclusion of a 0.1 second sleep.
.nf
-root@hsw: turbostat cat /dev/zero > /dev/null
-^C
- CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
- - 482 12.51 3854 3498
- 0 0 0.01 1960 3498
- 4 0 0.00 2128 3498
- 1 0 0.00 3003 3498
- 5 3854 99.98 3855 3498
- 2 0 0.01 3504 3498
- 6 3 0.08 3884 3498
- 3 0 0.00 2553 3498
- 7 0 0.00 2126 3498
-10.783983 sec
+sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1
+0.101604 sec
+CPU PRF_CTRL
+- 0x00000000
+0 0x00000c00
+1 0x00000800
+2 0x00000a00
+3 0x00000800
.fi
-Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit.
-The first row shows the average MHz and Busy% across all the processors in the system.
-
-Note that the Avg_MHz column reflects the total number of cycles executed
-divided by the measurement interval. If the Busy% column is 100%,
-then the processor was running at that speed the entire interval.
-The Avg_MHz multiplied by the Busy% results in the Bzy_MHz --
-which is the average frequency while the processor was executing --
-not including any non-busy idle time.
.SH NOTES
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index f13f61b065c6..0dafba2c1e7d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -49,18 +49,14 @@ FILE *outf;
int *fd_percpu;
struct timespec interval_ts = {5, 0};
unsigned int debug;
+unsigned int quiet;
+unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
+unsigned int list_header_only;
unsigned int dump_only;
-unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
-unsigned int do_pc2;
-unsigned int do_pc3;
-unsigned int do_pc6;
-unsigned int do_pc7;
-unsigned int do_c8_c9_c10;
-unsigned int do_skl_residency;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
@@ -71,25 +67,19 @@ unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
+unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
-int do_irq = 1;
-int do_smi;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
-unsigned int show_pkg;
-unsigned int show_core;
-unsigned int show_cpu;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
-unsigned int do_gfx_rc6_ms;
unsigned long long gfx_cur_rc6_ms;
-unsigned int do_gfx_mhz;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
@@ -102,6 +92,7 @@ unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
+int do_migrate;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
@@ -109,6 +100,7 @@ unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
+unsigned int has_misc_feature_control;
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -148,34 +140,40 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
* Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
*/
#define NAME_BYTES 20
+#define PATH_BYTES 128
int backwards_count;
char *progname;
-cpu_set_t *cpu_present_set, *cpu_affinity_set;
-size_t cpu_present_setsize, cpu_affinity_setsize;
+#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
+cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
+#define MAX_ADDED_COUNTERS 16
struct thread_data {
+ struct timeval tv_begin;
+ struct timeval tv_end;
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
unsigned long long c1;
- unsigned int irq_count;
+ unsigned long long irq_count;
unsigned int smi_count;
unsigned int cpu_id;
unsigned int flags;
#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
- unsigned long long counter[1];
+ unsigned long long counter[MAX_ADDED_COUNTERS];
} *thread_even, *thread_odd;
struct core_data {
unsigned long long c3;
unsigned long long c6;
unsigned long long c7;
+ unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
unsigned int core_id;
- unsigned long long counter[1];
+ unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
struct pkg_data {
@@ -200,7 +198,7 @@ struct pkg_data {
unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
- unsigned long long counter[1];
+ unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
#define ODD_COUNTERS thread_odd, core_odd, package_odd
@@ -215,22 +213,27 @@ struct pkg_data {
#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
-enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS};
+enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
struct msr_counter {
unsigned int msr_num;
char name[NAME_BYTES];
+ char path[PATH_BYTES];
unsigned int width;
enum counter_type type;
enum counter_format format;
struct msr_counter *next;
+ unsigned int flags;
+#define FLAGS_HIDE (1 << 0)
+#define FLAGS_SHOW (1 << 1)
+#define SYSFS_PERCPU (1 << 1)
};
struct sys_counters {
- unsigned int thread_counter_bytes;
- unsigned int core_counter_bytes;
- unsigned int package_counter_bytes;
+ unsigned int added_thread_counters;
+ unsigned int added_core_counters;
+ unsigned int added_package_counters;
struct msr_counter *tp;
struct msr_counter *cp;
struct msr_counter *pp;
@@ -300,6 +303,9 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg
int cpu_migrate(int cpu)
{
+ if (!do_migrate)
+ return 0;
+
CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
@@ -334,147 +340,347 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr)
- err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);
+ err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
return 0;
}
/*
- * Example Format w/ field column widths:
- *
- * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp CoreCnt PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt
- * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
+ * Each string in this array is compared in --show and --hide cmdline.
+ * Thus, strings that are proper sub-sets must follow their more specific peers.
+ */
+struct msr_counter bic[] = {
+ { 0x0, "Package" },
+ { 0x0, "Avg_MHz" },
+ { 0x0, "Bzy_MHz" },
+ { 0x0, "TSC_MHz" },
+ { 0x0, "IRQ" },
+ { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
+ { 0x0, "Busy%" },
+ { 0x0, "CPU%c1" },
+ { 0x0, "CPU%c3" },
+ { 0x0, "CPU%c6" },
+ { 0x0, "CPU%c7" },
+ { 0x0, "ThreadC" },
+ { 0x0, "CoreTmp" },
+ { 0x0, "CoreCnt" },
+ { 0x0, "PkgTmp" },
+ { 0x0, "GFX%rc6" },
+ { 0x0, "GFXMHz" },
+ { 0x0, "Pkg%pc2" },
+ { 0x0, "Pkg%pc3" },
+ { 0x0, "Pkg%pc6" },
+ { 0x0, "Pkg%pc7" },
+ { 0x0, "Pkg%pc8" },
+ { 0x0, "Pkg%pc9" },
+ { 0x0, "Pkg%pc10" },
+ { 0x0, "PkgWatt" },
+ { 0x0, "CorWatt" },
+ { 0x0, "GFXWatt" },
+ { 0x0, "PkgCnt" },
+ { 0x0, "RAMWatt" },
+ { 0x0, "PKG_%" },
+ { 0x0, "RAM_%" },
+ { 0x0, "Pkg_J" },
+ { 0x0, "Cor_J" },
+ { 0x0, "GFX_J" },
+ { 0x0, "RAM_J" },
+ { 0x0, "Core" },
+ { 0x0, "CPU" },
+ { 0x0, "Mod%c6" },
+ { 0x0, "sysfs" },
+ { 0x0, "Totl%C0" },
+ { 0x0, "Any%C0" },
+ { 0x0, "GFX%C0" },
+ { 0x0, "CPUGFX%" },
+};
+
+
+
+#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
+#define BIC_Package (1ULL << 0)
+#define BIC_Avg_MHz (1ULL << 1)
+#define BIC_Bzy_MHz (1ULL << 2)
+#define BIC_TSC_MHz (1ULL << 3)
+#define BIC_IRQ (1ULL << 4)
+#define BIC_SMI (1ULL << 5)
+#define BIC_Busy (1ULL << 6)
+#define BIC_CPU_c1 (1ULL << 7)
+#define BIC_CPU_c3 (1ULL << 8)
+#define BIC_CPU_c6 (1ULL << 9)
+#define BIC_CPU_c7 (1ULL << 10)
+#define BIC_ThreadC (1ULL << 11)
+#define BIC_CoreTmp (1ULL << 12)
+#define BIC_CoreCnt (1ULL << 13)
+#define BIC_PkgTmp (1ULL << 14)
+#define BIC_GFX_rc6 (1ULL << 15)
+#define BIC_GFXMHz (1ULL << 16)
+#define BIC_Pkgpc2 (1ULL << 17)
+#define BIC_Pkgpc3 (1ULL << 18)
+#define BIC_Pkgpc6 (1ULL << 19)
+#define BIC_Pkgpc7 (1ULL << 20)
+#define BIC_Pkgpc8 (1ULL << 21)
+#define BIC_Pkgpc9 (1ULL << 22)
+#define BIC_Pkgpc10 (1ULL << 23)
+#define BIC_PkgWatt (1ULL << 24)
+#define BIC_CorWatt (1ULL << 25)
+#define BIC_GFXWatt (1ULL << 26)
+#define BIC_PkgCnt (1ULL << 27)
+#define BIC_RAMWatt (1ULL << 28)
+#define BIC_PKG__ (1ULL << 29)
+#define BIC_RAM__ (1ULL << 30)
+#define BIC_Pkg_J (1ULL << 31)
+#define BIC_Cor_J (1ULL << 32)
+#define BIC_GFX_J (1ULL << 33)
+#define BIC_RAM_J (1ULL << 34)
+#define BIC_Core (1ULL << 35)
+#define BIC_CPU (1ULL << 36)
+#define BIC_Mod_c6 (1ULL << 37)
+#define BIC_sysfs (1ULL << 38)
+#define BIC_Totl_c0 (1ULL << 39)
+#define BIC_Any_c0 (1ULL << 40)
+#define BIC_GFX_c0 (1ULL << 41)
+#define BIC_CPUGFX (1ULL << 42)
+
+unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
+unsigned long long bic_present = BIC_sysfs;
+
+#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
+#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
+
+#define MAX_DEFERRED 16
+char *deferred_skip_names[MAX_DEFERRED];
+int deferred_skip_index;
+
+/*
+ * HIDE_LIST - hide this list of counters, show the rest [default]
+ * SHOW_LIST - show this list of counters, hide the rest
*/
+enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
-void print_header(void)
+void help(void)
{
- struct msr_counter *mp;
+ fprintf(outf,
+ "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
+ "\n"
+ "Turbostat forks the specified COMMAND and prints statistics\n"
+ "when COMMAND completes.\n"
+ "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
+ "to print statistics, until interrupted.\n"
+ "--add add a counter\n"
+ " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+ "--cpu cpu-set limit output to summary plus cpu-set:\n"
+ " {core | package | j,k,l..m,n-p }\n"
+ "--quiet skip decoding system configuration header\n"
+ "--interval sec Override default 5-second measurement interval\n"
+ "--help print this help message\n"
+ "--list list column headers only\n"
+ "--out file create or truncate \"file\" for all output\n"
+ "--version print version information\n"
+ "\n"
+ "For more help, run \"man turbostat\"\n");
+}
- if (show_pkg)
- outp += sprintf(outp, "\tPackage");
- if (show_core)
- outp += sprintf(outp, "\tCore");
- if (show_cpu)
- outp += sprintf(outp, "\tCPU");
- if (has_aperf)
- outp += sprintf(outp, "\tAvg_MHz");
- if (has_aperf)
- outp += sprintf(outp, "\tBusy%%");
- if (has_aperf)
- outp += sprintf(outp, "\tBzy_MHz");
- outp += sprintf(outp, "\tTSC_MHz");
+/*
+ * bic_lookup
+ * for all the strings in comma separate name_list,
+ * set the approprate bit in return value.
+ */
+unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
+{
+ int i;
+ unsigned long long retval = 0;
- if (!debug)
- goto done;
+ while (name_list) {
+ char *comma;
- if (do_irq)
- outp += sprintf(outp, "\tIRQ");
- if (do_smi)
- outp += sprintf(outp, "\tSMI");
-
- if (do_nhm_cstates)
- outp += sprintf(outp, "\tCPU%%c1");
- if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
- outp += sprintf(outp, "\tCPU%%c3");
- if (do_nhm_cstates)
- outp += sprintf(outp, "\tCPU%%c6");
- if (do_snb_cstates)
- outp += sprintf(outp, "\tCPU%%c7");
+ comma = strchr(name_list, ',');
+
+ if (comma)
+ *comma = '\0';
+
+ for (i = 0; i < MAX_BIC; ++i) {
+ if (!strcmp(name_list, bic[i].name)) {
+ retval |= (1ULL << i);
+ break;
+ }
+ }
+ if (i == MAX_BIC) {
+ if (mode == SHOW_LIST) {
+ fprintf(stderr, "Invalid counter name: %s\n", name_list);
+ exit(-1);
+ }
+ deferred_skip_names[deferred_skip_index++] = name_list;
+ if (debug)
+ fprintf(stderr, "deferred \"%s\"\n", name_list);
+ if (deferred_skip_index >= MAX_DEFERRED) {
+ fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
+ MAX_DEFERRED, name_list);
+ help();
+ exit(1);
+ }
+ }
+
+ name_list = comma;
+ if (name_list)
+ name_list++;
+
+ }
+ return retval;
+}
+
+
+void print_header(char *delim)
+{
+ struct msr_counter *mp;
+ int printed = 0;
+
+ if (debug)
+ outp += sprintf(outp, "usec %s", delim);
+ if (DO_BIC(BIC_Package))
+ outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Core))
+ outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU))
+ outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Avg_MHz))
+ outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Busy))
+ outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Bzy_MHz))
+ outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_TSC_MHz))
+ outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_IRQ)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s IRQ", (printed++ ? delim : ""));
+ else
+ outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
+ }
+
+ if (DO_BIC(BIC_SMI))
+ outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
for (mp = sys.tp; mp; mp = mp->next) {
+
if (mp->format == FORMAT_RAW) {
if (mp->width == 64)
- outp += sprintf(outp, "\t%18.18s", mp->name);
+ outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
else
- outp += sprintf(outp, "\t%10.10s", mp->name);
+ outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
} else {
- outp += sprintf(outp, "\t%-7.7s", mp->name);
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
+ else
+ outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
}
}
- if (do_dts)
- outp += sprintf(outp, "\tCoreTmp");
+ if (DO_BIC(BIC_CPU_c1))
+ outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
+ outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU_c6))
+ outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU_c7))
+ outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_Mod_c6))
+ outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_CoreTmp))
+ outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
for (mp = sys.cp; mp; mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 64)
- outp += sprintf(outp, "\t%18.18s", mp->name);
+ outp += sprintf(outp, "%s%18.18s", delim, mp->name);
else
- outp += sprintf(outp, "\t%10.10s", mp->name);
+ outp += sprintf(outp, "%s%10.10s", delim, mp->name);
} else {
- outp += sprintf(outp, "\t%-7.7s", mp->name);
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8s", delim, mp->name);
+ else
+ outp += sprintf(outp, "%s%s", delim, mp->name);
}
}
- if (do_ptm)
- outp += sprintf(outp, "\tPkgTmp");
-
- if (do_gfx_rc6_ms)
- outp += sprintf(outp, "\tGFX%%rc6");
-
- if (do_gfx_mhz)
- outp += sprintf(outp, "\tGFXMHz");
-
- if (do_skl_residency) {
- outp += sprintf(outp, "\tTotl%%C0");
- outp += sprintf(outp, "\tAny%%C0");
- outp += sprintf(outp, "\tGFX%%C0");
- outp += sprintf(outp, "\tCPUGFX%%");
- }
-
- if (do_pc2)
- outp += sprintf(outp, "\tPkg%%pc2");
- if (do_pc3)
- outp += sprintf(outp, "\tPkg%%pc3");
- if (do_pc6)
- outp += sprintf(outp, "\tPkg%%pc6");
- if (do_pc7)
- outp += sprintf(outp, "\tPkg%%pc7");
- if (do_c8_c9_c10) {
- outp += sprintf(outp, "\tPkg%%pc8");
- outp += sprintf(outp, "\tPkg%%pc9");
- outp += sprintf(outp, "\tPk%%pc10");
- }
+ if (DO_BIC(BIC_PkgTmp))
+ outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_GFX_rc6))
+ outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_GFXMHz))
+ outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_Totl_c0))
+ outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Any_c0))
+ outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFX_c0))
+ outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPUGFX))
+ outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_Pkgpc2))
+ outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc3))
+ outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc6))
+ outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc7))
+ outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc8))
+ outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc9))
+ outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc10))
+ outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
if (do_rapl && !rapl_joules) {
- if (do_rapl & RAPL_PKG)
- outp += sprintf(outp, "\tPkgWatt");
- if (do_rapl & RAPL_CORES_ENERGY_STATUS)
- outp += sprintf(outp, "\tCorWatt");
- if (do_rapl & RAPL_GFX)
- outp += sprintf(outp, "\tGFXWatt");
- if (do_rapl & RAPL_DRAM)
- outp += sprintf(outp, "\tRAMWatt");
- if (do_rapl & RAPL_PKG_PERF_STATUS)
- outp += sprintf(outp, "\tPKG_%%");
- if (do_rapl & RAPL_DRAM_PERF_STATUS)
- outp += sprintf(outp, "\tRAM_%%");
+ if (DO_BIC(BIC_PkgWatt))
+ outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CorWatt))
+ outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFXWatt))
+ outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAMWatt))
+ outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_PKG__))
+ outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAM__))
+ outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
} else if (do_rapl && rapl_joules) {
- if (do_rapl & RAPL_PKG)
- outp += sprintf(outp, "\tPkg_J");
- if (do_rapl & RAPL_CORES_ENERGY_STATUS)
- outp += sprintf(outp, "\tCor_J");
- if (do_rapl & RAPL_GFX)
- outp += sprintf(outp, "\tGFX_J");
- if (do_rapl & RAPL_DRAM)
- outp += sprintf(outp, "\tRAM_J");
- if (do_rapl & RAPL_PKG_PERF_STATUS)
- outp += sprintf(outp, "\tPKG_%%");
- if (do_rapl & RAPL_DRAM_PERF_STATUS)
- outp += sprintf(outp, "\tRAM_%%");
+ if (DO_BIC(BIC_Pkg_J))
+ outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Cor_J))
+ outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFX_J))
+ outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAM_J))
+ outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_PKG__))
+ outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAM__))
+ outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
}
for (mp = sys.pp; mp; mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 64)
- outp += sprintf(outp, "\t%18.18s", mp->name);
+ outp += sprintf(outp, "%s%18.18s", delim, mp->name);
else
- outp += sprintf(outp, "\t%10.10s", mp->name);
+ outp += sprintf(outp, "%s%10.10s", delim, mp->name);
} else {
- outp += sprintf(outp, "\t%-7.7s", mp->name);
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8s", delim, mp->name);
+ else
+ outp += sprintf(outp, "%s%s", delim, mp->name);
}
}
-done:
outp += sprintf(outp, "\n");
}
@@ -494,10 +700,10 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
outp += sprintf(outp, "c1: %016llX\n", t->c1);
- if (do_irq)
- outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
- if (do_smi)
- outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
+ if (DO_BIC(BIC_IRQ))
+ outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
+ if (DO_BIC(BIC_SMI))
+ outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
@@ -516,6 +722,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
i, mp->msr_num, c->counter[i]);
}
+ outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
if (p) {
@@ -527,11 +734,11 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
- if (do_pc3)
+ if (DO_BIC(BIC_Pkgpc3))
outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
- if (do_pc6)
+ if (DO_BIC(BIC_Pkgpc6))
outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
- if (do_pc7)
+ if (DO_BIC(BIC_Pkgpc7))
outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
@@ -563,10 +770,12 @@ int dump_counters(struct thread_data *t, struct core_data *c,
int format_counters(struct thread_data *t, struct core_data *c,
struct pkg_data *p)
{
- double interval_float;
+ double interval_float, tsc;
char *fmt8;
int i;
struct msr_counter *mp;
+ char *delim = "\t";
+ int printed = 0;
/* if showing only 1st thread in core and this isn't one, bail out */
if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -576,106 +785,134 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
+ /*if not summary line and --cpu is used */
+ if ((t != &average.threads) &&
+ (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
+ return 0;
+
+ if (debug) {
+ /* on each row, print how many usec each timestamp took to gather */
+ struct timeval tv;
+
+ timersub(&t->tv_end, &t->tv_begin, &tv);
+ outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
+ }
+
interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+ tsc = t->tsc * tsc_tweak;
+
/* topo columns, print blanks on 1st (average) line */
if (t == &average.threads) {
- if (show_pkg)
- outp += sprintf(outp, "\t-");
- if (show_core)
- outp += sprintf(outp, "\t-");
- if (show_cpu)
- outp += sprintf(outp, "\t-");
+ if (DO_BIC(BIC_Package))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Core))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
} else {
- if (show_pkg) {
+ if (DO_BIC(BIC_Package)) {
if (p)
- outp += sprintf(outp, "\t%d", p->package_id);
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
else
- outp += sprintf(outp, "\t-");
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
- if (show_core) {
+ if (DO_BIC(BIC_Core)) {
if (c)
- outp += sprintf(outp, "\t%d", c->core_id);
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
else
- outp += sprintf(outp, "\t-");
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
- if (show_cpu)
- outp += sprintf(outp, "\t%d", t->cpu_id);
+ if (DO_BIC(BIC_CPU))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
}
- /* Avg_MHz */
- if (has_aperf)
- outp += sprintf(outp, "\t%.0f",
+ if (DO_BIC(BIC_Avg_MHz))
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
1.0 / units * t->aperf / interval_float);
- /* Busy% */
- if (has_aperf)
- outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
+ if (DO_BIC(BIC_Busy))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
- /* Bzy_MHz */
- if (has_aperf) {
+ if (DO_BIC(BIC_Bzy_MHz)) {
if (has_base_hz)
- outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf);
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
else
- outp += sprintf(outp, "\t%.0f",
- 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
+ tsc / units * t->aperf / t->mperf / interval_float);
}
- /* TSC_MHz */
- outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
-
- if (!debug)
- goto done;
+ if (DO_BIC(BIC_TSC_MHz))
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
/* IRQ */
- if (do_irq)
- outp += sprintf(outp, "\t%d", t->irq_count);
+ if (DO_BIC(BIC_IRQ)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
+ }
/* SMI */
- if (do_smi)
- outp += sprintf(outp, "\t%d", t->smi_count);
-
- if (do_nhm_cstates)
- outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc);
-
- /* print per-core data only for 1st thread in core */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
- goto done;
-
- if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
- outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc);
- if (do_nhm_cstates)
- outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc);
- if (do_snb_cstates)
- outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc);
+ if (DO_BIC(BIC_SMI))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
+ /* Added counters */
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
- outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]);
+ outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
else
- outp += sprintf(outp, "\t0x%016llx", t->counter[i]);
+ outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
- outp += sprintf(outp, "\t%8lld", t->counter[i]);
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc);
+ if (mp->type == COUNTER_USEC)
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
+ else
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
}
}
+ /* C1 */
+ if (DO_BIC(BIC_CPU_c1))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
- if (do_dts)
- outp += sprintf(outp, "\t%d", c->core_temp_c);
+
+ /* print per-core data only for 1st thread in core */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ goto done;
+
+ if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
+ if (DO_BIC(BIC_CPU_c6))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
+ if (DO_BIC(BIC_CPU_c7))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
+
+ /* Mod%c6 */
+ if (DO_BIC(BIC_Mod_c6))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
+
+ if (DO_BIC(BIC_CoreTmp))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
- outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]);
+ outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
else
- outp += sprintf(outp, "\t0x%016llx", c->counter[i]);
+ outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
- outp += sprintf(outp, "\t%8lld", c->counter[i]);
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
}
}
@@ -684,95 +921,91 @@ int format_counters(struct thread_data *t, struct core_data *c,
goto done;
/* PkgTmp */
- if (do_ptm)
- outp += sprintf(outp, "\t%d", p->pkg_temp_c);
+ if (DO_BIC(BIC_PkgTmp))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
/* GFXrc6 */
- if (do_gfx_rc6_ms) {
+ if (DO_BIC(BIC_GFX_rc6)) {
if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */
- outp += sprintf(outp, "\t**.**");
+ outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
} else {
- outp += sprintf(outp, "\t%.2f",
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
p->gfx_rc6_ms / 10.0 / interval_float);
}
}
/* GFXMHz */
- if (do_gfx_mhz)
- outp += sprintf(outp, "\t%d", p->gfx_mhz);
+ if (DO_BIC(BIC_GFXMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
- if (do_skl_residency) {
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
- }
-
- if (do_pc2)
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc);
- if (do_pc3)
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc);
- if (do_pc6)
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc);
- if (do_pc7)
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc);
- if (do_c8_c9_c10) {
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc);
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc);
- outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc);
- }
+ if (DO_BIC(BIC_Totl_c0))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
+ if (DO_BIC(BIC_Any_c0))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
+ if (DO_BIC(BIC_GFX_c0))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
+ if (DO_BIC(BIC_CPUGFX))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
+
+ if (DO_BIC(BIC_Pkgpc2))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
+ if (DO_BIC(BIC_Pkgpc3))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
+ if (DO_BIC(BIC_Pkgpc6))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
+ if (DO_BIC(BIC_Pkgpc7))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
+ if (DO_BIC(BIC_Pkgpc8))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
+ if (DO_BIC(BIC_Pkgpc9))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
+ if (DO_BIC(BIC_Pkgpc10))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
/*
* If measurement interval exceeds minimum RAPL Joule Counter range,
* indicate that results are suspect by printing "**" in fraction place.
*/
if (interval_float < rapl_joule_counter_range)
- fmt8 = "\t%.2f";
+ fmt8 = "%s%.2f";
else
fmt8 = "%6.0f**";
- if (do_rapl && !rapl_joules) {
- if (do_rapl & RAPL_PKG)
- outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
- if (do_rapl & RAPL_CORES_ENERGY_STATUS)
- outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
- if (do_rapl & RAPL_GFX)
- outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
- if (do_rapl & RAPL_DRAM)
- outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
- if (do_rapl & RAPL_PKG_PERF_STATUS)
- outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
- if (do_rapl & RAPL_DRAM_PERF_STATUS)
- outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
- } else if (do_rapl && rapl_joules) {
- if (do_rapl & RAPL_PKG)
- outp += sprintf(outp, fmt8,
- p->energy_pkg * rapl_energy_units);
- if (do_rapl & RAPL_CORES)
- outp += sprintf(outp, fmt8,
- p->energy_cores * rapl_energy_units);
- if (do_rapl & RAPL_GFX)
- outp += sprintf(outp, fmt8,
- p->energy_gfx * rapl_energy_units);
- if (do_rapl & RAPL_DRAM)
- outp += sprintf(outp, fmt8,
- p->energy_dram * rapl_dram_energy_units);
- if (do_rapl & RAPL_PKG_PERF_STATUS)
- outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
- if (do_rapl & RAPL_DRAM_PERF_STATUS)
- outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
- }
+ if (DO_BIC(BIC_PkgWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
+ if (DO_BIC(BIC_CorWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ if (DO_BIC(BIC_GFXWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ if (DO_BIC(BIC_RAMWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
+ if (DO_BIC(BIC_Pkg_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+ if (DO_BIC(BIC_Cor_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+ if (DO_BIC(BIC_GFX_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+ if (DO_BIC(BIC_RAM_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+ if (DO_BIC(BIC_PKG__))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ if (DO_BIC(BIC_RAM__))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
- outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]);
+ outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
else
- outp += sprintf(outp, "\t0x%016llx", p->counter[i]);
+ outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
- outp += sprintf(outp, "\t%8lld", p->counter[i]);
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
}
}
@@ -807,7 +1040,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
static int printed;
if (!printed || !summary_only)
- print_header();
+ print_header("\t");
if (topo.num_cpus > 1)
format_counters(&average.threads, &average.cores,
@@ -834,18 +1067,22 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
int i;
struct msr_counter *mp;
- if (do_skl_residency) {
+
+ if (DO_BIC(BIC_Totl_c0))
old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+ if (DO_BIC(BIC_Any_c0))
old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+ if (DO_BIC(BIC_GFX_c0))
old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+ if (DO_BIC(BIC_CPUGFX))
old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
- }
+
old->pc2 = new->pc2 - old->pc2;
- if (do_pc3)
+ if (DO_BIC(BIC_Pkgpc3))
old->pc3 = new->pc3 - old->pc3;
- if (do_pc6)
+ if (DO_BIC(BIC_Pkgpc6))
old->pc6 = new->pc6 - old->pc6;
- if (do_pc7)
+ if (DO_BIC(BIC_Pkgpc7))
old->pc7 = new->pc7 - old->pc7;
old->pc8 = new->pc8 - old->pc8;
old->pc9 = new->pc9 - old->pc9;
@@ -887,6 +1124,7 @@ delta_core(struct core_data *new, struct core_data *old)
old->c6 = new->c6 - old->c6;
old->c7 = new->c7 - old->c7;
old->core_temp_c = new->core_temp_c;
+ old->mc6_us = new->mc6_us - old->mc6_us;
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -916,7 +1154,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->c1 = new->c1 - old->c1;
- if (has_aperf) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
old->aperf = new->aperf - old->aperf;
old->mperf = new->mperf - old->mperf;
@@ -937,11 +1175,11 @@ delta_thread(struct thread_data *new, struct thread_data *old,
* it is possible for mperf's non-halted cycles + idle states
* to exceed TSC's all cycles: show c1 = 0% in that case.
*/
- if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+ if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
old->c1 = 0;
else {
/* normal case, derive c1 */
- old->c1 = old->tsc - old->mperf - core_delta->c3
+ old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
- core_delta->c6 - core_delta->c7;
}
}
@@ -952,10 +1190,10 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->mperf = 1; /* divide by 0 protection */
}
- if (do_irq)
+ if (DO_BIC(BIC_IRQ))
old->irq_count = new->irq_count - old->irq_count;
- if (do_smi)
+ if (DO_BIC(BIC_SMI))
old->smi_count = new->smi_count - old->smi_count;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
@@ -1008,6 +1246,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c3 = 0;
c->c6 = 0;
c->c7 = 0;
+ c->mc6_us = 0;
c->core_temp_c = 0;
p->pkg_wtd_core_c0 = 0;
@@ -1016,11 +1255,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->pkg_both_core_gfxe_c0 = 0;
p->pc2 = 0;
- if (do_pc3)
+ if (DO_BIC(BIC_Pkgpc3))
p->pc3 = 0;
- if (do_pc6)
+ if (DO_BIC(BIC_Pkgpc6))
p->pc6 = 0;
- if (do_pc7)
+ if (DO_BIC(BIC_Pkgpc7))
p->pc7 = 0;
p->pc8 = 0;
p->pc9 = 0;
@@ -1036,7 +1275,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->gfx_rc6_ms = 0;
p->gfx_mhz = 0;
-
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
@@ -1073,6 +1311,7 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.cores.c3 += c->c3;
average.cores.c6 += c->c6;
average.cores.c7 += c->c7;
+ average.cores.mc6_us += c->mc6_us;
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
@@ -1086,19 +1325,21 @@ int sum_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0))
average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+ if (DO_BIC(BIC_Any_c0))
average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+ if (DO_BIC(BIC_GFX_c0))
average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+ if (DO_BIC(BIC_CPUGFX))
average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
- }
average.packages.pc2 += p->pc2;
- if (do_pc3)
+ if (DO_BIC(BIC_Pkgpc3))
average.packages.pc3 += p->pc3;
- if (do_pc6)
+ if (DO_BIC(BIC_Pkgpc6))
average.packages.pc6 += p->pc6;
- if (do_pc7)
+ if (DO_BIC(BIC_Pkgpc7))
average.packages.pc7 += p->pc7;
average.packages.pc8 += p->pc8;
average.packages.pc9 += p->pc9;
@@ -1143,23 +1384,29 @@ void compute_average(struct thread_data *t, struct core_data *c,
average.threads.mperf /= topo.num_cpus;
average.threads.c1 /= topo.num_cpus;
+ if (average.threads.irq_count > 9999999)
+ sums_need_wide_columns = 1;
+
average.cores.c3 /= topo.num_cores;
average.cores.c6 /= topo.num_cores;
average.cores.c7 /= topo.num_cores;
+ average.cores.mc6_us /= topo.num_cores;
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0))
average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_Any_c0))
average.packages.pkg_any_core_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_GFX_c0))
average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_CPUGFX))
average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
- }
average.packages.pc2 /= topo.num_packages;
- if (do_pc3)
+ if (DO_BIC(BIC_Pkgpc3))
average.packages.pc3 /= topo.num_packages;
- if (do_pc6)
+ if (DO_BIC(BIC_Pkgpc6))
average.packages.pc6 /= topo.num_packages;
- if (do_pc7)
+ if (DO_BIC(BIC_Pkgpc7))
average.packages.pc7 /= topo.num_packages;
average.packages.pc8 /= topo.num_packages;
@@ -1169,16 +1416,29 @@ void compute_average(struct thread_data *t, struct core_data *c,
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
continue;
+ if (mp->type == COUNTER_ITEMS) {
+ if (average.threads.counter[i] > 9999999)
+ sums_need_wide_columns = 1;
+ continue;
+ }
average.threads.counter[i] /= topo.num_cpus;
}
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
continue;
+ if (mp->type == COUNTER_ITEMS) {
+ if (average.cores.counter[i] > 9999999)
+ sums_need_wide_columns = 1;
+ }
average.cores.counter[i] /= topo.num_cores;
}
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
continue;
+ if (mp->type == COUNTER_ITEMS) {
+ if (average.packages.counter[i] > 9999999)
+ sums_need_wide_columns = 1;
+ }
average.packages.counter[i] /= topo.num_packages;
}
}
@@ -1193,6 +1453,60 @@ static unsigned long long rdtsc(void)
}
/*
+ * Open a file, and exit on failure
+ */
+FILE *fopen_or_die(const char *path, const char *mode)
+{
+ FILE *filep = fopen(path, mode);
+
+ if (!filep)
+ err(1, "%s: open failed", path);
+ return filep;
+}
+/*
+ * snapshot_sysfs_counter()
+ *
+ * return snapshot of given counter
+ */
+unsigned long long snapshot_sysfs_counter(char *path)
+{
+ FILE *fp;
+ int retval;
+ unsigned long long counter;
+
+ fp = fopen_or_die(path, "r");
+
+ retval = fscanf(fp, "%lld", &counter);
+ if (retval != 1)
+ err(1, "snapshot_sysfs_counter(%s)", path);
+
+ fclose(fp);
+
+ return counter;
+}
+
+int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
+{
+ if (mp->msr_num != 0) {
+ if (get_msr(cpu, mp->msr_num, counterp))
+ return -1;
+ } else {
+ char path[128];
+
+ if (mp->flags & SYSFS_PERCPU) {
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
+ cpu, mp->path);
+
+ *counterp = snapshot_sysfs_counter(path);
+ } else {
+ *counterp = snapshot_sysfs_counter(mp->path);
+ }
+ }
+
+ return 0;
+}
+
+/*
* get_counters(...)
* migrate to cpu
* acquire and record local counters for that cpu
@@ -1205,6 +1519,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
struct msr_counter *mp;
int i;
+
+ gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
@@ -1213,7 +1530,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */
- if (has_aperf) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
/*
@@ -1269,35 +1586,33 @@ retry:
t->mperf = t->mperf * aperf_mperf_multiplier;
}
- if (do_irq)
+ if (DO_BIC(BIC_IRQ))
t->irq_count = irqs_per_cpu[cpu];
- if (do_smi) {
+ if (DO_BIC(BIC_SMI)) {
if (get_msr(cpu, MSR_SMI_COUNT, &msr))
return -5;
t->smi_count = msr & 0xFFFFFFFF;
}
-
- if (use_c1_residency_msr) {
+ if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
return -6;
}
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- if (get_msr(cpu, mp->msr_num, &t->counter[i]))
+ if (get_mp(cpu, mp, &t->counter[i]))
return -10;
}
-
/* collect core counters only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
- return 0;
+ goto done;
- if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
+ if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
}
- if (do_nhm_cstates && !do_knl_cstates) {
+ if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
} else if (do_knl_cstates) {
@@ -1305,55 +1620,74 @@ retry:
return -7;
}
- if (do_snb_cstates)
+ if (DO_BIC(BIC_CPU_c7))
if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
return -8;
- if (do_dts) {
+ if (DO_BIC(BIC_Mod_c6))
+ if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
+ return -8;
+
+ if (DO_BIC(BIC_CoreTmp)) {
if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
return -9;
c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
}
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- if (get_msr(cpu, mp->msr_num, &c->counter[i]))
+ if (get_mp(cpu, mp, &c->counter[i]))
return -10;
}
/* collect package counters only for 1st core in package */
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
- return 0;
+ goto done;
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0)) {
if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
return -10;
+ }
+ if (DO_BIC(BIC_Any_c0)) {
if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
return -11;
+ }
+ if (DO_BIC(BIC_GFX_c0)) {
if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
return -12;
+ }
+ if (DO_BIC(BIC_CPUGFX)) {
if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
return -13;
}
- if (do_pc3)
+ if (DO_BIC(BIC_Pkgpc3))
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
- if (do_pc6)
- if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
- return -10;
- if (do_pc2)
+ if (DO_BIC(BIC_Pkgpc6)) {
+ if (do_slm_cstates) {
+ if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
+ return -10;
+ } else {
+ if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
+ return -10;
+ }
+ }
+
+ if (DO_BIC(BIC_Pkgpc2))
if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
return -11;
- if (do_pc7)
+ if (DO_BIC(BIC_Pkgpc7))
if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
return -12;
- if (do_c8_c9_c10) {
+ if (DO_BIC(BIC_Pkgpc8))
if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
return -13;
+ if (DO_BIC(BIC_Pkgpc9))
if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
return -13;
+ if (DO_BIC(BIC_Pkgpc10))
if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
return -13;
- }
+
if (do_rapl & RAPL_PKG) {
if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
return -13;
@@ -1384,22 +1718,24 @@ retry:
return -16;
p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
}
- if (do_ptm) {
+ if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
}
- if (do_gfx_rc6_ms)
+ if (DO_BIC(BIC_GFX_rc6))
p->gfx_rc6_ms = gfx_cur_rc6_ms;
- if (do_gfx_mhz)
+ if (DO_BIC(BIC_GFXMHz))
p->gfx_mhz = gfx_cur_mhz;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (get_msr(cpu, mp->msr_num, &p->counter[i]))
+ if (get_mp(cpu, mp, &p->counter[i]))
return -10;
}
+done:
+ gettimeofday(&t->tv_end, (struct timezone *)NULL);
return 0;
}
@@ -1433,8 +1769,8 @@ char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
+int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
@@ -1457,11 +1793,11 @@ dump_nhm_platform_info(void)
fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 40) & 0xFF;
- fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
- fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
ratio, bclk, ratio * bclk);
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -1483,12 +1819,12 @@ dump_hsw_turbo_ratio_limits(void)
ratio = (msr >> 8) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
ratio, bclk, ratio * bclk);
return;
}
@@ -1505,99 +1841,175 @@ dump_ivt_turbo_ratio_limits(void)
ratio = (msr >> 56) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 48) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 40) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 32) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 24) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
ratio, bclk, ratio * bclk);
return;
}
+int has_turbo_ratio_group_limits(int family, int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_ATOM_DENVERTON:
+ return 1;
+ }
+ return 0;
+}
static void
-dump_nhm_turbo_ratio_limits(void)
+dump_turbo_ratio_limits(int family, int model)
{
- unsigned long long msr;
- unsigned int ratio;
+ unsigned long long msr, core_counts;
+ unsigned int ratio, group_size;
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
-
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
+ if (has_turbo_ratio_group_limits(family, model)) {
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
+ } else {
+ core_counts = 0x0807060504030201;
+ }
+
ratio = (msr >> 56) & 0xFF;
+ group_size = (core_counts >> 56) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
ratio = (msr >> 48) & 0xFF;
+ group_size = (core_counts >> 48) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
ratio = (msr >> 40) & 0xFF;
+ group_size = (core_counts >> 40) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
ratio = (msr >> 32) & 0xFF;
+ group_size = (core_counts >> 32) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
ratio = (msr >> 24) & 0xFF;
+ group_size = (core_counts >> 24) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
ratio = (msr >> 16) & 0xFF;
+ group_size = (core_counts >> 16) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
ratio = (msr >> 8) & 0xFF;
+ group_size = (core_counts >> 8) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
ratio = (msr >> 0) & 0xFF;
+ group_size = (core_counts >> 0) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
return;
}
static void
+dump_atom_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
+ fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
+
+ ratio = (msr >> 0) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 16) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
+ ratio, bclk, ratio * bclk);
+
+ get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
+ fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
+
+ ratio = (msr >> 24) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 16) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
+ ratio, bclk, ratio * bclk);
+}
+
+static void
dump_knl_turbo_ratio_limits(void)
{
const unsigned int buckets_no = 7;
@@ -1652,7 +2064,7 @@ dump_knl_turbo_ratio_limits(void)
for (i = buckets_no - 1; i >= 0; i--)
if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
fprintf(outf,
- "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ "%d * %.1f = %.1f MHz max turbo %d active cores\n",
ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
@@ -1661,12 +2073,12 @@ dump_nhm_cst_cfg(void)
{
unsigned long long msr;
- get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+ get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
- fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
@@ -1810,16 +2222,6 @@ void free_all_buffers(void)
free(irqs_per_cpu);
}
-/*
- * Open a file, and exit on failure
- */
-FILE *fopen_or_die(const char *path, const char *mode)
-{
- FILE *filep = fopen(path, mode);
- if (!filep)
- err(1, "%s: open failed", path);
- return filep;
-}
/*
* Parse a file containing a single int.
@@ -2131,8 +2533,10 @@ int snapshot_gfx_mhz(void)
if (fp == NULL)
fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- else
+ else {
rewind(fp);
+ fflush(fp);
+ }
retval = fscanf(fp, "%d", &gfx_cur_mhz);
if (retval != 1)
@@ -2148,13 +2552,14 @@ int snapshot_gfx_mhz(void)
*/
int snapshot_proc_sysfs_files(void)
{
- if (snapshot_proc_interrupts())
- return 1;
+ if (DO_BIC(BIC_IRQ))
+ if (snapshot_proc_interrupts())
+ return 1;
- if (do_gfx_rc6_ms)
+ if (DO_BIC(BIC_GFX_rc6))
snapshot_gfx_rc6_ms();
- if (do_gfx_mhz)
+ if (DO_BIC(BIC_GFXMHz))
snapshot_gfx_mhz();
return 0;
@@ -2283,7 +2688,9 @@ void check_permissions()
* MSR_SMI_COUNT 0x00000034
*
* MSR_PLATFORM_INFO 0x000000ce
- * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
+ * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
+ *
+ * MSR_MISC_PWR_MGMT 0x000001aa
*
* MSR_PKG_C3_RESIDENCY 0x000003f8
* MSR_PKG_C6_RESIDENCY 0x000003f9
@@ -2291,7 +2698,8 @@ void check_permissions()
* MSR_CORE_C6_RESIDENCY 0x000003fd
*
* Side effect:
- * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL
+ * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
+ * sets has_misc_feature_control
*/
int probe_nhm_msrs(unsigned int family, unsigned int model)
{
@@ -2322,6 +2730,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_IVYBRIDGE: /* IVB */
case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
pkg_cstate_limits = snb_pkg_cstate_limits;
+ has_misc_feature_control = 1;
break;
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_X: /* HSX */
@@ -2336,29 +2745,34 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
pkg_cstate_limits = hsw_pkg_cstate_limits;
+ has_misc_feature_control = 1;
break;
case INTEL_FAM6_SKYLAKE_X: /* SKX */
pkg_cstate_limits = skx_pkg_cstate_limits;
+ has_misc_feature_control = 1;
break;
case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
+ no_MSR_MISC_PWR_MGMT = 1;
case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
pkg_cstate_limits = slv_pkg_cstate_limits;
break;
case INTEL_FAM6_ATOM_AIRMONT: /* AMT */
pkg_cstate_limits = amt_pkg_cstate_limits;
+ no_MSR_MISC_PWR_MGMT = 1;
break;
case INTEL_FAM6_XEON_PHI_KNL: /* PHI */
case INTEL_FAM6_XEON_PHI_KNM:
pkg_cstate_limits = phi_pkg_cstate_limits;
break;
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GEMINI_LAKE:
case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
pkg_cstate_limits = bxt_pkg_cstate_limits;
break;
default:
return 0;
}
- get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+ get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -2368,8 +2782,69 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
has_base_hz = 1;
return 1;
}
-int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
+/*
+ * SLV client has support for unique MSRs:
+ *
+ * MSR_CC6_DEMOTION_POLICY_CONFIG
+ * MSR_MC6_DEMOTION_POLICY_CONFIG
+ */
+
+int has_slv_msrs(unsigned int family, unsigned int model)
{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_MERRIFIELD:
+ case INTEL_FAM6_ATOM_MOOREFIELD:
+ return 1;
+ }
+ return 0;
+}
+int is_dnv(unsigned int family, unsigned int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_DENVERTON:
+ return 1;
+ }
+ return 0;
+}
+int is_bdx(unsigned int family, unsigned int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ return 1;
+ }
+ return 0;
+}
+int is_skx(unsigned int family, unsigned int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_SKYLAKE_X:
+ return 1;
+ }
+ return 0;
+}
+
+int has_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (has_slv_msrs(family, model))
+ return 0;
+
switch (model) {
/* Nehalem compatible, but do not include turbo-ratio limit support */
case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
@@ -2381,6 +2856,13 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
return 1;
}
}
+int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (has_slv_msrs(family, model))
+ return 1;
+
+ return 0;
+}
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
{
if (!genuine_intel)
@@ -2429,6 +2911,22 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
return 0;
}
}
+int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_SKYLAKE_X:
+ return 1;
+ default:
+ return 0;
+ }
+}
int has_config_tdp(unsigned int family, unsigned int model)
{
if (!genuine_intel)
@@ -2475,8 +2973,11 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
if (has_ivt_turbo_ratio_limit(family, model))
dump_ivt_turbo_ratio_limits();
- if (has_nhm_turbo_ratio_limit(family, model))
- dump_nhm_turbo_ratio_limits();
+ if (has_turbo_ratio_limit(family, model))
+ dump_turbo_ratio_limits(family, model);
+
+ if (has_atom_turbo_ratio_limit(family, model))
+ dump_atom_turbo_ratio_limits();
if (has_knl_turbo_ratio_limit(family, model))
dump_knl_turbo_ratio_limits();
@@ -2487,6 +2988,96 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
dump_nhm_cst_cfg();
}
+static void
+dump_sysfs_cstate_config(void)
+{
+ char path[64];
+ char name_buf[16];
+ char desc[64];
+ FILE *input;
+ int state;
+ char *sp;
+
+ if (!DO_BIC(BIC_sysfs))
+ return;
+
+ for (state = 0; state < 10; ++state) {
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+ fgets(name_buf, sizeof(name_buf), input);
+
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+ *sp = '\0';
+
+ fclose(input);
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+ fgets(desc, sizeof(desc), input);
+
+ fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
+ fclose(input);
+ }
+}
+static void
+dump_sysfs_pstate_config(void)
+{
+ char path[64];
+ char driver_buf[64];
+ char governor_buf[64];
+ FILE *input;
+ int turbo;
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
+ base_cpu);
+ input = fopen(path, "r");
+ if (input == NULL) {
+ fprintf(stderr, "NSFOD %s\n", path);
+ return;
+ }
+ fgets(driver_buf, sizeof(driver_buf), input);
+ fclose(input);
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
+ base_cpu);
+ input = fopen(path, "r");
+ if (input == NULL) {
+ fprintf(stderr, "NSFOD %s\n", path);
+ return;
+ }
+ fgets(governor_buf, sizeof(governor_buf), input);
+ fclose(input);
+
+ fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
+ fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
+
+ sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
+ input = fopen(path, "r");
+ if (input != NULL) {
+ fscanf(input, "%d", &turbo);
+ fprintf(outf, "cpufreq boost: %d\n", turbo);
+ fclose(input);
+ }
+
+ sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
+ input = fopen(path, "r");
+ if (input != NULL) {
+ fscanf(input, "%d", &turbo);
+ fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
+ fclose(input);
+ }
+}
+
/*
* print_epb()
@@ -2570,7 +3161,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
- "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
+ "(high %d guar %d eff %d low %d)\n",
cpu, msr,
(unsigned int)HWP_HIGHEST_PERF(msr),
(unsigned int)HWP_GUARANTEED_PERF(msr),
@@ -2581,7 +3172,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
- "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
+ "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
cpu, msr,
(unsigned int)(((msr) >> 0) & 0xff),
(unsigned int)(((msr) >> 8) & 0xff),
@@ -2595,7 +3186,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
- "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
+ "(min %d max %d des %d epp 0x%x window 0x%x)\n",
cpu, msr,
(unsigned int)(((msr) >> 0) & 0xff),
(unsigned int)(((msr) >> 8) & 0xff),
@@ -2790,15 +3381,42 @@ void rapl_probe(unsigned int family, unsigned int model)
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_GFX_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_GFXWatt);
+ }
break;
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GEMINI_LAKE:
do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
+ if (rapl_joules)
+ BIC_PRESENT(BIC_Pkg_J);
+ else
+ BIC_PRESENT(BIC_PkgWatt);
break;
case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
- do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ BIC_PRESENT(BIC_GFX_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ BIC_PRESENT(BIC_GFXWatt);
+ }
break;
case INTEL_FAM6_HASWELL_X: /* HSX */
case INTEL_FAM6_BROADWELL_X: /* BDX */
@@ -2807,17 +3425,55 @@ void rapl_probe(unsigned int family, unsigned int model)
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
case INTEL_FAM6_XEON_PHI_KNM:
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_RAM_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ }
break;
case INTEL_FAM6_SANDYBRIDGE_X:
case INTEL_FAM6_IVYBRIDGE_X:
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ }
break;
case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
do_rapl = RAPL_PKG | RAPL_CORES;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ }
break;
case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ }
break;
default:
return;
@@ -2844,7 +3500,7 @@ void rapl_probe(unsigned int family, unsigned int model)
tdp = get_tdp(model);
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
- if (debug)
+ if (!quiet)
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
return;
@@ -2874,7 +3530,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
- unsigned int dts;
+ unsigned int dts, dts2;
int cpu;
if (!(do_dts || do_ptm))
@@ -2899,7 +3555,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
cpu, msr, tcc_activation_temp - dts);
-#ifdef THERM_DEBUG
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
return 0;
@@ -2907,11 +3562,10 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts2 = (msr >> 8) & 0x7F;
fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
-#endif
}
- if (do_dts) {
+ if (do_dts && debug) {
unsigned int resolution;
if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
@@ -2922,7 +3576,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
cpu, msr, tcc_activation_temp - dts, resolution);
-#ifdef THERM_DEBUG
if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
return 0;
@@ -2930,7 +3583,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts2 = (msr >> 8) & 0x7F;
fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
-#endif
}
return 0;
@@ -2969,11 +3621,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
return -1;
- if (debug) {
- fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
- "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
- rapl_power_units, rapl_energy_units, rapl_time_units);
- }
+ fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
+ rapl_power_units, rapl_energy_units, rapl_time_units);
+
if (do_rapl & RAPL_PKG_POWER_INFO) {
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
@@ -2994,7 +3644,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -9;
fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 63) & 1 ? "": "UN");
+ cpu, msr, (msr >> 63) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "PKG Limit #1");
fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
@@ -3020,40 +3670,34 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "": "UN");
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
if (do_rapl & RAPL_CORE_POLICY) {
- if (debug) {
- if (get_msr(cpu, MSR_PP0_POLICY, &msr))
- return -7;
+ if (get_msr(cpu, MSR_PP0_POLICY, &msr))
+ return -7;
- fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
- }
+ fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
}
if (do_rapl & RAPL_CORES_POWER_LIMIT) {
- if (debug) {
- if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
- return -9;
- fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "": "UN");
- print_power_limit_msr(cpu, msr, "Cores Limit");
- }
+ if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
+ return -9;
+ fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ print_power_limit_msr(cpu, msr, "Cores Limit");
}
if (do_rapl & RAPL_GFX) {
- if (debug) {
- if (get_msr(cpu, MSR_PP1_POLICY, &msr))
- return -8;
+ if (get_msr(cpu, MSR_PP1_POLICY, &msr))
+ return -8;
- fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
+ fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
- if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
- return -9;
- fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "": "UN");
- print_power_limit_msr(cpu, msr, "GFX Limit");
- }
+ if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
+ return -9;
+ fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ print_power_limit_msr(cpu, msr, "GFX Limit");
}
return 0;
}
@@ -3090,6 +3734,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GEMINI_LAKE:
case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
return 1;
}
@@ -3121,6 +3766,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GEMINI_LAKE:
return 1;
}
return 0;
@@ -3149,8 +3795,6 @@ int has_skl_msrs(unsigned int family, unsigned int model)
return 0;
}
-
-
int is_slm(unsigned int family, unsigned int model)
{
if (!genuine_intel)
@@ -3201,7 +3845,8 @@ double slm_bclk(void)
}
freq = slm_freq_table[i];
- fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+ if (!quiet)
+ fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
return freq;
}
@@ -3264,7 +3909,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
target_c_local = (msr >> 16) & 0xFF;
- if (debug)
+ if (!quiet)
fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
cpu, msr, target_c_local);
@@ -3298,14 +3943,34 @@ void decode_misc_enable_msr(void)
{
unsigned long long msr;
+ if (!genuine_intel)
+ return;
+
if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
- fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
+ fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
base_cpu, msr,
- msr & (1 << 3) ? "TCC" : "",
- msr & (1 << 16) ? "EIST" : "",
- msr & (1 << 18) ? "MONITOR" : "");
+ msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
+ msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
+ msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "",
+ msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
+ msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
}
+void decode_misc_feature_control(void)
+{
+ unsigned long long msr;
+
+ if (!has_misc_feature_control)
+ return;
+
+ if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
+ fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
+ base_cpu, msr,
+ msr & (0 << 0) ? "No-" : "",
+ msr & (1 << 0) ? "No-" : "",
+ msr & (2 << 0) ? "No-" : "",
+ msr & (3 << 0) ? "No-" : "");
+}
/*
* Decode MSR_MISC_PWR_MGMT
*
@@ -3320,6 +3985,9 @@ void decode_misc_pwr_mgmt_msr(void)
if (!do_nhm_platform_info)
return;
+ if (no_MSR_MISC_PWR_MGMT)
+ return;
+
if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
base_cpu, msr,
@@ -3327,11 +3995,30 @@ void decode_misc_pwr_mgmt_msr(void)
msr & (1 << 1) ? "EN" : "DIS",
msr & (1 << 8) ? "EN" : "DIS");
}
+/*
+ * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
+ *
+ * This MSRs are present on Silvermont processors,
+ * Intel Atom processor E3000 series (Baytrail), and friends.
+ */
+void decode_c6_demotion_policy_msr(void)
+{
+ unsigned long long msr;
+
+ if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
+ fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
+ base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
+
+ if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
+ fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
+ base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
+}
void process_cpuid()
{
unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
unsigned int fms, family, model, stepping;
+ unsigned int has_turbo;
eax = ebx = ecx = edx = 0;
@@ -3340,7 +4027,7 @@ void process_cpuid()
if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
genuine_intel = 1;
- if (debug)
+ if (!quiet)
fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
(char *)&ebx, (char *)&edx, (char *)&ecx);
@@ -3351,7 +4038,7 @@ void process_cpuid()
if (family == 6 || family == 0xf)
model += ((fms >> 16) & 0xf) << 4;
- if (debug) {
+ if (!quiet) {
fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
max_level, family, model, stepping, family, model, stepping);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
@@ -3394,8 +4081,18 @@ void process_cpuid()
__cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0);
+ if (has_aperf) {
+ BIC_PRESENT(BIC_Avg_MHz);
+ BIC_PRESENT(BIC_Busy);
+ BIC_PRESENT(BIC_Bzy_MHz);
+ }
do_dts = eax & (1 << 0);
+ if (do_dts)
+ BIC_PRESENT(BIC_CoreTmp);
+ has_turbo = eax & (1 << 1);
do_ptm = eax & (1 << 6);
+ if (do_ptm)
+ BIC_PRESENT(BIC_PkgTmp);
has_hwp = eax & (1 << 7);
has_hwp_notify = eax & (1 << 8);
has_hwp_activity_window = eax & (1 << 9);
@@ -3403,10 +4100,11 @@ void process_cpuid()
has_hwp_pkg = eax & (1 << 11);
has_epb = ecx & (1 << 3);
- if (debug)
- fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
+ if (!quiet)
+ fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
has_aperf ? "" : "No-",
+ has_turbo ? "" : "No-",
do_dts ? "" : "No-",
do_ptm ? "" : "No-",
has_hwp ? "" : "No-",
@@ -3416,10 +4114,11 @@ void process_cpuid()
has_hwp_pkg ? "" : "No-",
has_epb ? "" : "No-");
- if (debug)
+ if (!quiet)
decode_misc_enable_msr();
- if (max_level >= 0x7 && debug) {
+
+ if (max_level >= 0x7 && !quiet) {
int has_sgx;
ecx = 0;
@@ -3445,7 +4144,7 @@ void process_cpuid()
if (ebx_tsc != 0) {
- if (debug && (ebx != 0))
+ if (!quiet && (ebx != 0))
fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
eax_crystal, ebx_tsc, crystal_hz);
@@ -3462,6 +4161,7 @@ void process_cpuid()
crystal_hz = 25000000; /* 25.0 MHz */
break;
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GEMINI_LAKE:
crystal_hz = 19200000; /* 19.2 MHz */
break;
default:
@@ -3470,7 +4170,7 @@ void process_cpuid()
if (crystal_hz) {
tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
- if (debug)
+ if (!quiet)
fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
}
@@ -3485,7 +4185,7 @@ void process_cpuid()
base_mhz = max_mhz = bus_mhz = edx = 0;
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
- if (debug)
+ if (!quiet)
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
base_mhz, max_mhz, bus_mhz);
}
@@ -3493,56 +4193,101 @@ void process_cpuid()
if (has_aperf)
aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
- do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
+ BIC_PRESENT(BIC_IRQ);
+ BIC_PRESENT(BIC_TSC_MHz);
+
+ if (probe_nhm_msrs(family, model)) {
+ do_nhm_platform_info = 1;
+ BIC_PRESENT(BIC_CPU_c1);
+ BIC_PRESENT(BIC_CPU_c3);
+ BIC_PRESENT(BIC_CPU_c6);
+ BIC_PRESENT(BIC_SMI);
+ }
do_snb_cstates = has_snb_msrs(family, model);
+
+ if (do_snb_cstates)
+ BIC_PRESENT(BIC_CPU_c7);
+
do_irtl_snb = has_snb_msrs(family, model);
- do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
- do_pc3 = (pkg_cstate_limit >= PCL__3);
- do_pc6 = (pkg_cstate_limit >= PCL__6);
- do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
- do_c8_c9_c10 = has_hsw_msrs(family, model);
+ if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
+ BIC_PRESENT(BIC_Pkgpc2);
+ if (pkg_cstate_limit >= PCL__3)
+ BIC_PRESENT(BIC_Pkgpc3);
+ if (pkg_cstate_limit >= PCL__6)
+ BIC_PRESENT(BIC_Pkgpc6);
+ if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
+ BIC_PRESENT(BIC_Pkgpc7);
+ if (has_slv_msrs(family, model)) {
+ BIC_NOT_PRESENT(BIC_Pkgpc2);
+ BIC_NOT_PRESENT(BIC_Pkgpc3);
+ BIC_PRESENT(BIC_Pkgpc6);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ BIC_PRESENT(BIC_Mod_c6);
+ use_c1_residency_msr = 1;
+ }
+ if (is_dnv(family, model)) {
+ BIC_PRESENT(BIC_CPU_c1);
+ BIC_NOT_PRESENT(BIC_CPU_c3);
+ BIC_NOT_PRESENT(BIC_Pkgpc3);
+ BIC_NOT_PRESENT(BIC_CPU_c7);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ use_c1_residency_msr = 1;
+ }
+ if (is_skx(family, model)) {
+ BIC_NOT_PRESENT(BIC_CPU_c3);
+ BIC_NOT_PRESENT(BIC_Pkgpc3);
+ BIC_NOT_PRESENT(BIC_CPU_c7);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ }
+ if (is_bdx(family, model)) {
+ BIC_NOT_PRESENT(BIC_CPU_c7);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ }
+ if (has_hsw_msrs(family, model)) {
+ BIC_PRESENT(BIC_Pkgpc8);
+ BIC_PRESENT(BIC_Pkgpc9);
+ BIC_PRESENT(BIC_Pkgpc10);
+ }
do_irtl_hsw = has_hsw_msrs(family, model);
- do_skl_residency = has_skl_msrs(family, model);
+ if (has_skl_msrs(family, model)) {
+ BIC_PRESENT(BIC_Totl_c0);
+ BIC_PRESENT(BIC_Any_c0);
+ BIC_PRESENT(BIC_GFX_c0);
+ BIC_PRESENT(BIC_CPUGFX);
+ }
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
- if (debug)
+ if (!quiet)
decode_misc_pwr_mgmt_msr();
+ if (!quiet && has_slv_msrs(family, model))
+ decode_c6_demotion_policy_msr();
+
rapl_probe(family, model);
perf_limit_reasons_probe(family, model);
- if (debug)
+ if (!quiet)
dump_cstate_pstate_config_info(family, model);
+ if (!quiet)
+ dump_sysfs_cstate_config();
+ if (!quiet)
+ dump_sysfs_pstate_config();
+
if (has_skl_msrs(family, model))
calculate_tsc_tweak();
- do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
+ if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+ BIC_PRESENT(BIC_GFX_rc6);
- do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
+ if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ BIC_PRESENT(BIC_GFXMHz);
- return;
-}
+ if (!quiet)
+ decode_misc_feature_control();
-void help()
-{
- fprintf(outf,
- "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
- "\n"
- "Turbostat forks the specified COMMAND and prints statistics\n"
- "when COMMAND completes.\n"
- "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
- "to print statistics, until interrupted.\n"
- "--add add a counter\n"
- " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
- "--debug run in \"debug\" mode\n"
- "--interval sec Override default 5-second measurement interval\n"
- "--help print this help message\n"
- "--out file create or truncate \"file\" for all output\n"
- "--version print version information\n"
- "\n"
- "For more help, run \"man turbostat\"\n");
+ return;
}
@@ -3579,7 +4324,7 @@ void topology_probe()
topo.max_cpu_num = 0;
for_all_proc_cpus(count_cpus);
if (!summary_only && topo.num_cpus > 1)
- show_cpu = 1;
+ BIC_PRESENT(BIC_CPU);
if (debug > 1)
fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
@@ -3599,6 +4344,15 @@ void topology_probe()
for_all_proc_cpus(mark_cpu_present);
/*
+ * Validate that all cpus in cpu_subset are also in cpu_present_set
+ */
+ for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
+ if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
+ if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
+ err(1, "cpu%d not present", i);
+ }
+
+ /*
* Allocate and initialize cpu_affinity_set
*/
cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
@@ -3639,15 +4393,15 @@ void topology_probe()
if (debug > 1)
fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
max_core_id, topo.num_cores_per_pkg);
- if (debug && !summary_only && topo.num_cores_per_pkg > 1)
- show_core = 1;
+ if (!summary_only && topo.num_cores_per_pkg > 1)
+ BIC_PRESENT(BIC_Core);
topo.num_packages = max_package_id + 1;
if (debug > 1)
fprintf(outf, "max_package_id %d, sizing for %d packages\n",
max_package_id, topo.num_packages);
- if (debug && !summary_only && topo.num_packages > 1)
- show_pkg = 1;
+ if (!summary_only && topo.num_packages > 1)
+ BIC_PRESENT(BIC_Package);
topo.num_threads_per_core = max_siblings;
if (debug > 1)
@@ -3662,7 +4416,7 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data
int i;
*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
- topo.num_packages, sizeof(struct thread_data) + sys.thread_counter_bytes);
+ topo.num_packages, sizeof(struct thread_data));
if (*t == NULL)
goto error;
@@ -3671,14 +4425,14 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data
(*t)[i].cpu_id = -1;
*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
- sizeof(struct core_data) + sys.core_counter_bytes);
+ sizeof(struct core_data));
if (*c == NULL)
goto error;
for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
(*c)[i].core_id = -1;
- *p = calloc(topo.num_packages, sizeof(struct pkg_data) + sys.package_counter_bytes);
+ *p = calloc(topo.num_packages, sizeof(struct pkg_data));
if (*p == NULL)
goto error;
@@ -3789,24 +4543,24 @@ void turbostat_init()
process_cpuid();
- if (debug)
+ if (!quiet)
for_all_cpus(print_hwp, ODD_COUNTERS);
- if (debug)
+ if (!quiet)
for_all_cpus(print_epb, ODD_COUNTERS);
- if (debug)
+ if (!quiet)
for_all_cpus(print_perf_limit, ODD_COUNTERS);
- if (debug)
+ if (!quiet)
for_all_cpus(print_rapl, ODD_COUNTERS);
for_all_cpus(set_temperature_target, ODD_COUNTERS);
- if (debug)
+ if (!quiet)
for_all_cpus(print_thermal, ODD_COUNTERS);
- if (debug && do_irtl_snb)
+ if (!quiet && do_irtl_snb)
print_irtl();
}
@@ -3815,6 +4569,7 @@ int fork_it(char **argv)
pid_t child_pid;
int status;
+ snapshot_proc_sysfs_files();
status = for_all_cpus(get_counters, EVEN_COUNTERS);
if (status)
exit(status);
@@ -3826,6 +4581,7 @@ int fork_it(char **argv)
if (!child_pid) {
/* child */
execvp(argv[0], argv);
+ err(errno, "exec %s", argv[0]);
} else {
/* parent */
@@ -3841,6 +4597,7 @@ int fork_it(char **argv)
* n.b. fork_it() does not check for errors from for_all_cpus()
* because re-starting is problematic when forking
*/
+ snapshot_proc_sysfs_files();
for_all_cpus(get_counters, ODD_COUNTERS);
gettimeofday(&tv_odd, (struct timezone *)NULL);
timersub(&tv_odd, &tv_even, &tv_delta);
@@ -3862,6 +4619,7 @@ int get_and_dump_counters(void)
{
int status;
+ snapshot_proc_sysfs_files();
status = for_all_cpus(get_counters, ODD_COUNTERS);
if (status)
return status;
@@ -3876,13 +4634,13 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(outf, "turbostat version 4.16 24 Dec 2016"
+ fprintf(outf, "turbostat version 17.06.23"
" - Len Brown <lenb@kernel.org>\n");
}
-int add_counter(unsigned int msr_num, char *name, unsigned int width,
- enum counter_scope scope, enum counter_type type,
- enum counter_format format)
+int add_counter(unsigned int msr_num, char *path, char *name,
+ unsigned int width, enum counter_scope scope,
+ enum counter_type type, enum counter_format format, int flags)
{
struct msr_counter *msrp;
@@ -3894,31 +4652,46 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width,
msrp->msr_num = msr_num;
strncpy(msrp->name, name, NAME_BYTES);
+ if (path)
+ strncpy(msrp->path, path, PATH_BYTES);
msrp->width = width;
msrp->type = type;
msrp->format = format;
+ msrp->flags = flags;
switch (scope) {
case SCOPE_CPU:
- sys.thread_counter_bytes += 64;
msrp->next = sys.tp;
sys.tp = msrp;
- sys.thread_counter_bytes += sizeof(unsigned long long);
+ sys.added_thread_counters++;
+ if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
+ fprintf(stderr, "exceeded max %d added thread counters\n",
+ MAX_ADDED_COUNTERS);
+ exit(-1);
+ }
break;
case SCOPE_CORE:
- sys.core_counter_bytes += 64;
msrp->next = sys.cp;
sys.cp = msrp;
- sys.core_counter_bytes += sizeof(unsigned long long);
+ sys.added_core_counters++;
+ if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
+ fprintf(stderr, "exceeded max %d added core counters\n",
+ MAX_ADDED_COUNTERS);
+ exit(-1);
+ }
break;
case SCOPE_PACKAGE:
- sys.package_counter_bytes += 64;
msrp->next = sys.pp;
sys.pp = msrp;
- sys.package_counter_bytes += sizeof(unsigned long long);
+ sys.added_package_counters++;
+ if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
+ fprintf(stderr, "exceeded max %d added package counters\n",
+ MAX_ADDED_COUNTERS);
+ exit(-1);
+ }
break;
}
@@ -3928,7 +4701,8 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width,
void parse_add_command(char *add_command)
{
int msr_num = 0;
- char name_buffer[NAME_BYTES];
+ char *path = NULL;
+ char name_buffer[NAME_BYTES] = "";
int width = 64;
int fail = 0;
enum counter_scope scope = SCOPE_CPU;
@@ -3943,6 +4717,11 @@ void parse_add_command(char *add_command)
if (sscanf(add_command, "msr%d", &msr_num) == 1)
goto next;
+ if (*add_command == '/') {
+ path = add_command;
+ goto next;
+ }
+
if (sscanf(add_command, "u%d", &width) == 1) {
if ((width == 32) || (width == 64))
goto next;
@@ -3968,6 +4747,10 @@ void parse_add_command(char *add_command)
type = COUNTER_SECONDS;
goto next;
}
+ if (!strncmp(add_command, "usec", strlen("usec"))) {
+ type = COUNTER_USEC;
+ goto next;
+ }
if (!strncmp(add_command, "raw", strlen("raw"))) {
format = FORMAT_RAW;
goto next;
@@ -3992,36 +4775,26 @@ void parse_add_command(char *add_command)
next:
add_command = strchr(add_command, ',');
- if (add_command)
+ if (add_command) {
+ *add_command = '\0';
add_command++;
+ }
}
- if (msr_num == 0) {
- fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n");
+ if ((msr_num == 0) && (path == NULL)) {
+ fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
fail++;
}
/* generate default column header */
if (*name_buffer == '\0') {
- if (format == FORMAT_RAW) {
- if (width == 32)
- sprintf(name_buffer, "msr%d", msr_num);
- else
- sprintf(name_buffer, "MSR%d", msr_num);
- } else if (format == FORMAT_DELTA) {
- if (width == 32)
- sprintf(name_buffer, "cnt%d", msr_num);
- else
- sprintf(name_buffer, "CNT%d", msr_num);
- } else if (format == FORMAT_PERCENT) {
- if (width == 32)
- sprintf(name_buffer, "msr%d%%", msr_num);
- else
- sprintf(name_buffer, "MSR%d%%", msr_num);
- }
+ if (width == 32)
+ sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
+ else
+ sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
}
- if (add_counter(msr_num, name_buffer, width, scope, type, format))
+ if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
fail++;
if (fail) {
@@ -4029,20 +4802,215 @@ next:
exit(1);
}
}
+
+int is_deferred_skip(char *name)
+{
+ int i;
+
+ for (i = 0; i < deferred_skip_index; ++i)
+ if (!strcmp(name, deferred_skip_names[i]))
+ return 1;
+ return 0;
+}
+
+void probe_sysfs(void)
+{
+ char path[64];
+ char name_buf[16];
+ FILE *input;
+ int state;
+ char *sp;
+
+ if (!DO_BIC(BIC_sysfs))
+ return;
+
+ for (state = 10; state > 0; --state) {
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+ fgets(name_buf, sizeof(name_buf), input);
+
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+ *sp = '%';
+ *(sp + 1) = '\0';
+
+ fclose(input);
+
+ sprintf(path, "cpuidle/state%d/time", state);
+
+ if (is_deferred_skip(name_buf))
+ continue;
+
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
+ FORMAT_PERCENT, SYSFS_PERCPU);
+ }
+
+ for (state = 10; state > 0; --state) {
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+ fgets(name_buf, sizeof(name_buf), input);
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+ *sp = '\0';
+ fclose(input);
+
+ sprintf(path, "cpuidle/state%d/usage", state);
+
+ if (is_deferred_skip(name_buf))
+ continue;
+
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
+ FORMAT_DELTA, SYSFS_PERCPU);
+ }
+
+}
+
+
+/*
+ * parse cpuset with following syntax
+ * 1,2,4..6,8-10 and set bits in cpu_subset
+ */
+void parse_cpu_command(char *optarg)
+{
+ unsigned int start, end;
+ char *next;
+
+ if (!strcmp(optarg, "core")) {
+ if (cpu_subset)
+ goto error;
+ show_core_only++;
+ return;
+ }
+ if (!strcmp(optarg, "package")) {
+ if (cpu_subset)
+ goto error;
+ show_pkg_only++;
+ return;
+ }
+ if (show_core_only || show_pkg_only)
+ goto error;
+
+ cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
+ if (cpu_subset == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
+
+ CPU_ZERO_S(cpu_subset_size, cpu_subset);
+
+ next = optarg;
+
+ while (next && *next) {
+
+ if (*next == '-') /* no negative cpu numbers */
+ goto error;
+
+ start = strtoul(next, &next, 10);
+
+ if (start >= CPU_SUBSET_MAXCPUS)
+ goto error;
+ CPU_SET_S(start, cpu_subset_size, cpu_subset);
+
+ if (*next == '\0')
+ break;
+
+ if (*next == ',') {
+ next += 1;
+ continue;
+ }
+
+ if (*next == '-') {
+ next += 1; /* start range */
+ } else if (*next == '.') {
+ next += 1;
+ if (*next == '.')
+ next += 1; /* start range */
+ else
+ goto error;
+ }
+
+ end = strtoul(next, &next, 10);
+ if (end <= start)
+ goto error;
+
+ while (++start <= end) {
+ if (start >= CPU_SUBSET_MAXCPUS)
+ goto error;
+ CPU_SET_S(start, cpu_subset_size, cpu_subset);
+ }
+
+ if (*next == ',')
+ next += 1;
+ else if (*next != '\0')
+ goto error;
+ }
+
+ return;
+
+error:
+ fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
+ help();
+ exit(-1);
+}
+
+int shown;
+/*
+ * parse_show_hide() - process cmdline to set default counter action
+ */
+void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
+{
+ /*
+ * --show: show only those specified
+ * The 1st invocation will clear and replace the enabled mask
+ * subsequent invocations can add to it.
+ */
+ if (new_mode == SHOW_LIST) {
+ if (shown == 0)
+ bic_enabled = bic_lookup(optarg, new_mode);
+ else
+ bic_enabled |= bic_lookup(optarg, new_mode);
+ shown = 1;
+
+ return;
+ }
+
+ /*
+ * --hide: do not show those specified
+ * multiple invocations simply clear more bits in enabled mask
+ */
+ bic_enabled &= ~bic_lookup(optarg, new_mode);
+
+}
+
void cmdline(int argc, char **argv)
{
int opt;
int option_index = 0;
static struct option long_options[] = {
{"add", required_argument, 0, 'a'},
+ {"cpu", required_argument, 0, 'c'},
{"Dump", no_argument, 0, 'D'},
- {"debug", no_argument, 0, 'd'},
+ {"debug", no_argument, 0, 'd'}, /* internal, not documented */
{"interval", required_argument, 0, 'i'},
{"help", no_argument, 0, 'h'},
+ {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help
{"Joules", no_argument, 0, 'J'},
+ {"list", no_argument, 0, 'l'},
+ {"migrate", no_argument, 0, 'm'},
{"out", required_argument, 0, 'o'},
- {"Package", no_argument, 0, 'p'},
- {"processor", no_argument, 0, 'p'},
+ {"quiet", no_argument, 0, 'q'},
+ {"show", required_argument, 0, 's'},
{"Summary", no_argument, 0, 'S'},
{"TCC", required_argument, 0, 'T'},
{"version", no_argument, 0, 'v' },
@@ -4051,18 +5019,24 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v",
long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_add_command(optarg);
break;
+ case 'c':
+ parse_cpu_command(optarg);
+ break;
case 'D':
dump_only++;
break;
case 'd':
debug++;
break;
+ case 'H':
+ parse_show_hide(optarg, HIDE_LIST);
+ break;
case 'h':
default:
help();
@@ -4084,14 +5058,21 @@ void cmdline(int argc, char **argv)
case 'J':
rapl_joules++;
break;
+ case 'l':
+ list_header_only++;
+ quiet++;
+ break;
+ case 'm':
+ do_migrate = 1;
+ break;
case 'o':
outf = fopen_or_die(optarg, "w");
break;
- case 'P':
- show_pkg_only++;
+ case 'q':
+ quiet = 1;
break;
- case 'p':
- show_core_only++;
+ case 's':
+ parse_show_hide(optarg, SHOW_LIST);
break;
case 'S':
summary_only++;
@@ -4113,15 +5094,24 @@ int main(int argc, char **argv)
cmdline(argc, argv);
- if (debug)
+ if (!quiet)
print_version();
+ probe_sysfs();
+
turbostat_init();
/* dump counters and exit */
if (dump_only)
return get_and_dump_counters();
+ /* list header and exit */
+ if (list_header_only) {
+ print_header(",");
+ flush_output_stdout();
+ return 0;
+ }
+
/*
* if any params left, it must be a command to fork
*/
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index 971c9ffdcb50..a711eec0c895 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -1,10 +1,27 @@
-DESTDIR ?=
+CC = $(CROSS_COMPILE)gcc
+BUILD_OUTPUT := $(CURDIR)
+PREFIX := /usr
+DESTDIR :=
+
+ifeq ("$(origin O)", "command line")
+ BUILD_OUTPUT := $(O)
+endif
x86_energy_perf_policy : x86_energy_perf_policy.c
+CFLAGS += -Wall
+CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
+
+%: %.c
+ @mkdir -p $(BUILD_OUTPUT)
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
+.PHONY : clean
clean :
- rm -f x86_energy_perf_policy
+ @rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy
+
+install : x86_energy_perf_policy
+ install -d $(DESTDIR)$(PREFIX)/bin
+ install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
+ install -d $(DESTDIR)$(PREFIX)/share/man/man8
+ install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
-install :
- install x86_energy_perf_policy ${DESTDIR}/usr/bin/
- install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
index 8eaaad648cdb..17db1c3af4d0 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -1,104 +1,213 @@
-.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
+.\" This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com>
.\" Distributed under the GPL, Copyleft 1994.
.TH X86_ENERGY_PERF_POLICY 8
.SH NAME
-x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
+x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
.SH SYNOPSIS
-.ft B
.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB "\-r"
+.RB "[ options ] [ scope ] [field \ value]"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'performance'
+.RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'normal'
+.RB "cpu-list, pkg-list: # | #,# | #-# | all"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'powersave'
+.RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB n
+.RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value)"
.br
+.RB "value: # | default | performance | balance-performance | balance-power | power"
.SH DESCRIPTION
\fBx86_energy_perf_policy\fP
-allows software to convey
-its policy for the relative importance of performance
-versus energy savings to the processor.
+displays and updates energy-performance policy settings specific to
+Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR)
+updates, no matter if the Linux cpufreq sub-system is enabled or not.
-The processor uses this information in model-specific ways
-when it must select trade-offs between performance and
-energy efficiency.
+Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
+may affect a wide range of hardware decisions,
+such as how aggressively the hardware enters and exits CPU idle states (C-states)
+and Processor Performance States (P-states).
+This policy hint does not replace explicit OS C-state and P-state selection.
+Rather, it tells the hardware how aggressively to implement those selections.
+Further, it allows the OS to influence energy/performance trade-offs where there
+is no software interface, such as in the opportunistic "turbo-mode" P-state range.
+Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU,
+but some implementations
+share a single MSR among all CPUs in each processor package.
+On those systems, a write to EPB on one processor will
+be visible, and will have an effect, on all CPUs
+in the same processor package.
-This policy hint does not supersede Processor Performance states
-(P-states) or CPU Idle power states (C-states), but allows
-software to have influence where it would otherwise be unable
-to express a preference.
+Hardware P-States (HWP) are effectively an expansion of hardware
+P-state control from the opportunistic turbo-mode P-state range
+to include the entire range of available P-states.
+On Broadwell Xeon, the initial HWP implementation, EBP influenced HWP.
+That influence was removed in subsequent generations,
+where it was moved to the
+Energy_Performance_Preference (EPP) field in
+a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG.
-For example, this setting may tell the hardware how
-aggressively or conservatively to control frequency
-in the "turbo range" above the explicitly OS-controlled
-P-state frequency range. It may also tell the hardware
-how aggressively is should enter the OS requested C-states.
+EPP is the most commonly managed knob in HWP mode,
+but MSR_IA32_HWP_REQUEST also allows the user to specify
+minimum-frequency for Quality-of-Service,
+and maximum-frequency for power-capping.
+MSR_IA32_HWP_REQUEST is defined per-CPU.
-Support for this feature is indicated by CPUID.06H.ECX.bit3
-per the Intel Architectures Software Developer's Manual.
+MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST,
+but it can simultaneously set the default policy for all CPUs within a package.
+A bit in per-CPU MSR_IA32_HWP_REQUEST indicates whether it is
+over-ruled-by or exempt-from MSR_IA32_HWP_REQUEST_PKG.
-.SS Options
-\fB-c\fP limits operation to a single CPU.
-The default is to operate on all CPUs.
-Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
-logical processor, but that the initial implementations
-of the MSR were shared among all processors in each package.
-.PP
-\fB-v\fP increases verbosity. By default
-x86_energy_perf_policy is silent.
-.PP
-\fB-r\fP is for "read-only" mode - the unchanged state
-is read and displayed.
+MSR_HWP_CAPABILITIES shows the default values for the fields
+in MSR_IA32_HWP_REQUEST. It is displayed when no values
+are being written.
+
+.SS SCOPE OPTIONS
.PP
-.I performance
-Set a policy where performance is paramount.
-The processor will be unwilling to sacrifice any performance
-for the sake of energy saving. This is the hardware default.
+\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list.
+The CPU-list may be comma-separated CPU numbers, with dash for range
+or the string "all". Eg. '--cpu 1,4,6-8' or '--cpu all'.
+When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu
+MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1),
+or exempt from MSR_IA32_HWP_REQUEST_PKG (0).
+
+\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list.
+The list is a string of individual package numbers separated
+by commas, and or ranges of package numbers separated by a dash,
+or the string "all".
+For example '--pkg 1,3' or '--pkg all'
+
+.SS VALUE OPTIONS
.PP
-.I normal
+.I normal | default
Set a policy with a normal balance between performance and energy efficiency.
The processor will tolerate minor performance compromise
for potentially significant energy savings.
-This reasonable default for most desktops and servers.
+This is a reasonable default for most desktops and servers.
+"default" is a synonym for "normal".
.PP
-.I powersave
+.I performance
+Set a policy for maximum performance,
+accepting no performance sacrifice for the benefit of energy efficiency.
+.PP
+.I balance-performance
+Set a policy with a high priority on performance,
+but allowing some performance loss to benefit energy efficiency.
+.PP
+.I balance-power
+Set a policy where the performance and power are balanced.
+This is the default.
+.PP
+.I power
Set a policy where the processor can accept
-a measurable performance hit to maximize energy efficiency.
+a measurable performance impact to maximize energy efficiency.
+
.PP
-.I n
-Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
-The range of valid numbers is 0-15, where 0 is maximum
-performance and 15 is maximum energy efficiency.
+The following table shows the mapping from the value strings above to actual MSR values.
+This mapping is defined in the Linux-kernel header, msr-index.h.
+.nf
+VALUE STRING EPB EPP
+performance 0 0
+balance-performance 4 128
+normal, default 6 128
+balance-power 8 192
+power 15 255
+.fi
+.PP
+For MSR_IA32_HWP_REQUEST performance fields
+(--hwp-min, --hwp-max, --hwp-desired), the value option
+is in units of 100 MHz, Eg. 12 signifies 1200 MHz.
+
+.SS FIELD OPTIONS
+\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with
+the value-string. In addition, enables turbo-mode and HWP-mode, if they were previous disabled.
+Thus "--all normal" will set a system without cpufreq into a well known configuration.
+.PP
+\fB-B, --epb\fP set EPB per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-d, --debug\fP debug increases verbosity. By default
+x86_energy_perf_policy is silent for updates,
+and verbose for read-only mode.
+.PP
+\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.min.
+.PP
+\fB-M, --hwp-max\fP request HWP not exceed a the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.max.
+.PP
+\fB-D, --hwp-desired\fP request HWP 'desired' frequency.
+The "normal" setting is 0, which
+corresponds to 'full autonomous' HWP control.
+Non-zero performance values request a specific performance
+level on this processor, specified in multiples of 100 MHz.
+.PP
+\fB-w, --hwp-window\fP specify integer number of microsec
+in the sliding window that HWP uses to maintain average frequency.
+This parameter is meaningful only when the "desired" field above is non-zero.
+Default is 0, allowing the HW to choose.
+.SH OTHER OPTIONS
+.PP
+\fB-f, --force\fP writes the specified values without bounds checking.
+.PP
+\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu,
+indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1)
+or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings.
+The default is exempt.
+.PP
+\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode. Once enabled, system RESET is required to disable HWP mode.
+.PP
+\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode.
+.PP
+\fB-v, --version\fP print version and exit.
+.PP
+If no request to change policy is made,
+the default behavior is to read
+and display the current system state,
+including the default capabilities.
+.SH WARNING
+.PP
+This utility writes directly to Model Specific Registers.
+There is no locking or coordination should this utility
+be used to modify HWP limit fields at the same time that
+intel_pstate's sysfs attributes access the same MSRs.
+.PP
+Note that --hwp-desired and --hwp-window are considered experimental.
+Future versions of Linux reserve the right to access these
+fields internally -- potentially conflicting with user-space access.
+.SH EXAMPLE
+.nf
+# sudo x86_energy_perf_policy
+cpu0: EPB 6
+cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu1: EPB 6
+cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu2: EPB 6
+cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu3: EPB 6
+cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35
+.fi
.SH NOTES
-.B "x86_energy_perf_policy "
+.B "x86_energy_perf_policy"
runs only as root.
.SH FILES
.ta
.nf
/dev/cpu/*/msr
.fi
-
.SH "SEE ALSO"
+.nf
msr(4)
+Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+.fi
.PP
.SH AUTHORS
.nf
-Written by Len Brown <len.brown@intel.com>
+Len Brown
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 40b3e5482f8a..65bbe627a425 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -3,322 +3,1424 @@
* policy preference bias on recent X86 processors.
*/
/*
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2010 - 2017 Intel Corporation.
* Len Brown <len.brown@intel.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * This program is released under GPL v2
*/
+#define _GNU_SOURCE
+#include MSRHEADER
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
+#include <sched.h>
#include <sys/stat.h>
#include <sys/resource.h>
+#include <getopt.h>
+#include <err.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
+#include <limits.h>
#include <stdlib.h>
#include <string.h>
+#include <cpuid.h>
+#include <errno.h>
+
+#define OPTARG_NORMAL (INT_MAX - 1)
+#define OPTARG_POWER (INT_MAX - 2)
+#define OPTARG_BALANCE_POWER (INT_MAX - 3)
+#define OPTARG_BALANCE_PERFORMANCE (INT_MAX - 4)
+#define OPTARG_PERFORMANCE (INT_MAX - 5)
+
+struct msr_hwp_cap {
+ unsigned char highest;
+ unsigned char guaranteed;
+ unsigned char efficient;
+ unsigned char lowest;
+};
-unsigned int verbose; /* set with -v */
-unsigned int read_only; /* set with -r */
+struct msr_hwp_request {
+ unsigned char hwp_min;
+ unsigned char hwp_max;
+ unsigned char hwp_desired;
+ unsigned char hwp_epp;
+ unsigned int hwp_window;
+ unsigned char hwp_use_pkg;
+} req_update;
+
+unsigned int debug;
+unsigned int verbose;
+unsigned int force;
char *progname;
-unsigned long long new_bias;
-int cpu = -1;
+int base_cpu;
+unsigned char update_epb;
+unsigned long long new_epb;
+unsigned char turbo_is_enabled;
+unsigned char update_turbo;
+unsigned char turbo_update_value;
+unsigned char update_hwp_epp;
+unsigned char update_hwp_min;
+unsigned char update_hwp_max;
+unsigned char update_hwp_desired;
+unsigned char update_hwp_window;
+unsigned char update_hwp_use_pkg;
+unsigned char update_hwp_enable;
+#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg)
+int max_cpu_num;
+int max_pkg_num;
+#define MAX_PACKAGES 64
+unsigned int first_cpu_in_pkg[MAX_PACKAGES];
+unsigned long long pkg_present_set;
+unsigned long long pkg_selected_set;
+cpu_set_t *cpu_present_set;
+cpu_set_t *cpu_selected_set;
+int genuine_intel;
+
+size_t cpu_setsize;
+
+char *proc_stat = "/proc/stat";
+
+unsigned int has_epb; /* MSR_IA32_ENERGY_PERF_BIAS */
+unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
+unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
+unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */
+
+unsigned int bdx_highest_ratio;
/*
- * Usage:
- *
- * -c cpu: limit action to a single CPU (default is all CPUs)
- * -v: verbose output (can invoke more than once)
- * -r: read-only, don't change any settings
- *
- * performance
- * Performance is paramount.
- * Unwilling to sacrifice any performance
- * for the sake of energy saving. (hardware default)
- *
- * normal
- * Can tolerate minor performance compromise
- * for potentially significant energy savings.
- * (reasonable default for most desktops and servers)
- *
- * powersave
- * Can tolerate significant performance hit
- * to maximize energy savings.
- *
- * n
- * a numerical value to write to the underlying MSR.
+ * maintain compatibility with original implementation, but don't document it:
*/
void usage(void)
{
- printf("%s: [-c cpu] [-v] "
- "(-r | 'performance' | 'normal' | 'powersave' | n)\n",
- progname);
+ fprintf(stderr, "%s [options] [scope][field value]\n", progname);
+ fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n");
+ fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n");
+ fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n");
+ fprintf(stderr,
+ "value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\"| \"power\")\n");
+ fprintf(stderr, "--hwp-window usec\n");
+
+ fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n");
+ fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname);
+
exit(1);
}
-#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+/*
+ * If bdx_highest_ratio is set,
+ * then we must translate between MSR format and simple ratio
+ * used on the cmdline.
+ */
+int ratio_2_msr_perf(int ratio)
+{
+ int msr_perf;
+
+ if (!bdx_highest_ratio)
+ return ratio;
+
+ msr_perf = ratio * 255 / bdx_highest_ratio;
+
+ if (debug)
+ fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio);
+
+ return msr_perf;
+}
+int msr_perf_2_ratio(int msr_perf)
+{
+ int ratio;
+ double d;
+
+ if (!bdx_highest_ratio)
+ return msr_perf;
+
+ d = (double)msr_perf * (double) bdx_highest_ratio / 255.0;
+ d = d + 0.5; /* round */
+ ratio = (int)d;
+
+ if (debug)
+ fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d);
+
+ return ratio;
+}
+int parse_cmdline_epb(int i)
+{
+ if (!has_epb)
+ errx(1, "EPB not enabled on this platform");
+
+ update_epb = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return ENERGY_PERF_BIAS_POWERSAVE;
+ case OPTARG_BALANCE_POWER:
+ return ENERGY_PERF_BIAS_BALANCE_POWERSAVE;
+ case OPTARG_NORMAL:
+ return ENERGY_PERF_BIAS_NORMAL;
+ case OPTARG_BALANCE_PERFORMANCE:
+ return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE;
+ case OPTARG_PERFORMANCE:
+ return ENERGY_PERF_BIAS_PERFORMANCE;
+ }
+ if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE)
+ errx(1, "--epb must be from 0 to 15");
+ return i;
+}
+
+#define HWP_CAP_LOWEST 0
+#define HWP_CAP_HIGHEST 255
+
+/*
+ * "performance" changes hwp_min to cap.highest
+ * All others leave it at cap.lowest
+ */
+int parse_cmdline_hwp_min(int i)
+{
+ update_hwp_min = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_PERFORMANCE:
+ return HWP_CAP_LOWEST;
+ case OPTARG_PERFORMANCE:
+ return HWP_CAP_HIGHEST;
+ }
+ return i;
+}
+/*
+ * "power" changes hwp_max to cap.lowest
+ * All others leave it at cap.highest
+ */
+int parse_cmdline_hwp_max(int i)
+{
+ update_hwp_max = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return HWP_CAP_LOWEST;
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_PERFORMANCE:
+ return HWP_CAP_HIGHEST;
+ }
+ return i;
+}
+/*
+ * for --hwp-des, all strings leave it in autonomous mode
+ * If you want to change it, you need to explicitly pick a value
+ */
+int parse_cmdline_hwp_desired(int i)
+{
+ update_hwp_desired = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_NORMAL:
+ case OPTARG_PERFORMANCE:
+ return 0; /* autonomous */
+ }
+ return i;
+}
+
+int parse_cmdline_hwp_window(int i)
+{
+ unsigned int exponent;
+
+ update_hwp_window = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_PERFORMANCE:
+ return 0;
+ }
+ if (i < 0 || i > 1270000000) {
+ fprintf(stderr, "--hwp-window: 0 for auto; 1 - 1270000000 usec for window duration\n");
+ usage();
+ }
+ for (exponent = 0; ; ++exponent) {
+ if (debug)
+ printf("%d 10^%d\n", i, exponent);
+
+ if (i <= 127)
+ break;
+
+ i = i / 10;
+ }
+ if (debug)
+ fprintf(stderr, "%d*10^%d: 0x%x\n", i, exponent, (exponent << 7) | i);
+
+ return (exponent << 7) | i;
+}
+int parse_cmdline_hwp_epp(int i)
+{
+ update_hwp_epp = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return HWP_EPP_POWERSAVE;
+ case OPTARG_BALANCE_POWER:
+ return HWP_EPP_BALANCE_POWERSAVE;
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_PERFORMANCE:
+ return HWP_EPP_BALANCE_PERFORMANCE;
+ case OPTARG_PERFORMANCE:
+ return HWP_EPP_PERFORMANCE;
+ }
+ if (i < 0 || i > 0xff) {
+ fprintf(stderr, "--hwp-epp must be from 0 to 0xff\n");
+ usage();
+ }
+ return i;
+}
+int parse_cmdline_turbo(int i)
+{
+ update_turbo = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return 0;
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_PERFORMANCE:
+ return 1;
+ }
+ if (i < 0 || i > 1) {
+ fprintf(stderr, "--turbo-enable: 1 to enable, 0 to disable\n");
+ usage();
+ }
+ return i;
+}
+
+int parse_optarg_string(char *s)
+{
+ int i;
+ char *endptr;
+
+ if (!strncmp(s, "default", 7))
+ return OPTARG_NORMAL;
+
+ if (!strncmp(s, "normal", 6))
+ return OPTARG_NORMAL;
+
+ if (!strncmp(s, "power", 9))
+ return OPTARG_POWER;
+
+ if (!strncmp(s, "balance-power", 17))
+ return OPTARG_BALANCE_POWER;
+
+ if (!strncmp(s, "balance-performance", 19))
+ return OPTARG_BALANCE_PERFORMANCE;
+
+ if (!strncmp(s, "performance", 11))
+ return OPTARG_PERFORMANCE;
+
+ i = strtol(s, &endptr, 0);
+ if (s == endptr) {
+ fprintf(stderr, "no digits in \"%s\"\n", s);
+ usage();
+ }
+ if (i == LONG_MIN || i == LONG_MAX)
+ errx(-1, "%s", s);
+
+ if (i > 0xFF)
+ errx(-1, "%d (0x%x) must be < 256", i, i);
+
+ if (i < 0)
+ errx(-1, "%d (0x%x) must be >= 0", i, i);
+ return i;
+}
+
+void parse_cmdline_all(char *s)
+{
+ force++;
+ update_hwp_enable = 1;
+ req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(s));
+ req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(s));
+ req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(s));
+ if (has_epb)
+ new_epb = parse_cmdline_epb(parse_optarg_string(s));
+ turbo_update_value = parse_cmdline_turbo(parse_optarg_string(s));
+ req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(s));
+ req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(s));
+}
+
+void validate_cpu_selected_set(void)
+{
+ int cpu;
+
+ if (CPU_COUNT_S(cpu_setsize, cpu_selected_set) == 0)
+ errx(0, "no CPUs requested");
+
+ for (cpu = 0; cpu <= max_cpu_num; ++cpu) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set))
+ if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+ errx(1, "Requested cpu% is not present", cpu);
+ }
+}
+
+void parse_cmdline_cpu(char *s)
+{
+ char *startp, *endp;
+ int cpu = 0;
+
+ if (pkg_selected_set) {
+ usage();
+ errx(1, "--cpu | --pkg");
+ }
+ cpu_selected_set = CPU_ALLOC((max_cpu_num + 1));
+ if (cpu_selected_set == NULL)
+ err(1, "cpu_selected_set");
+ CPU_ZERO_S(cpu_setsize, cpu_selected_set);
+
+ for (startp = s; startp && *startp;) {
+
+ if (*startp == ',') {
+ startp++;
+ continue;
+ }
+
+ if (*startp == '-') {
+ int end_cpu;
-#define BIAS_PERFORMANCE 0
-#define BIAS_BALANCE 6
-#define BIAS_POWERSAVE 15
+ startp++;
+ end_cpu = strtol(startp, &endp, 10);
+ if (startp == endp)
+ continue;
+
+ while (cpu <= end_cpu) {
+ if (cpu > max_cpu_num)
+ errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ cpu++;
+ }
+ startp = endp;
+ continue;
+ }
+
+ if (strncmp(startp, "all", 3) == 0) {
+ for (cpu = 0; cpu <= max_cpu_num; cpu += 1) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ }
+ startp += 3;
+ if (*startp == 0)
+ break;
+ }
+ /* "--cpu even" is not documented */
+ if (strncmp(startp, "even", 4) == 0) {
+ for (cpu = 0; cpu <= max_cpu_num; cpu += 2) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ }
+ startp += 4;
+ if (*startp == 0)
+ break;
+ }
+
+ /* "--cpu odd" is not documented */
+ if (strncmp(startp, "odd", 3) == 0) {
+ for (cpu = 1; cpu <= max_cpu_num; cpu += 2) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ }
+ startp += 3;
+ if (*startp == 0)
+ break;
+ }
+
+ cpu = strtol(startp, &endp, 10);
+ if (startp == endp)
+ errx(1, "--cpu cpu-set: confused by '%s'", startp);
+ if (cpu > max_cpu_num)
+ errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ startp = endp;
+ }
+
+ validate_cpu_selected_set();
+
+}
+
+void parse_cmdline_pkg(char *s)
+{
+ char *startp, *endp;
+ int pkg = 0;
+
+ if (cpu_selected_set) {
+ usage();
+ errx(1, "--pkg | --cpu");
+ }
+ pkg_selected_set = 0;
+
+ for (startp = s; startp && *startp;) {
+
+ if (*startp == ',') {
+ startp++;
+ continue;
+ }
+
+ if (*startp == '-') {
+ int end_pkg;
+
+ startp++;
+ end_pkg = strtol(startp, &endp, 10);
+ if (startp == endp)
+ continue;
+
+ while (pkg <= end_pkg) {
+ if (pkg > max_pkg_num)
+ errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num);
+ pkg_selected_set |= 1 << pkg;
+ pkg++;
+ }
+ startp = endp;
+ continue;
+ }
+
+ if (strncmp(startp, "all", 3) == 0) {
+ pkg_selected_set = pkg_present_set;
+ return;
+ }
+
+ pkg = strtol(startp, &endp, 10);
+ if (pkg > max_pkg_num)
+ errx(1, "Requested pkg%d Exceeds max pkg%d", pkg, max_pkg_num);
+ pkg_selected_set |= 1 << pkg;
+ startp = endp;
+ }
+}
+
+void for_packages(unsigned long long pkg_set, int (func)(int))
+{
+ int pkg_num;
+
+ for (pkg_num = 0; pkg_num <= max_pkg_num; ++pkg_num) {
+ if (pkg_set & (1UL << pkg_num))
+ func(pkg_num);
+ }
+}
+
+void print_version(void)
+{
+ printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n");
+}
void cmdline(int argc, char **argv)
{
int opt;
+ int option_index = 0;
+
+ static struct option long_options[] = {
+ {"all", required_argument, 0, 'a'},
+ {"cpu", required_argument, 0, 'c'},
+ {"pkg", required_argument, 0, 'p'},
+ {"debug", no_argument, 0, 'd'},
+ {"hwp-desired", required_argument, 0, 'D'},
+ {"epb", required_argument, 0, 'B'},
+ {"force", no_argument, 0, 'f'},
+ {"hwp-enable", no_argument, 0, 'e'},
+ {"help", no_argument, 0, 'h'},
+ {"hwp-epp", required_argument, 0, 'P'},
+ {"hwp-min", required_argument, 0, 'm'},
+ {"hwp-max", required_argument, 0, 'M'},
+ {"read", no_argument, 0, 'r'},
+ {"turbo-enable", required_argument, 0, 't'},
+ {"hwp-use-pkg", required_argument, 0, 'u'},
+ {"version", no_argument, 0, 'v'},
+ {"hwp-window", required_argument, 0, 'w'},
+ {0, 0, 0, 0 }
+ };
progname = argv[0];
- while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
+ while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw",
+ long_options, &option_index)) != -1) {
switch (opt) {
+ case 'a':
+ parse_cmdline_all(optarg);
+ break;
+ case 'B':
+ new_epb = parse_cmdline_epb(parse_optarg_string(optarg));
+ break;
case 'c':
- cpu = atoi(optarg);
+ parse_cmdline_cpu(optarg);
+ break;
+ case 'e':
+ update_hwp_enable = 1;
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'd':
+ debug++;
+ verbose++;
+ break;
+ case 'f':
+ force++;
+ break;
+ case 'D':
+ req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(optarg));
+ break;
+ case 'm':
+ req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(optarg));
+ break;
+ case 'M':
+ req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(optarg));
+ break;
+ case 'p':
+ parse_cmdline_pkg(optarg);
+ break;
+ case 'P':
+ req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(optarg));
break;
case 'r':
- read_only = 1;
+ /* v1 used -r to specify read-only mode, now the default */
+ break;
+ case 't':
+ turbo_update_value = parse_cmdline_turbo(parse_optarg_string(optarg));
+ break;
+ case 'u':
+ update_hwp_use_pkg++;
+ if (atoi(optarg) == 0)
+ req_update.hwp_use_pkg = 0;
+ else
+ req_update.hwp_use_pkg = 1;
break;
case 'v':
- verbose++;
+ print_version();
+ exit(0);
+ break;
+ case 'w':
+ req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(optarg));
break;
default:
usage();
}
}
- /* if -r, then should be no additional optind */
- if (read_only && (argc > optind))
- usage();
-
/*
- * if no -r , then must be one additional optind
+ * v1 allowed "performance"|"normal"|"power" with no policy specifier
+ * to update BIAS. Continue to support that, even though no longer documented.
*/
- if (!read_only) {
+ if (argc == optind + 1)
+ new_epb = parse_cmdline_epb(parse_optarg_string(argv[optind]));
- if (argc != optind + 1) {
- printf("must supply -r or policy param\n");
- usage();
- }
+ if (argc > optind + 1) {
+ fprintf(stderr, "stray parameter '%s'\n", argv[optind + 1]);
+ usage();
+ }
+}
- if (!strcmp("performance", argv[optind])) {
- new_bias = BIAS_PERFORMANCE;
- } else if (!strcmp("normal", argv[optind])) {
- new_bias = BIAS_BALANCE;
- } else if (!strcmp("powersave", argv[optind])) {
- new_bias = BIAS_POWERSAVE;
- } else {
- char *endptr;
-
- new_bias = strtoull(argv[optind], &endptr, 0);
- if (endptr == argv[optind] ||
- new_bias > BIAS_POWERSAVE) {
- fprintf(stderr, "invalid value: %s\n",
- argv[optind]);
- usage();
- }
- }
+
+int get_msr(int cpu, int offset, unsigned long long *msr)
+{
+ int retval;
+ char pathname[32];
+ int fd;
+
+ sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ fd = open(pathname, O_RDONLY);
+ if (fd < 0)
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+
+ retval = pread(fd, msr, sizeof(*msr), offset);
+ if (retval != sizeof(*msr))
+ err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
+
+ if (debug > 1)
+ fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr);
+
+ close(fd);
+ return 0;
+}
+
+int put_msr(int cpu, int offset, unsigned long long new_msr)
+{
+ char pathname[32];
+ int retval;
+ int fd;
+
+ sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ fd = open(pathname, O_RDWR);
+ if (fd < 0)
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+
+ retval = pwrite(fd, &new_msr, sizeof(new_msr), offset);
+ if (retval != sizeof(new_msr))
+ err(-2, "pwrite(cpu%d, offset 0x%x, 0x%llx) = %d", cpu, offset, new_msr, retval);
+
+ close(fd);
+
+ if (debug > 1)
+ fprintf(stderr, "put_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, new_msr);
+
+ return 0;
+}
+
+void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str)
+{
+ if (cpu != -1)
+ printf("cpu%d: ", cpu);
+
+ printf("HWP_CAP: low %d eff %d guar %d high %d\n",
+ cap->lowest, cap->efficient, cap->guaranteed, cap->highest);
+}
+void read_hwp_cap(int cpu, struct msr_hwp_cap *cap, unsigned int msr_offset)
+{
+ unsigned long long msr;
+
+ get_msr(cpu, msr_offset, &msr);
+
+ cap->highest = msr_perf_2_ratio(HWP_HIGHEST_PERF(msr));
+ cap->guaranteed = msr_perf_2_ratio(HWP_GUARANTEED_PERF(msr));
+ cap->efficient = msr_perf_2_ratio(HWP_MOSTEFFICIENT_PERF(msr));
+ cap->lowest = msr_perf_2_ratio(HWP_LOWEST_PERF(msr));
+}
+
+void print_hwp_request(int cpu, struct msr_hwp_request *h, char *str)
+{
+ if (cpu != -1)
+ printf("cpu%d: ", cpu);
+
+ if (str)
+ printf("%s", str);
+
+ printf("HWP_REQ: min %d max %d des %d epp %d window 0x%x (%d*10^%dus) use_pkg %d\n",
+ h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
+ h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7, h->hwp_use_pkg);
+}
+void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str)
+{
+ printf("pkg%d: ", pkg);
+
+ if (str)
+ printf("%s", str);
+
+ printf("HWP_REQ_PKG: min %d max %d des %d epp %d window 0x%x (%d*10^%dus)\n",
+ h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
+ h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7);
+}
+void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+{
+ unsigned long long msr;
+
+ get_msr(cpu, msr_offset, &msr);
+
+ hwp_req->hwp_min = msr_perf_2_ratio((((msr) >> 0) & 0xff));
+ hwp_req->hwp_max = msr_perf_2_ratio((((msr) >> 8) & 0xff));
+ hwp_req->hwp_desired = msr_perf_2_ratio((((msr) >> 16) & 0xff));
+ hwp_req->hwp_epp = (((msr) >> 24) & 0xff);
+ hwp_req->hwp_window = (((msr) >> 32) & 0x3ff);
+ hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1);
+}
+
+void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+{
+ unsigned long long msr = 0;
+
+ if (debug > 1)
+ printf("cpu%d: requesting min %d max %d des %d epp %d window 0x%0x use_pkg %d\n",
+ cpu, hwp_req->hwp_min, hwp_req->hwp_max,
+ hwp_req->hwp_desired, hwp_req->hwp_epp,
+ hwp_req->hwp_window, hwp_req->hwp_use_pkg);
+
+ msr |= HWP_MIN_PERF(ratio_2_msr_perf(hwp_req->hwp_min));
+ msr |= HWP_MAX_PERF(ratio_2_msr_perf(hwp_req->hwp_max));
+ msr |= HWP_DESIRED_PERF(ratio_2_msr_perf(hwp_req->hwp_desired));
+ msr |= HWP_ENERGY_PERF_PREFERENCE(hwp_req->hwp_epp);
+ msr |= HWP_ACTIVITY_WINDOW(hwp_req->hwp_window);
+ msr |= HWP_PACKAGE_CONTROL(hwp_req->hwp_use_pkg);
+
+ put_msr(cpu, msr_offset, msr);
+}
+
+int print_cpu_msrs(int cpu)
+{
+ unsigned long long msr;
+ struct msr_hwp_request req;
+ struct msr_hwp_cap cap;
+
+ if (has_epb) {
+ get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+
+ printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr);
}
+
+ if (!has_hwp)
+ return 0;
+
+ read_hwp_request(cpu, &req, MSR_HWP_REQUEST);
+ print_hwp_request(cpu, &req, "");
+
+ read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+ print_hwp_cap(cpu, &cap, "");
+
+ return 0;
+}
+
+int print_pkg_msrs(int pkg)
+{
+ struct msr_hwp_request req;
+ unsigned long long msr;
+
+ if (!has_hwp)
+ return 0;
+
+ read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
+ print_hwp_request_pkg(pkg, &req, "");
+
+ if (has_hwp_notify) {
+ get_msr(first_cpu_in_pkg[pkg], MSR_HWP_INTERRUPT, &msr);
+ fprintf(stderr,
+ "pkg%d: MSR_HWP_INTERRUPT: 0x%08llx (Excursion_Min-%sabled, Guaranteed_Perf_Change-%sabled)\n",
+ pkg, msr,
+ ((msr) & 0x2) ? "EN" : "Dis",
+ ((msr) & 0x1) ? "EN" : "Dis");
+ }
+ get_msr(first_cpu_in_pkg[pkg], MSR_HWP_STATUS, &msr);
+ fprintf(stderr,
+ "pkg%d: MSR_HWP_STATUS: 0x%08llx (%sExcursion_Min, %sGuaranteed_Perf_Change)\n",
+ pkg, msr,
+ ((msr) & 0x4) ? "" : "No-",
+ ((msr) & 0x1) ? "" : "No-");
+
+ return 0;
}
/*
- * validate_cpuid()
- * returns on success, quietly exits on failure (make verbose with -v)
+ * Assumption: All HWP systems have 100 MHz bus clock
*/
-void validate_cpuid(void)
+int ratio_2_sysfs_khz(int ratio)
{
- unsigned int eax, ebx, ecx, edx, max_level;
- unsigned int fms, family, model, stepping;
+ int bclk_khz = 100 * 1000; /* 100,000 KHz = 100 MHz */
- eax = ebx = ecx = edx = 0;
+ return ratio * bclk_khz;
+}
+/*
+ * If HWP is enabled and cpufreq sysfs attribtes are present,
+ * then update sysfs, so that it will not become
+ * stale when we write to MSRs.
+ * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq,
+ * so we don't have to touch that.)
+ */
+void update_cpufreq_scaling_freq(int is_max, int cpu, unsigned int ratio)
+{
+ char pathname[64];
+ FILE *fp;
+ int retval;
+ int khz;
- asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
- "=d" (edx) : "a" (0));
+ sprintf(pathname, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_%s_freq",
+ cpu, is_max ? "max" : "min");
- if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
- if (verbose)
- fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
- (char *)&ebx, (char *)&edx, (char *)&ecx);
- exit(1);
+ fp = fopen(pathname, "w");
+ if (!fp) {
+ if (debug)
+ perror(pathname);
+ return;
}
- asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
- family = (fms >> 8) & 0xf;
- model = (fms >> 4) & 0xf;
- stepping = fms & 0xf;
- if (family == 6 || family == 0xf)
- model += ((fms >> 16) & 0xf) << 4;
+ khz = ratio_2_sysfs_khz(ratio);
+ retval = fprintf(fp, "%d", khz);
+ if (retval < 0)
+ if (debug)
+ perror("fprintf");
+ if (debug)
+ printf("echo %d > %s\n", khz, pathname);
- if (verbose > 1)
- printf("CPUID %d levels family:model:stepping "
- "0x%x:%x:%x (%d:%d:%d)\n", max_level,
- family, model, stepping, family, model, stepping);
+ fclose(fp);
+}
- if (!(edx & (1 << 5))) {
- if (verbose)
- printf("CPUID: no MSR\n");
- exit(1);
+/*
+ * We update all sysfs before updating any MSRs because of
+ * bugs in cpufreq/intel_pstate where the sysfs writes
+ * for a CPU may change the min/max values on other CPUS.
+ */
+
+int update_sysfs(int cpu)
+{
+ if (!has_hwp)
+ return 0;
+
+ if (!hwp_update_enabled())
+ return 0;
+
+ if (access("/sys/devices/system/cpu/cpu0/cpufreq", F_OK))
+ return 0;
+
+ if (update_hwp_min)
+ update_cpufreq_scaling_freq(0, cpu, req_update.hwp_min);
+
+ if (update_hwp_max)
+ update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max);
+
+ return 0;
+}
+
+int verify_hwp_req_self_consistency(int cpu, struct msr_hwp_request *req)
+{
+ /* fail if min > max requested */
+ if (req->hwp_min > req->hwp_max) {
+ errx(1, "cpu%d: requested hwp-min %d > hwp_max %d",
+ cpu, req->hwp_min, req->hwp_max);
}
- /*
- * Support for MSR_IA32_ENERGY_PERF_BIAS
- * is indicated by CPUID.06H.ECX.bit3
- */
- asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
- if (verbose)
- printf("CPUID.06H.ECX: 0x%x\n", ecx);
- if (!(ecx & (1 << 3))) {
- if (verbose)
- printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
- exit(1);
+ /* fail if desired > max requestd */
+ if (req->hwp_desired && (req->hwp_desired > req->hwp_max)) {
+ errx(1, "cpu%d: requested hwp-desired %d > hwp_max %d",
+ cpu, req->hwp_desired, req->hwp_max);
}
- return; /* success */
+ /* fail if desired < min requestd */
+ if (req->hwp_desired && (req->hwp_desired < req->hwp_min)) {
+ errx(1, "cpu%d: requested hwp-desired %d < requested hwp_min %d",
+ cpu, req->hwp_desired, req->hwp_min);
+ }
+
+ return 0;
}
-unsigned long long get_msr(int cpu, int offset)
+int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, struct msr_hwp_cap *cap)
{
- unsigned long long msr;
- char msr_path[32];
- int retval;
- int fd;
+ if (update_hwp_max) {
+ if (req->hwp_max > cap->highest)
+ errx(1, "cpu%d: requested max %d > capabilities highest %d, use --force?",
+ cpu, req->hwp_max, cap->highest);
+ if (req->hwp_max < cap->lowest)
+ errx(1, "cpu%d: requested max %d < capabilities lowest %d, use --force?",
+ cpu, req->hwp_max, cap->lowest);
+ }
- sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
- fd = open(msr_path, O_RDONLY);
- if (fd < 0) {
- printf("Try \"# modprobe msr\"\n");
- perror(msr_path);
- exit(1);
+ if (update_hwp_min) {
+ if (req->hwp_min > cap->highest)
+ errx(1, "cpu%d: requested min %d > capabilities highest %d, use --force?",
+ cpu, req->hwp_min, cap->highest);
+ if (req->hwp_min < cap->lowest)
+ errx(1, "cpu%d: requested min %d < capabilities lowest %d, use --force?",
+ cpu, req->hwp_min, cap->lowest);
}
- retval = pread(fd, &msr, sizeof msr, offset);
+ if (update_hwp_min && update_hwp_max && (req->hwp_min > req->hwp_max))
+ errx(1, "cpu%d: requested min %d > requested max %d",
+ cpu, req->hwp_min, req->hwp_max);
- if (retval != sizeof msr) {
- printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
- exit(-2);
+ if (update_hwp_desired && req->hwp_desired) {
+ if (req->hwp_desired > req->hwp_max)
+ errx(1, "cpu%d: requested desired %d > requested max %d, use --force?",
+ cpu, req->hwp_desired, req->hwp_max);
+ if (req->hwp_desired < req->hwp_min)
+ errx(1, "cpu%d: requested desired %d < requested min %d, use --force?",
+ cpu, req->hwp_desired, req->hwp_min);
+ if (req->hwp_desired < cap->lowest)
+ errx(1, "cpu%d: requested desired %d < capabilities lowest %d, use --force?",
+ cpu, req->hwp_desired, cap->lowest);
+ if (req->hwp_desired > cap->highest)
+ errx(1, "cpu%d: requested desired %d > capabilities highest %d, use --force?",
+ cpu, req->hwp_desired, cap->highest);
}
- close(fd);
- return msr;
+
+ return 0;
}
-unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset)
+int update_hwp_request(int cpu)
{
- unsigned long long old_msr;
- char msr_path[32];
- int retval;
- int fd;
+ struct msr_hwp_request req;
+ struct msr_hwp_cap cap;
+
+ int msr_offset = MSR_HWP_REQUEST;
+
+ read_hwp_request(cpu, &req, msr_offset);
+ if (debug)
+ print_hwp_request(cpu, &req, "old: ");
+
+ if (update_hwp_min)
+ req.hwp_min = req_update.hwp_min;
+
+ if (update_hwp_max)
+ req.hwp_max = req_update.hwp_max;
+
+ if (update_hwp_desired)
+ req.hwp_desired = req_update.hwp_desired;
+
+ if (update_hwp_window)
+ req.hwp_window = req_update.hwp_window;
+
+ if (update_hwp_epp)
+ req.hwp_epp = req_update.hwp_epp;
+
+ req.hwp_use_pkg = req_update.hwp_use_pkg;
+
+ read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+ if (debug)
+ print_hwp_cap(cpu, &cap, "");
+
+ if (!force)
+ check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
+
+ verify_hwp_req_self_consistency(cpu, &req);
- sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
- fd = open(msr_path, O_RDWR);
- if (fd < 0) {
- perror(msr_path);
- exit(1);
+ write_hwp_request(cpu, &req, msr_offset);
+
+ if (debug) {
+ read_hwp_request(cpu, &req, msr_offset);
+ print_hwp_request(cpu, &req, "new: ");
}
+ return 0;
+}
+int update_hwp_request_pkg(int pkg)
+{
+ struct msr_hwp_request req;
+ struct msr_hwp_cap cap;
+ int cpu = first_cpu_in_pkg[pkg];
+
+ int msr_offset = MSR_HWP_REQUEST_PKG;
+
+ read_hwp_request(cpu, &req, msr_offset);
+ if (debug)
+ print_hwp_request_pkg(pkg, &req, "old: ");
+
+ if (update_hwp_min)
+ req.hwp_min = req_update.hwp_min;
+
+ if (update_hwp_max)
+ req.hwp_max = req_update.hwp_max;
+
+ if (update_hwp_desired)
+ req.hwp_desired = req_update.hwp_desired;
+
+ if (update_hwp_window)
+ req.hwp_window = req_update.hwp_window;
+
+ if (update_hwp_epp)
+ req.hwp_epp = req_update.hwp_epp;
+
+ read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+ if (debug)
+ print_hwp_cap(cpu, &cap, "");
+
+ if (!force)
+ check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
+
+ verify_hwp_req_self_consistency(cpu, &req);
+
+ write_hwp_request(cpu, &req, msr_offset);
- retval = pread(fd, &old_msr, sizeof old_msr, offset);
- if (retval != sizeof old_msr) {
- perror("pwrite");
- printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
- exit(-2);
+ if (debug) {
+ read_hwp_request(cpu, &req, msr_offset);
+ print_hwp_request_pkg(pkg, &req, "new: ");
}
+ return 0;
+}
+
+int enable_hwp_on_cpu(int cpu)
+{
+ unsigned long long msr;
+
+ get_msr(cpu, MSR_PM_ENABLE, &msr);
+ put_msr(cpu, MSR_PM_ENABLE, 1);
+
+ if (verbose)
+ printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1);
+
+ return 0;
+}
+
+int update_cpu_msrs(int cpu)
+{
+ unsigned long long msr;
+
- retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
- if (retval != sizeof new_msr) {
- perror("pwrite");
- printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
- exit(-2);
+ if (update_epb) {
+ get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+ put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb);
+
+ if (verbose)
+ printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
+ cpu, (unsigned int) msr, (unsigned int) new_epb);
}
- close(fd);
+ if (update_turbo) {
+ int turbo_is_present_and_disabled;
+
+ get_msr(cpu, MSR_IA32_MISC_ENABLE, &msr);
+
+ turbo_is_present_and_disabled = ((msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE) != 0);
+
+ if (turbo_update_value == 1) {
+ if (turbo_is_present_and_disabled) {
+ msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
+ if (verbose)
+ printf("cpu%d: turbo ENABLE\n", cpu);
+ }
+ } else {
+ /*
+ * if "turbo_is_enabled" were known to be describe this cpu
+ * then we could use it here to skip redundant disable requests.
+ * but cpu may be in a different package, so we always write.
+ */
+ msr |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
+ if (verbose)
+ printf("cpu%d: turbo DISABLE\n", cpu);
+ }
+ }
+
+ if (!has_hwp)
+ return 0;
+
+ if (!hwp_update_enabled())
+ return 0;
+
+ update_hwp_request(cpu);
+ return 0;
+}
+
+/*
+ * Open a file, and exit on failure
+ */
+FILE *fopen_or_die(const char *path, const char *mode)
+{
+ FILE *filep = fopen(path, "r");
- return old_msr;
+ if (!filep)
+ err(1, "%s: open failed", path);
+ return filep;
}
-void print_msr(int cpu)
+unsigned int get_pkg_num(int cpu)
{
- printf("cpu%d: 0x%016llx\n",
- cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
+ FILE *fp;
+ char pathname[128];
+ unsigned int pkg;
+ int retval;
+
+ sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
+
+ fp = fopen_or_die(pathname, "r");
+ retval = fscanf(fp, "%d\n", &pkg);
+ if (retval != 1)
+ errx(1, "%s: failed to parse", pathname);
+ return pkg;
}
-void update_msr(int cpu)
+int set_max_cpu_pkg_num(int cpu)
{
- unsigned long long previous_msr;
+ unsigned int pkg;
- previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
+ if (max_cpu_num < cpu)
+ max_cpu_num = cpu;
- if (verbose)
- printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n",
- cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
+ pkg = get_pkg_num(cpu);
+
+ if (pkg >= MAX_PACKAGES)
+ errx(1, "cpu%d: %d >= MAX_PACKAGES (%d)", cpu, pkg, MAX_PACKAGES);
+
+ if (pkg > max_pkg_num)
+ max_pkg_num = pkg;
- return;
+ if ((pkg_present_set & (1ULL << pkg)) == 0) {
+ pkg_present_set |= (1ULL << pkg);
+ first_cpu_in_pkg[pkg] = cpu;
+ }
+
+ return 0;
+}
+int mark_cpu_present(int cpu)
+{
+ CPU_SET_S(cpu, cpu_setsize, cpu_present_set);
+ return 0;
}
-char *proc_stat = "/proc/stat";
/*
- * run func() on every cpu in /dev/cpu
+ * run func(cpu) on every cpu in /proc/stat
+ * return max_cpu number
*/
-void for_every_cpu(void (func)(int))
+int for_all_proc_cpus(int (func)(int))
{
FILE *fp;
+ int cpu_num;
int retval;
- fp = fopen(proc_stat, "r");
- if (fp == NULL) {
- perror(proc_stat);
- exit(1);
- }
+ fp = fopen_or_die(proc_stat, "r");
retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
- if (retval != 0) {
- perror("/proc/stat format");
- exit(1);
- }
+ if (retval != 0)
+ err(1, "%s: failed to parse format", proc_stat);
while (1) {
- int cpu;
-
- retval = fscanf(fp,
- "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
- &cpu);
+ retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
if (retval != 1)
break;
- func(cpu);
+ retval = func(cpu_num);
+ if (retval) {
+ fclose(fp);
+ return retval;
+ }
}
fclose(fp);
+ return 0;
+}
+
+void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
+{
+ int cpu_num;
+
+ for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num)
+ if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
+ func(cpu_num);
+}
+
+void init_data_structures(void)
+{
+ for_all_proc_cpus(set_max_cpu_pkg_num);
+
+ cpu_setsize = CPU_ALLOC_SIZE((max_cpu_num + 1));
+
+ cpu_present_set = CPU_ALLOC((max_cpu_num + 1));
+ if (cpu_present_set == NULL)
+ err(3, "CPU_ALLOC");
+ CPU_ZERO_S(cpu_setsize, cpu_present_set);
+ for_all_proc_cpus(mark_cpu_present);
+}
+
+/* clear has_hwp if it is not enable (or being enabled) */
+
+void verify_hwp_is_enabled(void)
+{
+ unsigned long long msr;
+
+ if (!has_hwp) /* set in early_cpuid() */
+ return;
+
+ /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
+ get_msr(base_cpu, MSR_PM_ENABLE, &msr);
+ if ((msr & 1) == 0) {
+ fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n");
+ has_hwp = 0;
+ return;
+ }
+}
+
+int req_update_bounds_check(void)
+{
+ if (!hwp_update_enabled())
+ return 0;
+
+ /* fail if min > max requested */
+ if ((update_hwp_max && update_hwp_min) &&
+ (req_update.hwp_min > req_update.hwp_max)) {
+ printf("hwp-min %d > hwp_max %d\n", req_update.hwp_min, req_update.hwp_max);
+ return -EINVAL;
+ }
+
+ /* fail if desired > max requestd */
+ if (req_update.hwp_desired && update_hwp_max &&
+ (req_update.hwp_desired > req_update.hwp_max)) {
+ printf("hwp-desired cannot be greater than hwp_max\n");
+ return -EINVAL;
+ }
+ /* fail if desired < min requestd */
+ if (req_update.hwp_desired && update_hwp_min &&
+ (req_update.hwp_desired < req_update.hwp_min)) {
+ printf("hwp-desired cannot be less than hwp_min\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void set_base_cpu(void)
+{
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(-ENODEV, "No valid cpus found");
+}
+
+
+void probe_dev_msr(void)
+{
+ struct stat sb;
+ char pathname[32];
+
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (stat(pathname, &sb))
+ if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+ err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+}
+/*
+ * early_cpuid()
+ * initialize turbo_is_enabled, has_hwp, has_epb
+ * before cmdline is parsed
+ */
+void early_cpuid(void)
+{
+ unsigned int eax, ebx, ecx, edx, max_level;
+ unsigned int fms, family, model;
+
+ __get_cpuid(0, &max_level, &ebx, &ecx, &edx);
+
+ if (max_level < 6)
+ errx(1, "Processor not supported\n");
+
+ __get_cpuid(1, &fms, &ebx, &ecx, &edx);
+ family = (fms >> 8) & 0xf;
+ model = (fms >> 4) & 0xf;
+ if (family == 6 || family == 0xf)
+ model += ((fms >> 16) & 0xf) << 4;
+
+ if (model == 0x4F) {
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
+
+ bdx_highest_ratio = msr & 0xFF;
+ }
+
+ __get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
+ turbo_is_enabled = (eax >> 1) & 1;
+ has_hwp = (eax >> 7) & 1;
+ has_epb = (ecx >> 3) & 1;
+}
+
+/*
+ * parse_cpuid()
+ * set
+ * has_hwp, has_hwp_notify, has_hwp_activity_window, has_hwp_epp, has_hwp_request_pkg, has_epb
+ */
+void parse_cpuid(void)
+{
+ unsigned int eax, ebx, ecx, edx, max_level;
+ unsigned int fms, family, model, stepping;
+
+ eax = ebx = ecx = edx = 0;
+
+ __get_cpuid(0, &max_level, &ebx, &ecx, &edx);
+
+ if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+ genuine_intel = 1;
+
+ if (debug)
+ fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
+ (char *)&ebx, (char *)&edx, (char *)&ecx);
+
+ __get_cpuid(1, &fms, &ebx, &ecx, &edx);
+ family = (fms >> 8) & 0xf;
+ model = (fms >> 4) & 0xf;
+ stepping = fms & 0xf;
+ if (family == 6 || family == 0xf)
+ model += ((fms >> 16) & 0xf) << 4;
+
+ if (debug) {
+ fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+ max_level, family, model, stepping, family, model, stepping);
+ fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n",
+ ecx & (1 << 0) ? "SSE3" : "-",
+ ecx & (1 << 3) ? "MONITOR" : "-",
+ ecx & (1 << 7) ? "EIST" : "-",
+ ecx & (1 << 8) ? "TM2" : "-",
+ edx & (1 << 4) ? "TSC" : "-",
+ edx & (1 << 5) ? "MSR" : "-",
+ edx & (1 << 22) ? "ACPI-TM" : "-",
+ edx & (1 << 29) ? "TM" : "-");
+ }
+
+ if (!(edx & (1 << 5)))
+ errx(1, "CPUID: no MSR");
+
+
+ __get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
+ /* turbo_is_enabled already set */
+ /* has_hwp already set */
+ has_hwp_notify = eax & (1 << 8);
+ has_hwp_activity_window = eax & (1 << 9);
+ has_hwp_epp = eax & (1 << 10);
+ has_hwp_request_pkg = eax & (1 << 11);
+
+ if (!has_hwp_request_pkg && update_hwp_use_pkg)
+ errx(1, "--hwp-use-pkg is not available on this hardware");
+
+ /* has_epb already set */
+
+ if (debug)
+ fprintf(stderr,
+ "CPUID(6): %sTURBO, %sHWP, %sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
+ turbo_is_enabled ? "" : "No-",
+ has_hwp ? "" : "No-",
+ has_hwp_notify ? "" : "No-",
+ has_hwp_activity_window ? "" : "No-",
+ has_hwp_epp ? "" : "No-",
+ has_hwp_request_pkg ? "" : "No-",
+ has_epb ? "" : "No-");
+
+ return; /* success */
}
int main(int argc, char **argv)
{
+ set_base_cpu();
+ probe_dev_msr();
+ init_data_structures();
+
+ early_cpuid(); /* initial cpuid parse before cmdline */
+
cmdline(argc, argv);
- if (verbose > 1)
- printf("x86_energy_perf_policy Nov 24, 2010"
- " - Len Brown <lenb@kernel.org>\n");
- if (verbose > 1 && !read_only)
- printf("new_bias %lld\n", new_bias);
-
- validate_cpuid();
-
- if (cpu != -1) {
- if (read_only)
- print_msr(cpu);
- else
- update_msr(cpu);
- } else {
- if (read_only)
- for_every_cpu(print_msr);
- else
- for_every_cpu(update_msr);
+ if (debug)
+ print_version();
+
+ parse_cpuid();
+
+ /* If CPU-set and PKG-set are not initialized, default to all CPUs */
+ if ((cpu_selected_set == 0) && (pkg_selected_set == 0))
+ cpu_selected_set = cpu_present_set;
+
+ /*
+ * If HWP is being enabled, do it now, so that subsequent operations
+ * that access HWP registers can work.
+ */
+ if (update_hwp_enable)
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, enable_hwp_on_cpu);
+
+ /* If HWP present, but disabled, warn and ignore from here forward */
+ verify_hwp_is_enabled();
+
+ if (req_update_bounds_check())
+ return -EINVAL;
+
+ /* display information only, no updates to settings */
+ if (!update_epb && !update_turbo && !hwp_update_enabled()) {
+ if (cpu_selected_set)
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, print_cpu_msrs);
+
+ if (has_hwp_request_pkg) {
+ if (pkg_selected_set == 0)
+ pkg_selected_set = pkg_present_set;
+
+ for_packages(pkg_selected_set, print_pkg_msrs);
+ }
+
+ return 0;
}
+ /* update CPU set */
+ if (cpu_selected_set) {
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs);
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs);
+ } else if (pkg_selected_set)
+ for_packages(pkg_selected_set, update_hwp_request_pkg);
+
return 0;
}
OpenPOWER on IntegriCloud