diff options
Diffstat (limited to 'tools')
35 files changed, 1617 insertions, 229 deletions
diff --git a/tools/firewire/nosy-dump.c b/tools/firewire/nosy-dump.c index f93b776370b6..3179c711bd65 100644 --- a/tools/firewire/nosy-dump.c +++ b/tools/firewire/nosy-dump.c @@ -150,6 +150,8 @@ subaction_create(uint32_t *data, size_t length) /* we put the ack in the subaction struct for easy access. */ sa = malloc(sizeof *sa - sizeof sa->packet + length); + if (!sa) + exit(EXIT_FAILURE); sa->ack = data[length / 4 - 1]; sa->length = length; memcpy(&sa->packet, data, length); @@ -180,6 +182,8 @@ link_transaction_lookup(int request_node, int response_node, int tlabel) } t = malloc(sizeof *t); + if (!t) + exit(EXIT_FAILURE); t->request_node = request_node; t->response_node = response_node; t->tlabel = tlabel; diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 5959affd8820..d25a46925e61 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -43,6 +43,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <dirent.h> +#include <net/if.h> /* * KVP protocol: The user mode component first registers with the @@ -88,6 +89,7 @@ static char *os_major = ""; static char *os_minor = ""; static char *processor_arch; static char *os_build; +static char *os_version; static char *lic_version = "Unknown version"; static struct utsname uts_buf; @@ -297,7 +299,7 @@ static int kvp_file_init(void) return 0; } -static int kvp_key_delete(int pool, __u8 *key, int key_size) +static int kvp_key_delete(int pool, const char *key, int key_size) { int i; int j, k; @@ -340,7 +342,7 @@ static int kvp_key_delete(int pool, __u8 *key, int key_size) return 1; } -static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value, +static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const char *value, int value_size) { int i; @@ -394,7 +396,7 @@ static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value, return 0; } -static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, +static int kvp_get_value(int pool, const char *key, int key_size, char *value, int value_size) { int i; @@ -426,8 +428,8 @@ static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, return 1; } -static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, - __u8 *value, int value_size) +static int kvp_pool_enumerate(int pool, int index, char *key, int key_size, + char *value, int value_size) { struct kvp_record *record; @@ -453,7 +455,9 @@ void kvp_get_os_info(void) char *p, buf[512]; uname(&uts_buf); - os_build = uts_buf.release; + os_version = uts_buf.release; + os_build = strdup(uts_buf.release); + os_name = uts_buf.sysname; processor_arch = uts_buf.machine; @@ -462,7 +466,7 @@ void kvp_get_os_info(void) * string to be of the form: x.y.z * Strip additional information we may have. */ - p = strchr(os_build, '-'); + p = strchr(os_version, '-'); if (p) *p = '\0'; @@ -879,7 +883,7 @@ static int kvp_process_ip_address(void *addrp, addr_length = INET6_ADDRSTRLEN; } - if ((length - *offset) < addr_length + 1) + if ((length - *offset) < addr_length + 2) return HV_E_FAIL; if (str == NULL) { strcpy(buffer, "inet_ntop failed\n"); @@ -887,11 +891,13 @@ static int kvp_process_ip_address(void *addrp, } if (*offset == 0) strcpy(buffer, tmp); - else + else { + strcat(buffer, ";"); strcat(buffer, tmp); - strcat(buffer, ";"); + } *offset += strlen(str) + 1; + return 0; } @@ -953,7 +959,9 @@ kvp_get_ip_info(int family, char *if_name, int op, * supported address families; if not we gather info on * the specified address family. */ - if ((family != 0) && (curp->ifa_addr->sa_family != family)) { + if ((((family != 0) && + (curp->ifa_addr->sa_family != family))) || + (curp->ifa_flags & IFF_LOOPBACK)) { curp = curp->ifa_next; continue; } @@ -1478,13 +1486,19 @@ int main(void) len = recvfrom(fd, kvp_recv_buffer, sizeof(kvp_recv_buffer), 0, addr_p, &addr_l); - if (len < 0 || addr.nl_pid) { + if (len < 0) { syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", addr.nl_pid, errno, strerror(errno)); close(fd); return -1; } + if (addr.nl_pid) { + syslog(LOG_WARNING, "Received packet from untrusted pid:%u", + addr.nl_pid); + continue; + } + incoming_msg = (struct nlmsghdr *)kvp_recv_buffer; incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg); hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data; @@ -1649,7 +1663,7 @@ int main(void) strcpy(key_name, "OSMinorVersion"); break; case OSVersion: - strcpy(key_value, os_build); + strcpy(key_value, os_version); strcpy(key_name, "OSVersion"); break; case ProcessorArchitecture: diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index fd2f9221b241..07a03452c227 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -179,29 +179,6 @@ static struct termios orig_term; #define wmb() __asm__ __volatile__("" : : : "memory") #define mb() __asm__ __volatile__("" : : : "memory") -/* - * Convert an iovec element to the given type. - * - * This is a fairly ugly trick: we need to know the size of the type and - * alignment requirement to check the pointer is kosher. It's also nice to - * have the name of the type in case we report failure. - * - * Typing those three things all the time is cumbersome and error prone, so we - * have a macro which sets them all up and passes to the real function. - */ -#define convert(iov, type) \ - ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) - -static void *_convert(struct iovec *iov, size_t size, size_t align, - const char *name) -{ - if (iov->iov_len != size) - errx(1, "Bad iovec size %zu for %s", iov->iov_len, name); - if ((unsigned long)iov->iov_base % align != 0) - errx(1, "Bad alignment %p for %s", iov->iov_base, name); - return iov->iov_base; -} - /* Wrapper for the last available index. Makes it easier to change. */ #define lg_last_avail(vq) ((vq)->last_avail_idx) @@ -228,7 +205,8 @@ static bool iov_empty(const struct iovec iov[], unsigned int num_iov) } /* Take len bytes from the front of this iovec. */ -static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) +static void iov_consume(struct iovec iov[], unsigned num_iov, + void *dest, unsigned len) { unsigned int i; @@ -236,11 +214,16 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) unsigned int used; used = iov[i].iov_len < len ? iov[i].iov_len : len; + if (dest) { + memcpy(dest, iov[i].iov_base, used); + dest += used; + } iov[i].iov_base += used; iov[i].iov_len -= used; len -= used; } - assert(len == 0); + if (len != 0) + errx(1, "iovec too short!"); } /* The device virtqueue descriptors are followed by feature bitmasks. */ @@ -864,7 +847,7 @@ static void console_output(struct virtqueue *vq) warn("Write to stdout gave %i (%d)", len, errno); break; } - iov_consume(iov, out, len); + iov_consume(iov, out, NULL, len); } /* @@ -1591,9 +1574,9 @@ static void blk_request(struct virtqueue *vq) { struct vblk_info *vblk = vq->dev->priv; unsigned int head, out_num, in_num, wlen; - int ret; + int ret, i; u8 *in; - struct virtio_blk_outhdr *out; + struct virtio_blk_outhdr out; struct iovec iov[vq->vring.num]; off64_t off; @@ -1603,32 +1586,36 @@ static void blk_request(struct virtqueue *vq) */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); - /* - * Every block request should contain at least one output buffer - * (detailing the location on disk and the type of request) and one - * input buffer (to hold the result). - */ - if (out_num == 0 || in_num == 0) - errx(1, "Bad virtblk cmd %u out=%u in=%u", - head, out_num, in_num); + /* Copy the output header from the front of the iov (adjusts iov) */ + iov_consume(iov, out_num, &out, sizeof(out)); + + /* Find and trim end of iov input array, for our status byte. */ + in = NULL; + for (i = out_num + in_num - 1; i >= out_num; i--) { + if (iov[i].iov_len > 0) { + in = iov[i].iov_base + iov[i].iov_len - 1; + iov[i].iov_len--; + break; + } + } + if (!in) + errx(1, "Bad virtblk cmd with no room for status"); - out = convert(&iov[0], struct virtio_blk_outhdr); - in = convert(&iov[out_num+in_num-1], u8); /* * For historical reasons, block operations are expressed in 512 byte * "sectors". */ - off = out->sector * 512; + off = out.sector * 512; /* * In general the virtio block driver is allowed to try SCSI commands. * It'd be nice if we supported eject, for example, but we don't. */ - if (out->type & VIRTIO_BLK_T_SCSI_CMD) { + if (out.type & VIRTIO_BLK_T_SCSI_CMD) { fprintf(stderr, "Scsi commands unsupported\n"); *in = VIRTIO_BLK_S_UNSUPP; wlen = sizeof(*in); - } else if (out->type & VIRTIO_BLK_T_OUT) { + } else if (out.type & VIRTIO_BLK_T_OUT) { /* * Write * @@ -1636,10 +1623,10 @@ static void blk_request(struct virtqueue *vq) * if they try to write past end. */ if (lseek64(vblk->fd, off, SEEK_SET) != off) - err(1, "Bad seek to sector %llu", out->sector); + err(1, "Bad seek to sector %llu", out.sector); - ret = writev(vblk->fd, iov+1, out_num-1); - verbose("WRITE to sector %llu: %i\n", out->sector, ret); + ret = writev(vblk->fd, iov, out_num); + verbose("WRITE to sector %llu: %i\n", out.sector, ret); /* * Grr... Now we know how long the descriptor they sent was, we @@ -1655,7 +1642,7 @@ static void blk_request(struct virtqueue *vq) wlen = sizeof(*in); *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); - } else if (out->type & VIRTIO_BLK_T_FLUSH) { + } else if (out.type & VIRTIO_BLK_T_FLUSH) { /* Flush */ ret = fdatasync(vblk->fd); verbose("FLUSH fdatasync: %i\n", ret); @@ -1669,10 +1656,9 @@ static void blk_request(struct virtqueue *vq) * if they try to read past end. */ if (lseek64(vblk->fd, off, SEEK_SET) != off) - err(1, "Bad seek to sector %llu", out->sector); + err(1, "Bad seek to sector %llu", out.sector); - ret = readv(vblk->fd, iov+1, in_num-1); - verbose("READ from sector %llu: %i\n", out->sector, ret); + ret = readv(vblk->fd, iov + out_num, in_num); if (ret >= 0) { wlen = sizeof(*in) + ret; *in = VIRTIO_BLK_S_OK; @@ -1758,7 +1744,7 @@ static void rng_input(struct virtqueue *vq) len = readv(rng_info->rfd, iov, in_num); if (len <= 0) err(1, "Read from /dev/random gave %i", len); - iov_consume(iov, in_num, len); + iov_consume(iov, in_num, NULL, len); totlen += len; } diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b38a1f9ad460..938e8904f64d 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -175,7 +175,7 @@ following filters are defined: + The option requires at least one branch type among any, any_call, any_ret, ind_call. -The privilege levels may be ommitted, in which case, the privilege levels of the associated +The privilege levels may be omitted, in which case, the privilege levels of the associated event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege levels are subject to permissions. When sampling on multiple events, branch stack sampling is enabled for all the sampling events. The sampled branch type is the same for all events. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 80db3f4bcf7a..39d41068484f 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -11,11 +11,21 @@ lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/include/asm/perf_regs.h +arch/*/include/uapi/asm/unistd*.h +arch/*/include/uapi/asm/perf_regs.h arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h include/linux/magic.h include/linux/hw_breakpoint.h +include/linux/rbtree_augmented.h +include/uapi/linux/perf_event.h +include/uapi/linux/const.h +include/uapi/linux/swab.h +include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/svm.h arch/x86/include/asm/vmx.h arch/x86/include/asm/kvm_host.h +arch/x86/include/uapi/asm/svm.h +arch/x86/include/uapi/asm/vmx.h +arch/x86/include/uapi/asm/kvm.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 317766ec070b..2cbaad83a6e2 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -58,7 +58,7 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ ) + -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) NO_PERF_REGS := 1 CC = $(CROSS_COMPILE)gcc diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore index 8a83dd2ffc11..d42073f12609 100644 --- a/tools/power/cpupower/.gitignore +++ b/tools/power/cpupower/.gitignore @@ -20,3 +20,10 @@ utils/cpufreq-set.o utils/cpufreq-aperf.o cpupower bench/cpufreq-bench +debug/kernel/Module.symvers +debug/i386/centrino-decode +debug/i386/dump_psb +debug/i386/intel_gsic +debug/i386/powernow-k8-decode +debug/x86_64/centrino-decode +debug/x86_64/powernow-k8-decode diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index cf397bd26d0c..d875a74a3bdf 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -253,7 +253,8 @@ clean: | xargs rm -f -rm -f $(OUTPUT)cpupower -rm -f $(OUTPUT)libcpupower.so* - -rm -rf $(OUTPUT)po/*.{gmo,pot} + -rm -rf $(OUTPUT)po/*.gmo + -rm -rf $(OUTPUT)po/*.pot $(MAKE) -C bench O=$(OUTPUT) clean diff --git a/tools/power/cpupower/debug/i386/Makefile b/tools/power/cpupower/debug/i386/Makefile index 3ba158f0e287..c05cc0ac80c7 100644 --- a/tools/power/cpupower/debug/i386/Makefile +++ b/tools/power/cpupower/debug/i386/Makefile @@ -26,7 +26,10 @@ $(OUTPUT)powernow-k8-decode: powernow-k8-decode.c all: $(OUTPUT)centrino-decode $(OUTPUT)dump_psb $(OUTPUT)intel_gsic $(OUTPUT)powernow-k8-decode clean: - rm -rf $(OUTPUT){centrino-decode,dump_psb,intel_gsic,powernow-k8-decode} + rm -rf $(OUTPUT)centrino-decode + rm -rf $(OUTPUT)dump_psb + rm -rf $(OUTPUT)intel_gsic + rm -rf $(OUTPUT)powernow-k8-decode install: $(INSTALL) -d $(DESTDIR)${bindir} diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index 1141c2073719..e01c35d13b6e 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -7,11 +7,11 @@ cpupower\-monitor \- Report processor frequency and idle statistics .RB "\-l" .B cpupower monitor -.RB [ "\-m <mon1>," [ "<mon2>,..." ] ] +.RB [ -c ] [ "\-m <mon1>," [ "<mon2>,..." ] ] .RB [ "\-i seconds" ] .br .B cpupower monitor -.RB [ "\-m <mon1>," [ "<mon2>,..." ] ] +.RB [ -c ][ "\-m <mon1>," [ "<mon2>,..." ] ] .RB command .br .SH DESCRIPTION @@ -64,6 +64,17 @@ Only display specific monitors. Use the monitor string(s) provided by \-l option Measure intervall. .RE .PP +\-c +.RS 4 +Schedule the process on every core before starting and ending measuring. +This could be needed for the Idle_Stats monitor when no other MSR based +monitor (has to be run on the core that is measured) is run in parallel. +This is to wake up the processors from deeper sleep states and let the +kernel re +-account its cpuidle (C-state) information before reading the +cpuidle timings from sysfs. +.RE +.PP command .RS 4 Measure idle and frequency characteristics of an arbitrary command/workload. diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c index 906895d21cce..93b0aa74ca03 100644 --- a/tools/power/cpupower/utils/helpers/cpuid.c +++ b/tools/power/cpupower/utils/helpers/cpuid.c @@ -158,6 +158,8 @@ out: cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO; case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3E: /* IVB Xeon */ cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO; cpu_info->caps |= CPUPOWER_CAP_IS_SNB; break; diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 2eb584cf2f55..aa9e95486a2d 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -92,6 +92,14 @@ extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info); extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ +struct cpuid_core_info { + int pkg; + int core; + int cpu; + + /* flags */ + unsigned int is_online:1; +}; /* CPU topology/hierarchy parsing ******************/ struct cpupower_topology { @@ -101,18 +109,12 @@ struct cpupower_topology { unsigned int threads; /* per core */ /* Array gets mallocated with cores entries, holding per core info */ - struct { - int pkg; - int core; - int cpu; - - /* flags */ - unsigned int is_online:1; - } *core_info; + struct cpuid_core_info *core_info; }; extern int get_cpu_topology(struct cpupower_topology *cpu_top); extern void cpu_topology_release(struct cpupower_topology cpu_top); + /* CPU topology/hierarchy parsing ******************/ /* X86 ONLY ****************************************/ diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c index 96e28c124b5c..38ab91629463 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.c +++ b/tools/power/cpupower/utils/helpers/sysfs.c @@ -37,25 +37,6 @@ unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) return (unsigned int) numread; } -static unsigned int sysfs_write_file(const char *path, - const char *value, size_t len) -{ - int fd; - ssize_t numwrite; - - fd = open(path, O_WRONLY); - if (fd == -1) - return 0; - - numwrite = write(fd, value, len); - if (numwrite < 1) { - close(fd); - return 0; - } - close(fd); - return (unsigned int) numwrite; -} - /* * Detect whether a CPU is online * diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index 4eae2c47ba48..c13120af519b 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -20,9 +20,8 @@ #include <helpers/sysfs.h> /* returns -1 on failure, 0 on success */ -int sysfs_topology_read_file(unsigned int cpu, const char *fname) +static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result) { - unsigned long value; char linebuf[MAX_LINE_LEN]; char *endp; char path[SYSFS_PATH_MAX]; @@ -31,20 +30,12 @@ int sysfs_topology_read_file(unsigned int cpu, const char *fname) cpu, fname); if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) return -1; - value = strtoul(linebuf, &endp, 0); + *result = strtol(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) return -1; - return value; + return 0; } -struct cpuid_core_info { - unsigned int pkg; - unsigned int thread; - unsigned int cpu; - /* flags */ - unsigned int is_online:1; -}; - static int __compare(const void *t1, const void *t2) { struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1; @@ -53,9 +44,9 @@ static int __compare(const void *t1, const void *t2) return -1; else if (top1->pkg > top2->pkg) return 1; - else if (top1->thread < top2->thread) + else if (top1->core < top2->core) return -1; - else if (top1->thread > top2->thread) + else if (top1->core > top2->core) return 1; else if (top1->cpu < top2->cpu) return -1; @@ -73,28 +64,42 @@ static int __compare(const void *t1, const void *t2) */ int get_cpu_topology(struct cpupower_topology *cpu_top) { - int cpu, cpus = sysconf(_SC_NPROCESSORS_CONF); + int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF); - cpu_top->core_info = malloc(sizeof(struct cpupower_topology) * cpus); + cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus); if (cpu_top->core_info == NULL) return -ENOMEM; cpu_top->pkgs = cpu_top->cores = 0; for (cpu = 0; cpu < cpus; cpu++) { cpu_top->core_info[cpu].cpu = cpu; cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); - cpu_top->core_info[cpu].pkg = - sysfs_topology_read_file(cpu, "physical_package_id"); - if ((int)cpu_top->core_info[cpu].pkg != -1 && - cpu_top->core_info[cpu].pkg > cpu_top->pkgs) - cpu_top->pkgs = cpu_top->core_info[cpu].pkg; - cpu_top->core_info[cpu].core = - sysfs_topology_read_file(cpu, "core_id"); + if(sysfs_topology_read_file( + cpu, + "physical_package_id", + &(cpu_top->core_info[cpu].pkg)) < 0) + return -1; + if(sysfs_topology_read_file( + cpu, + "core_id", + &(cpu_top->core_info[cpu].core)) < 0) + return -1; } - cpu_top->pkgs++; qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), __compare); + /* Count the number of distinct pkgs values. This works + because the primary sort of the core_info struct was just + done by pkg value. */ + last_pkg = cpu_top->core_info[0].pkg; + for(cpu = 1; cpu < cpus; cpu++) { + if(cpu_top->core_info[cpu].pkg != last_pkg) { + last_pkg = cpu_top->core_info[cpu].pkg; + cpu_top->pkgs++; + } + } + cpu_top->pkgs++; + /* Intel's cores count is not consecutively numbered, there may * be a core_id of 3, but none of 2. Assume there always is 0 * Get amount of cores by counting duplicates in a package diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 0d6571e418db..c4bae9203a69 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -39,6 +39,7 @@ static int mode; static int interval = 1; static char *show_monitors_param; static struct cpupower_topology cpu_top; +static unsigned int wake_cpus; /* ToDo: Document this in the manpage */ static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', }; @@ -84,7 +85,7 @@ int fill_string_with_spaces(char *s, int n) void print_header(int topology_depth) { int unsigned mon; - int state, need_len, pr_mon_len; + int state, need_len; cstate_t s; char buf[128] = ""; int percent_width = 4; @@ -93,7 +94,6 @@ void print_header(int topology_depth) printf("%s|", buf); for (mon = 0; mon < avail_monitors; mon++) { - pr_mon_len = 0; need_len = monitors[mon]->hw_states_num * (percent_width + 3) - 1; if (mon != 0) { @@ -315,16 +315,28 @@ int fork_it(char **argv) int do_interval_measure(int i) { unsigned int num; + int cpu; + + if (wake_cpus) + for (cpu = 0; cpu < cpu_count; cpu++) + bind_cpu(cpu); for (num = 0; num < avail_monitors; num++) { dprint("HW C-state residency monitor: %s - States: %d\n", monitors[num]->name, monitors[num]->hw_states_num); monitors[num]->start(); } + sleep(i); + + if (wake_cpus) + for (cpu = 0; cpu < cpu_count; cpu++) + bind_cpu(cpu); + for (num = 0; num < avail_monitors; num++) monitors[num]->stop(); + return 0; } @@ -333,7 +345,7 @@ static void cmdline(int argc, char *argv[]) int opt; progname = basename(argv[0]); - while ((opt = getopt(argc, argv, "+li:m:")) != -1) { + while ((opt = getopt(argc, argv, "+lci:m:")) != -1) { switch (opt) { case 'l': if (mode) @@ -352,6 +364,9 @@ static void cmdline(int argc, char *argv[]) mode = show; show_monitors_param = optarg; break; + case 'c': + wake_cpus = 1; + break; default: print_wrong_arg_exit(); } diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 9312ee1f2dbc..9e43f3371fbc 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -65,4 +65,21 @@ extern long long timespec_diff_us(struct timespec start, struct timespec end); "could be inaccurate\n"), mes, ov); \ } + +/* Taken over from x86info project sources -> return 0 on success */ +#include <sched.h> +#include <sys/types.h> +#include <unistd.h> +static inline int bind_cpu(int cpu) +{ + cpu_set_t set; + + if (sched_getaffinity(getpid(), sizeof(set), &set) == 0) { + CPU_ZERO(&set); + CPU_SET(cpu, &set); + return sched_setaffinity(getpid(), sizeof(set), &set); + } + return 1; +} + #endif /* __CPUIDLE_INFO_HW__ */ diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index a1bc07cd53e1..a99b43b97d6d 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -150,9 +150,15 @@ static struct cpuidle_monitor *snb_register(void) || cpupower_cpu_info.family != 6) return NULL; - if (cpupower_cpu_info.model != 0x2A - && cpupower_cpu_info.model != 0x2D) + switch (cpupower_cpu_info.model) { + case 0x2A: /* SNB */ + case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3E: /* IVB Xeon */ + break; + default: return NULL; + } is_valid = calloc(cpu_count, sizeof(int)); for (num = 0; num < SNB_CSTATE_COUNT; num++) { diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index f85649554191..f09641da40d4 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,9 +1,22 @@ +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(PWD) +PREFIX := /usr +DESTDIR := + turbostat : turbostat.c CFLAGS += -Wall +CFLAGS += -I../../../../arch/x86/include/uapi/ + +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ +.PHONY : clean clean : - rm -f turbostat + @rm -f $(BUILD_OUTPUT)/turbostat -install : - install turbostat /usr/bin/turbostat - install turbostat.8 /usr/share/man/man8 +install : turbostat + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat + install -d $(DESTDIR)$(PREFIX)/share/man/man8 + install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e4d0690cccf9..0d7dc2cfefb5 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -11,16 +11,16 @@ turbostat \- Report processor frequency and idle statistics .RB [ Options ] .RB [ "\-i interval_sec" ] .SH DESCRIPTION -\fBturbostat \fP reports processor topology, frequency -and idle power state statistics on modern X86 processors. +\fBturbostat \fP reports processor topology, frequency, +idle power-state statistics, temperature and power on modern X86 processors. Either \fBcommand\fP is forked and statistics are printed upon its completion, or statistics are printed periodically. \fBturbostat \fP -requires that the processor +must be run on root, and +minimally requires that the processor supports an "invariant" TSC, plus the APERF and MPERF MSRs. -\fBturbostat \fP will report idle cpu power state residency -on processors that additionally support C-state residency counters. +Additional information is reported depending on hardware counter support. .SS Options The \fB-p\fP option limits output to the 1st thread in 1st core of each package. @@ -57,7 +57,15 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T \fBGHz\fP average clock rate while the CPU was in c0 state. \fBTSC\fP average GHz that the TSC ran during the entire interval. \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. +\fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. +\fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor. \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. +\fBPkg_W\fP Watts consumed by the whole package. +\fBCor_W\fP Watts consumed by the core part of the package. +\fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors. +\fBRAM_W\fP Watts consumed by the DRAM DIMMS -- available only on server processors. +\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. +\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. .fi .PP .SH EXAMPLE @@ -66,50 +74,73 @@ Without any parameters, turbostat prints out counters ever 5 seconds. for turbostat to fork). The first row of statistics is a summary for the entire system. -Note that the summary is a weighted average. +For residency % columns, the summary is a weighted average. +For Temperature columns, the summary is the column maximum. +For Watts columns, the summary is a system total. Subsequent rows show per-CPU statistics. .nf -[root@x980]# ./turbostat -cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 - 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 - 0 6 0.05 1.62 3.38 10.34 - 1 2 0.03 1.62 3.38 0.07 0.05 99.86 - 1 8 0.03 1.62 3.38 0.06 - 2 4 0.21 1.62 3.38 0.10 1.49 98.21 - 2 10 0.02 1.62 3.38 0.29 - 8 1 0.04 1.62 3.38 0.04 0.08 99.84 - 8 7 0.01 1.62 3.38 0.06 - 9 3 0.53 1.62 3.38 0.10 0.20 99.17 - 9 9 0.02 1.62 3.38 0.60 - 10 5 0.01 1.62 3.38 0.02 0.04 99.92 - 10 11 0.02 1.62 3.38 0.02 +[root@sandy]# ./turbostat +cor CPU %c0 GHz TSC %c1 %c3 %c6 %c7 CTMP PTMP %pc2 %pc3 %pc6 %pc7 Pkg_W Cor_W GFX_W + 0.06 0.80 2.29 0.11 0.00 0.00 99.83 47 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 + 0 0 0.07 0.80 2.29 0.07 0.00 0.00 99.86 40 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 + 0 4 0.03 0.80 2.29 0.12 + 1 1 0.04 0.80 2.29 0.25 0.01 0.00 99.71 40 + 1 5 0.16 0.80 2.29 0.13 + 2 2 0.05 0.80 2.29 0.06 0.01 0.00 99.88 40 + 2 6 0.03 0.80 2.29 0.08 + 3 3 0.05 0.80 2.29 0.08 0.00 0.00 99.87 47 + 3 7 0.04 0.84 2.29 0.09 .fi .SH SUMMARY EXAMPLE The "-s" option prints the column headers just once, and then the one line system summary for each sample interval. .nf -[root@x980]# ./turbostat -s - %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 - 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 - 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 - 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 +[root@wsm]# turbostat -S + %c0 GHz TSC %c1 %c3 %c6 CTMP %pc3 %pc6 + 1.40 2.81 3.38 10.78 43.47 44.35 42 13.67 2.09 + 1.34 2.90 3.38 11.48 58.96 28.23 41 19.89 0.15 + 1.55 2.72 3.38 26.73 37.66 34.07 42 2.53 2.80 + 1.37 2.83 3.38 16.95 60.05 21.63 42 5.76 0.20 .fi .SH VERBOSE EXAMPLE The "-v" option adds verbosity to the output: .nf -GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) -12 * 133 = 1600 MHz max efficiency -25 * 133 = 3333 MHz TSC frequency -26 * 133 = 3467 MHz max turbo 4 active cores -26 * 133 = 3467 MHz max turbo 3 active cores -27 * 133 = 3600 MHz max turbo 2 active cores -27 * 133 = 3600 MHz max turbo 1 active cores - +[root@ivy]# turbostat -v +turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org> +CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) +CPUID(6): APERF, DTS, PTM, EPB +RAPL: 851 sec. Joule Counter Range +cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 +16 * 100 = 1600 MHz max efficiency +35 * 100 = 3500 MHz TSC frequency +cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) +cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 +37 * 100 = 3700 MHz max turbo 4 active cores +38 * 100 = 3800 MHz max turbo 3 active cores +39 * 100 = 3900 MHz max turbo 2 active cores +39 * 100 = 3900 MHz max turbo 1 active cores +cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) +cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) +cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) +cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) +cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) +cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) +cpu0: MSR_PP0_POLICY: 0 +cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_PP1_POLICY: 0 +cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) +cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) + ... .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency available at the minimum package voltage. The \fBTSC frequency\fP is the nominal @@ -142,7 +173,7 @@ cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 10 5 1.42 3.43 3.38 2.14 30.99 65.44 10 11 0.16 2.88 3.38 3.40 .fi -Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit +Above the cycle soaker drives cpu7 up its 3.6 GHz turbo limit while the other processors are generally in various states of idle. Note that cpu1 and cpu7 are HT siblings within core8. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ea095abbe97e..ce6d46038f74 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -20,6 +20,7 @@ */ #define _GNU_SOURCE +#include <asm/msr.h> #include <stdio.h> #include <unistd.h> #include <sys/types.h> @@ -35,28 +36,18 @@ #include <ctype.h> #include <sched.h> -#define MSR_NEHALEM_PLATFORM_INFO 0xCE -#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD -#define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE -#define MSR_APERF 0xE8 -#define MSR_MPERF 0xE7 -#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ -#define MSR_PKG_C3_RESIDENCY 0x3F8 -#define MSR_PKG_C6_RESIDENCY 0x3F9 -#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ -#define MSR_CORE_C3_RESIDENCY 0x3FC -#define MSR_CORE_C6_RESIDENCY 0x3FD -#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ - char *proc_stat = "/proc/stat"; unsigned int interval_sec = 5; /* set with -i interval_sec */ unsigned int verbose; /* set with -v */ +unsigned int rapl_verbose; /* set with -R */ +unsigned int thermal_verbose; /* set with -T */ unsigned int summary_only; /* set with -s */ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int has_aperf; +unsigned int has_epb; unsigned int units = 1000000000; /* Ghz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; @@ -74,6 +65,23 @@ unsigned int show_cpu; unsigned int show_pkg_only; unsigned int show_core_only; char *output_buffer, *outp; +unsigned int do_rapl; +unsigned int do_dts; +unsigned int do_ptm; +unsigned int tcc_activation_temp; +unsigned int tcc_activation_temp_override; +double rapl_power_units, rapl_energy_units, rapl_time_units; +double rapl_joule_counter_range; + +#define RAPL_PKG (1 << 0) +#define RAPL_CORES (1 << 1) +#define RAPL_GFX (1 << 2) +#define RAPL_DRAM (1 << 3) +#define RAPL_PKG_PERF_STATUS (1 << 4) +#define RAPL_DRAM_PERF_STATUS (1 << 5) +#define TJMAX_DEFAULT 100 + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) int aperf_mperf_unstable; int backwards_count; @@ -101,6 +109,7 @@ struct core_data { unsigned long long c3; unsigned long long c6; unsigned long long c7; + unsigned int core_temp_c; unsigned int core_id; } *core_even, *core_odd; @@ -110,6 +119,14 @@ struct pkg_data { unsigned long long pc6; unsigned long long pc7; unsigned int package_id; + unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ + unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + unsigned int pkg_temp_c; + } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -247,6 +264,12 @@ void print_header(void) outp += sprintf(outp, " %%c6"); if (do_snb_cstates) outp += sprintf(outp, " %%c7"); + + if (do_dts) + outp += sprintf(outp, " CTMP"); + if (do_ptm) + outp += sprintf(outp, " PTMP"); + if (do_snb_cstates) outp += sprintf(outp, " %%pc2"); if (do_nhm_cstates) @@ -256,6 +279,19 @@ void print_header(void) if (do_snb_cstates) outp += sprintf(outp, " %%pc7"); + if (do_rapl & RAPL_PKG) + outp += sprintf(outp, " Pkg_W"); + if (do_rapl & RAPL_CORES) + outp += sprintf(outp, " Cor_W"); + if (do_rapl & RAPL_GFX) + outp += sprintf(outp, " GFX_W"); + if (do_rapl & RAPL_DRAM) + outp += sprintf(outp, " RAM_W"); + if (do_rapl & RAPL_PKG_PERF_STATUS) + outp += sprintf(outp, " PKG_%%"); + if (do_rapl & RAPL_DRAM_PERF_STATUS) + outp += sprintf(outp, " RAM_%%"); + outp += sprintf(outp, "\n"); } @@ -285,6 +321,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, fprintf(stderr, "c3: %016llX\n", c->c3); fprintf(stderr, "c6: %016llX\n", c->c6); fprintf(stderr, "c7: %016llX\n", c->c7); + fprintf(stderr, "DTS: %dC\n", c->core_temp_c); } if (p) { @@ -293,6 +330,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, fprintf(stderr, "pc3: %016llX\n", p->pc3); fprintf(stderr, "pc6: %016llX\n", p->pc6); fprintf(stderr, "pc7: %016llX\n", p->pc7); + fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg); + fprintf(stderr, "Joules COR: %0X\n", p->energy_cores); + fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx); + fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram); + fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status); + fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); + fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c); } return 0; } @@ -302,14 +346,21 @@ int dump_counters(struct thread_data *t, struct core_data *c, * package: "pk" 2 columns %2d * core: "cor" 3 columns %3d * CPU: "CPU" 3 columns %3d + * Pkg_W: %6.2 + * Cor_W: %6.2 + * GFX_W: %5.2 + * RAM_W: %5.2 * GHz: "GHz" 3 columns %3.2 * TSC: "TSC" 3 columns %3.2 * percentage " %pc3" %6.2 + * Perf Status percentage: %5.2 + * "CTMP" 4 columns %4d */ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { double interval_float; + char *fmt5, *fmt6; /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) @@ -349,7 +400,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (show_cpu) outp += sprintf(outp, " %3d", t->cpu_id); } - /* %c0 */ if (do_nhm_cstates) { if (show_pkg || show_core || show_cpu) @@ -414,10 +464,16 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); + if (do_dts) + outp += sprintf(outp, " %4d", c->core_temp_c); + /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; + if (do_ptm) + outp += sprintf(outp, " %4d", p->pkg_temp_c); + if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); if (do_nhm_cstates) @@ -426,6 +482,32 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); + + /* + * If measurement interval exceeds minimum RAPL Joule Counter range, + * indicate that results are suspect by printing "**" in fraction place. + */ + if (interval_float < rapl_joule_counter_range) { + fmt5 = " %5.2f"; + fmt6 = " %6.2f"; + } else { + fmt5 = " %3.0f**"; + fmt6 = " %4.0f**"; + } + + if (do_rapl & RAPL_PKG) + outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float); + if (do_rapl & RAPL_CORES) + outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float); + if (do_rapl & RAPL_GFX) + outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float); + if (do_rapl & RAPL_DRAM) + outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float); + if (do_rapl & RAPL_PKG_PERF_STATUS ) + outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + if (do_rapl & RAPL_DRAM_PERF_STATUS ) + outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + done: outp += sprintf(outp, "\n"); @@ -435,6 +517,7 @@ done: void flush_stdout() { fputs(output_buffer, stdout); + fflush(stdout); outp = output_buffer; } void flush_stderr() @@ -461,6 +544,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ for_all_cpus(format_counters, t, c, p); } +#define DELTA_WRAP32(new, old) \ + if (new > old) { \ + old = new - old; \ + } else { \ + old = 0x100000000 + new - old; \ + } + void delta_package(struct pkg_data *new, struct pkg_data *old) { @@ -468,6 +558,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc3 = new->pc3 - old->pc3; old->pc6 = new->pc6 - old->pc6; old->pc7 = new->pc7 - old->pc7; + old->pkg_temp_c = new->pkg_temp_c; + + DELTA_WRAP32(new->energy_pkg, old->energy_pkg); + DELTA_WRAP32(new->energy_cores, old->energy_cores); + DELTA_WRAP32(new->energy_gfx, old->energy_gfx); + DELTA_WRAP32(new->energy_dram, old->energy_dram); + DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); + DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); } void @@ -476,6 +574,7 @@ delta_core(struct core_data *new, struct core_data *old) old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; + old->core_temp_c = new->core_temp_c; } /* @@ -582,11 +681,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c3 = 0; c->c6 = 0; c->c7 = 0; + c->core_temp_c = 0; p->pc2 = 0; p->pc3 = 0; p->pc6 = 0; p->pc7 = 0; + + p->energy_pkg = 0; + p->energy_dram = 0; + p->energy_cores = 0; + p->energy_gfx = 0; + p->rapl_pkg_perf_status = 0; + p->rapl_dram_perf_status = 0; + p->pkg_temp_c = 0; } int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -607,6 +715,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.c6 += c->c6; average.cores.c7 += c->c7; + average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + /* sum per-pkg values only for 1st core in pkg */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; @@ -616,6 +726,15 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.pc6 += p->pc6; average.packages.pc7 += p->pc7; + average.packages.energy_pkg += p->energy_pkg; + average.packages.energy_dram += p->energy_dram; + average.packages.energy_cores += p->energy_cores; + average.packages.energy_gfx += p->energy_gfx; + + average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); + + average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; + average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; return 0; } /* @@ -667,23 +786,26 @@ static unsigned long long rdtsc(void) int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int cpu = t->cpu_id; + unsigned long long msr; - if (cpu_migrate(cpu)) + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); return -1; + } t->tsc = rdtsc(); /* we are running on local CPU of interest */ if (has_aperf) { - if (get_msr(cpu, MSR_APERF, &t->aperf)) + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) return -3; - if (get_msr(cpu, MSR_MPERF, &t->mperf)) + if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) return -4; } if (extra_delta_offset32) { - if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) + if (get_msr(cpu, extra_delta_offset32, &msr)) return -5; - t->extra_delta32 &= 0xFFFFFFFF; + t->extra_delta32 = msr & 0xFFFFFFFF; } if (extra_delta_offset64) @@ -691,9 +813,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -5; if (extra_msr_offset32) { - if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) + if (get_msr(cpu, extra_msr_offset32, &msr)) return -5; - t->extra_msr32 &= 0xFFFFFFFF; + t->extra_msr32 = msr & 0xFFFFFFFF; } if (extra_msr_offset64) @@ -715,6 +837,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; + if (do_dts) { + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return -9; + c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); + } + + /* collect package counters only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; @@ -731,6 +860,41 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) return -12; } + if (do_rapl & RAPL_PKG) { + if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) + return -13; + p->energy_pkg = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_CORES) { + if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) + return -14; + p->energy_cores = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_DRAM) { + if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) + return -15; + p->energy_dram = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_GFX) { + if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) + return -16; + p->energy_gfx = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_PKG_PERF_STATUS) { + if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) + return -16; + p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_DRAM_PERF_STATUS) { + if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) + return -16; + p->rapl_dram_perf_status = msr & 0xFFFFFFFF; + } + if (do_ptm) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return -17; + p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); + } return 0; } @@ -742,10 +906,10 @@ void print_verbose_header(void) if (!do_nehalem_platform_info) return; - get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); + get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); - if (verbose > 1) - fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + if (verbose) + fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", @@ -760,8 +924,8 @@ void print_verbose_header(void) get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); - if (verbose > 1) - fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + if (verbose) + fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -804,14 +968,56 @@ void print_verbose_header(void) ratio, bclk, ratio * bclk); print_nhm_turbo_ratio_limits: + get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + + fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); + + fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", + (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", + (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", + (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", + (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", + (msr & (1 << 15)) ? "" : "UN", + (unsigned int)msr & 7); + + + switch(msr & 0x7) { + case 0: + fprintf(stderr, "pc0"); + break; + case 1: + fprintf(stderr, do_snb_cstates ? "pc2" : "pc0"); + break; + case 2: + fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3"); + break; + case 3: + fprintf(stderr, "pc6"); + break; + case 4: + fprintf(stderr, "pc7"); + break; + case 5: + fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid"); + break; + case 7: + fprintf(stderr, "unlimited"); + break; + default: + fprintf(stderr, "invalid"); + } + fprintf(stderr, ")\n"); if (!do_nehalem_turbo_ratio_limit) return; - get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); + get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); - if (verbose > 1) - fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + if (verbose) + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1100,15 +1306,22 @@ int mark_cpu_present(int cpu) void turbostat_loop() { int retval; + int restarted = 0; restart: + restarted++; + retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { exit(retval); } else if (retval == -1) { + if (restarted > 1) { + exit(retval); + } re_initialize(); goto restart; } + restarted = 0; gettimeofday(&tv_even, (struct timezone *)NULL); while (1) { @@ -1207,6 +1420,299 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) } } +/* + * print_epb() + * Decode the ENERGY_PERF_BIAS MSR + */ +int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + char *epb_string; + int cpu; + + if (!has_epb) + return 0; + + cpu = t->cpu_id; + + /* EPB is per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) + return 0; + + switch (msr & 0x7) { + case ENERGY_PERF_BIAS_PERFORMANCE: + epb_string = "performance"; + break; + case ENERGY_PERF_BIAS_NORMAL: + epb_string = "balanced"; + break; + case ENERGY_PERF_BIAS_POWERSAVE: + epb_string = "powersave"; + break; + default: + epb_string = "custom"; + break; + } + fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); + + return 0; +} + +#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ +#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ + +/* + * rapl_probe() + * + * sets do_rapl + */ +void rapl_probe(unsigned int family, unsigned int model) +{ + unsigned long long msr; + double tdp; + + if (!genuine_intel) + return; + + if (family != 6) + return; + + switch (model) { + case 0x2A: + case 0x3A: + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; + break; + case 0x2D: + case 0x3E: + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS; + break; + default: + return; + } + + /* units on package 0, verify later other packages match */ + if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) + return; + + rapl_power_units = 1.0 / (1 << (msr & 0xF)); + rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); + + /* get TDP to determine energy counter range */ + if (get_msr(0, MSR_PKG_POWER_INFO, &msr)) + return; + + tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + + if (verbose) + fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range); + + return; +} + +int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int dts; + int cpu; + + if (!(do_dts || do_ptm)) + return 0; + + cpu = t->cpu_id; + + /* DTS is per-core, no need to print for each thread */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", + cpu, msr, tcc_activation_temp - dts); + +#ifdef THERM_DEBUG + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); +#endif + } + + + if (do_dts) { + unsigned int resolution; + + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + resolution = (msr >> 27) & 0xF; + fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", + cpu, msr, tcc_activation_temp - dts, resolution); + +#ifdef THERM_DEBUG + if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); +#endif + } + + return 0; +} + +void print_power_limit_msr(int cpu, unsigned long long msr, char *label) +{ + fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", + cpu, label, + ((msr >> 15) & 1) ? "EN" : "DIS", + ((msr >> 0) & 0x7FFF) * rapl_power_units, + (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, + (((msr >> 16) & 1) ? "EN" : "DIS")); + + return; +} + +int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units; + + if (!do_rapl) + return 0; + + /* RAPL counters are per package, so print only for 1st thread/package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + cpu = t->cpu_id; + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) + return -1; + + local_rapl_power_units = 1.0 / (1 << (msr & 0xF)); + local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); + + if (local_rapl_power_units != rapl_power_units) + fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu); + if (local_rapl_energy_units != rapl_energy_units) + fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu); + if (local_rapl_time_units != rapl_time_units) + fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu); + + if (verbose) { + fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " + "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, + local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units); + } + if (do_rapl & RAPL_PKG) { + if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) + return -5; + + + fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + cpu, msr, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + + if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) + return -9; + + fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 63) & 1 ? "": "UN"); + + print_power_limit_msr(cpu, msr, "PKG Limit #1"); + fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", + cpu, + ((msr >> 47) & 1) ? "EN" : "DIS", + ((msr >> 32) & 0x7FFF) * rapl_power_units, + (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, + ((msr >> 48) & 1) ? "EN" : "DIS"); + } + + if (do_rapl & RAPL_DRAM) { + if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) + return -6; + + + fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + cpu, msr, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + + + if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) + return -9; + fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "": "UN"); + + print_power_limit_msr(cpu, msr, "DRAM Limit"); + } + if (do_rapl & RAPL_CORES) { + if (verbose) { + if (get_msr(cpu, MSR_PP0_POLICY, &msr)) + return -7; + + fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); + + if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) + return -9; + fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "": "UN"); + print_power_limit_msr(cpu, msr, "Cores Limit"); + } + } + if (do_rapl & RAPL_GFX) { + if (verbose) { + if (get_msr(cpu, MSR_PP1_POLICY, &msr)) + return -8; + + fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + + if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) + return -9; + fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "": "UN"); + print_power_limit_msr(cpu, msr, "GFX Limit"); + } + } + return 0; +} + int is_snb(unsigned int family, unsigned int model) { @@ -1231,6 +1737,72 @@ double discover_bclk(unsigned int family, unsigned int model) return 133.33; } +/* + * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where + * the Thermal Control Circuit (TCC) activates. + * This is usually equal to tjMax. + * + * Older processors do not have this MSR, so there we guess, + * but also allow cmdline over-ride with -T. + * + * Several MSR temperature values are in units of degrees-C + * below this value, including the Digital Thermal Sensor (DTS), + * Package Thermal Management Sensor (PTM), and thermal event thresholds. + */ +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int target_c_local; + int cpu; + + /* tcc_activation_temp is used only for dts or ptm */ + if (!(do_dts || do_ptm)) + return 0; + + /* this is a per-package concept */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + cpu = t->cpu_id; + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (tcc_activation_temp_override != 0) { + tcc_activation_temp = tcc_activation_temp_override; + fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", + cpu, tcc_activation_temp); + return 0; + } + + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nehalem_platform_info) + goto guess; + + if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) + goto guess; + + target_c_local = (msr >> 16) & 0x7F; + + if (verbose) + fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", + cpu, msr, target_c_local); + + if (target_c_local < 85 || target_c_local > 120) + goto guess; + + tcc_activation_temp = target_c_local; + + return 0; + +guess: + tcc_activation_temp = TJMAX_DEFAULT; + fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", + cpu, tcc_activation_temp); + + return 0; +} void check_cpuid() { unsigned int eax, ebx, ecx, edx, max_level; @@ -1244,7 +1816,7 @@ void check_cpuid() genuine_intel = 1; if (verbose) - fprintf(stderr, "%.4s%.4s%.4s ", + fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); @@ -1295,10 +1867,19 @@ void check_cpuid() asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); has_aperf = ecx & (1 << 0); - if (!has_aperf) { - fprintf(stderr, "No APERF MSR\n"); - exit(1); - } + do_dts = eax & (1 << 0); + do_ptm = eax & (1 << 6); + has_epb = ecx & (1 << 3); + + if (verbose) + fprintf(stderr, "CPUID(6): %s%s%s%s\n", + has_aperf ? "APERF" : "No APERF!", + do_dts ? ", DTS" : "", + do_ptm ? ", PTM": "", + has_epb ? ", EPB": ""); + + if (!has_aperf) + exit(-1); do_nehalem_platform_info = genuine_intel && has_invariant_tsc; do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ @@ -1307,12 +1888,15 @@ void check_cpuid() do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); + rapl_probe(family, model); + + return; } void usage() { - fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", + fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", progname); exit(1); } @@ -1548,6 +2132,17 @@ void turbostat_init() if (verbose) print_verbose_header(); + + if (verbose) + for_all_cpus(print_epb, ODD_COUNTERS); + + if (verbose) + for_all_cpus(print_rapl, ODD_COUNTERS); + + for_all_cpus(set_temperature_target, ODD_COUNTERS); + + if (verbose) + for_all_cpus(print_thermal, ODD_COUNTERS); } int fork_it(char **argv) @@ -1604,7 +2199,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) { + while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) { switch (opt) { case 'p': show_core_only++; @@ -1636,6 +2231,12 @@ void cmdline(int argc, char **argv) case 'M': sscanf(optarg, "%x", &extra_msr_offset64); break; + case 'R': + rapl_verbose++; + break; + case 'T': + tcc_activation_temp_override = atoi(optarg); + break; default: usage(); } @@ -1646,8 +2247,8 @@ int main(int argc, char **argv) { cmdline(argc, argv); - if (verbose > 1) - fprintf(stderr, "turbostat v2.1 October 6, 2012" + if (verbose) + fprintf(stderr, "turbostat v3.0 November 23, 2012" " - Len Brown <lenb@kernel.org>\n"); turbostat_init(); diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index f458237fdd79..971c9ffdcb50 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -1,8 +1,10 @@ +DESTDIR ?= + x86_energy_perf_policy : x86_energy_perf_policy.c clean : rm -f x86_energy_perf_policy install : - install x86_energy_perf_policy /usr/bin/ - install x86_energy_perf_policy.8 /usr/share/man/man8/ + install x86_energy_perf_policy ${DESTDIR}/usr/bin/ + install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/ diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 33c5c7ee148f..40b3e5482f8a 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -289,7 +289,7 @@ void for_every_cpu(void (func)(int)) "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); if (retval != 1) - return; + break; func(cpu); } diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index c7ba7614061b..35fc584a4ffe 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -53,6 +53,9 @@ my %default = ( "STOP_AFTER_FAILURE" => 60, "STOP_TEST_AFTER" => 600, "MAX_MONITOR_WAIT" => 1800, + "GRUB_REBOOT" => "grub2-reboot", + "SYSLINUX" => "extlinux", + "SYSLINUX_PATH" => "/boot/extlinux", # required, and we will ask users if they don't have them but we keep the default # value something that is common. @@ -105,7 +108,12 @@ my $scp_to_target; my $scp_to_target_install; my $power_off; my $grub_menu; +my $grub_file; my $grub_number; +my $grub_reboot; +my $syslinux; +my $syslinux_path; +my $syslinux_label; my $target; my $make; my $pre_install; @@ -232,6 +240,11 @@ my %option_map = ( "ADD_CONFIG" => \$addconfig, "REBOOT_TYPE" => \$reboot_type, "GRUB_MENU" => \$grub_menu, + "GRUB_FILE" => \$grub_file, + "GRUB_REBOOT" => \$grub_reboot, + "SYSLINUX" => \$syslinux, + "SYSLINUX_PATH" => \$syslinux_path, + "SYSLINUX_LABEL" => \$syslinux_label, "PRE_INSTALL" => \$pre_install, "POST_INSTALL" => \$post_install, "NO_INSTALL" => \$no_install, @@ -368,7 +381,7 @@ EOF ; $config_help{"REBOOT_TYPE"} = << "EOF" Way to reboot the box to the test kernel. - Only valid options so far are "grub" and "script". + Only valid options so far are "grub", "grub2", "syslinux", and "script". If you specify grub, it will assume grub version 1 and will search in /boot/grub/menu.lst for the title \$GRUB_MENU @@ -378,11 +391,19 @@ $config_help{"REBOOT_TYPE"} = << "EOF" The entry in /boot/grub/menu.lst must be entered in manually. The test will not modify that file. + + If you specify grub2, then you also need to specify both \$GRUB_MENU + and \$GRUB_FILE. + + If you specify syslinux, then you may use SYSLINUX to define the syslinux + command (defaults to extlinux), and SYSLINUX_PATH to specify the path to + the syslinux install (defaults to /boot/extlinux). But you have to specify + SYSLINUX_LABEL to define the label to boot to for the test kernel. EOF ; $config_help{"GRUB_MENU"} = << "EOF" The grub title name for the test kernel to boot - (Only mandatory if REBOOT_TYPE = grub) + (Only mandatory if REBOOT_TYPE = grub or grub2) Note, ktest.pl will not update the grub menu.lst, you need to manually add an option for the test. ktest.pl will search @@ -393,6 +414,22 @@ $config_help{"GRUB_MENU"} = << "EOF" title Test Kernel kernel vmlinuz-test GRUB_MENU = Test Kernel + + For grub2, a search of \$GRUB_FILE is performed for the lines + that begin with "menuentry". It will not detect submenus. The + menu must be a non-nested menu. Add the quotes used in the menu + to guarantee your selection, as the first menuentry with the content + of \$GRUB_MENU that is found will be used. +EOF + ; +$config_help{"GRUB_FILE"} = << "EOF" + If grub2 is used, the full path for the grub.cfg file is placed + here. Use something like /boot/grub2/grub.cfg to search. +EOF + ; +$config_help{"SYSLINUX_LABEL"} = << "EOF" + If syslinux is used, the label that boots the target kernel must + be specified with SYSLINUX_LABEL. EOF ; $config_help{"REBOOT_SCRIPT"} = << "EOF" @@ -521,6 +558,15 @@ sub get_ktest_configs { if ($rtype eq "grub") { get_ktest_config("GRUB_MENU"); } + + if ($rtype eq "grub2") { + get_ktest_config("GRUB_MENU"); + get_ktest_config("GRUB_FILE"); + } + + if ($rtype eq "syslinux") { + get_ktest_config("SYSLINUX_LABEL"); + } } sub process_variables { @@ -1123,6 +1169,9 @@ sub wait_for_monitor; sub reboot { my ($time) = @_; + # Make sure everything has been written to disk + run_ssh("sync"); + if (defined($time)) { start_monitor; # flush out current monitor @@ -1452,8 +1501,44 @@ sub run_scp_mod { return run_scp($src, $dst, $cp_scp); } +sub get_grub2_index { + + return if (defined($grub_number)); + + doprint "Find grub2 menu ... "; + $grub_number = -1; + + my $ssh_grub = $ssh_exec; + $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g; + + open(IN, "$ssh_grub |") + or die "unable to get $grub_file"; + + my $found = 0; + + while (<IN>) { + if (/^menuentry.*$grub_menu/) { + $grub_number++; + $found = 1; + last; + } elsif (/^menuentry\s/) { + $grub_number++; + } + } + close(IN); + + die "Could not find '$grub_menu' in $grub_file on $machine" + if (!$found); + doprint "$grub_number\n"; +} + sub get_grub_index { + if ($reboot_type eq "grub2") { + get_grub2_index; + return; + } + if ($reboot_type ne "grub") { return; } @@ -1524,6 +1609,10 @@ sub reboot_to { if ($reboot_type eq "grub") { run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; + } elsif ($reboot_type eq "grub2") { + run_ssh "$grub_reboot $grub_number"; + } elsif ($reboot_type eq "syslinux") { + run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path"; } elsif (defined $reboot_script) { run_command "$reboot_script"; } @@ -1718,6 +1807,14 @@ sub do_post_install { dodie "Failed to run post install"; } +# Sometimes the reboot fails, and will hang. We try to ssh to the box +# and if we fail, we force another reboot, that should powercycle it. +sub test_booted { + if (!run_ssh "echo testing connection") { + reboot $sleep_time; + } +} + sub install { return if ($no_install); @@ -1730,6 +1827,8 @@ sub install { my $cp_target = eval_kernel_version $target_image; + test_booted; + run_scp_install "$outputdir/$build_target", "$cp_target" or dodie "failed to copy image"; @@ -1877,10 +1976,14 @@ sub make_oldconfig { if (!run_command "$make olddefconfig") { # Perhaps olddefconfig doesn't exist in this version of the kernel - # try a yes '' | oldconfig - doprint "olddefconfig failed, trying yes '' | make oldconfig\n"; - run_command "yes '' | $make oldconfig" or - dodie "failed make config oldconfig"; + # try oldnoconfig + doprint "olddefconfig failed, trying make oldnoconfig\n"; + if (!run_command "$make oldnoconfig") { + doprint "oldnoconfig failed, trying yes '' | make oldconfig\n"; + # try a yes '' | oldconfig + run_command "yes '' | $make oldconfig" or + dodie "failed make config oldconfig"; + } } } @@ -3700,6 +3803,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $target = "$ssh_user\@$machine"; if ($reboot_type eq "grub") { dodie "GRUB_MENU not defined" if (!defined($grub_menu)); + } elsif ($reboot_type eq "grub2") { + dodie "GRUB_MENU not defined" if (!defined($grub_menu)); + dodie "GRUB_FILE not defined" if (!defined($grub_file)); + } elsif ($reboot_type eq "syslinux") { + dodie "SYSLINUX_LABEL not defined" if (!defined($syslinux_label)); } } diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index de28a0a3b8fc..4012e9330344 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -332,8 +332,18 @@ # from other linux builds on the system. #LOCALVERSION = -test +# For REBOOT_TYPE = grub2, you must specify where the grub.cfg +# file is. This is the file that is searched to find the menu +# option to boot to with GRUB_REBOOT +#GRUB_FILE = /boot/grub2/grub.cfg + +# The tool for REBOOT_TYPE = grub2 to set the next reboot kernel +# to boot into (one shot mode). +# (default grub2_reboot) +#GRUB_REBOOT = grub2_reboot + # The grub title name for the test kernel to boot -# (Only mandatory if REBOOT_TYPE = grub) +# (Only mandatory if REBOOT_TYPE = grub or grub2) # # Note, ktest.pl will not update the grub menu.lst, you need to # manually add an option for the test. ktest.pl will search @@ -343,8 +353,33 @@ # For example, if in the /boot/grub/menu.lst the test kernel title has: # title Test Kernel # kernel vmlinuz-test +# +# For grub2, a search of top level "menuentry"s are done. No +# submenu is searched. The menu is found by searching for the +# contents of GRUB_MENU in the line that starts with "menuentry". +# You may want to include the quotes around the option. For example: +# for: menuentry 'Test Kernel' +# do a: GRUB_MENU = 'Test Kernel' +# For customizing, add your entry in /etc/grub.d/40_custom. +# #GRUB_MENU = Test Kernel +# For REBOOT_TYPE = syslinux, the name of the syslinux executable +# (on the target) to use to set up the next reboot to boot the +# test kernel. +# (default extlinux) +#SYSLINUX = syslinux + +# For REBOOT_TYPE = syslinux, the path that is passed to to the +# syslinux command where syslinux is installed. +# (default /boot/extlinux) +#SYSLINUX_PATH = /boot/syslinux + +# For REBOOT_TYPE = syslinux, the syslinux label that references the +# test kernel in the syslinux config file. +# (default undefined) +#SYSLINUX_LABEL = "test-kernel" + # A script to reboot the target into the test kernel # This and SWITCH_TO_TEST are about the same, except # SWITCH_TO_TEST is run even for REBOOT_TYPE = grub. @@ -497,7 +532,7 @@ #POST_BUILD_DIE = 1 # Way to reboot the box to the test kernel. -# Only valid options so far are "grub" and "script" +# Only valid options so far are "grub", "grub2", "syslinux" and "script" # (default grub) # If you specify grub, it will assume grub version 1 # and will search in /boot/grub/menu.lst for the title $GRUB_MENU @@ -505,6 +540,13 @@ # your setup, then specify "script" and have a command or script # specified in REBOOT_SCRIPT to boot to the target. # +# For REBOOT_TYPE = grub2, you must define both GRUB_MENU and +# GRUB_FILE. +# +# For REBOOT_TYPE = syslinux, you must define SYSLINUX_LABEL, and +# perhaps modify SYSLINUX (default extlinux) and SYSLINUX_PATH +# (default /boot/extlinux) +# # The entry in /boot/grub/menu.lst must be entered in manually. # The test will not modify that file. #REBOOT_TYPE = grub diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index 931278035f5c..e18b42b254af 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -17,7 +17,7 @@ else endif run_tests: - ./breakpoint_test + @./breakpoint_test || echo "breakpoints selftests: [FAIL]" clean: rm -fr breakpoint_test diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile index 7c9c20ff578a..12657a5e4bf9 100644 --- a/tools/testing/selftests/cpu-hotplug/Makefile +++ b/tools/testing/selftests/cpu-hotplug/Makefile @@ -1,6 +1,6 @@ all: run_tests: - ./on-off-test.sh + @./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile new file mode 100644 index 000000000000..5386fd7c43ae --- /dev/null +++ b/tools/testing/selftests/ipc/Makefile @@ -0,0 +1,25 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := X86 + CFLAGS := -DCONFIG_X86_32 -D__i386__ +endif +ifeq ($(ARCH),x86_64) + ARCH := X86 + CFLAGS := -DCONFIG_X86_64 -D__x86_64__ +endif + +CFLAGS += -I../../../../usr/include/ + +all: +ifeq ($(ARCH),X86) + gcc $(CFLAGS) msgque.c -o msgque_test +else + echo "Not an x86 target, can't build msgque selftest" +endif + +run_tests: all + ./msgque_test + +clean: + rm -fr ./msgque_test diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c new file mode 100644 index 000000000000..d66418237d21 --- /dev/null +++ b/tools/testing/selftests/ipc/msgque.c @@ -0,0 +1,246 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <linux/msg.h> +#include <fcntl.h> + +#define MAX_MSG_SIZE 32 + +struct msg1 { + int msize; + long mtype; + char mtext[MAX_MSG_SIZE]; +}; + +#define TEST_STRING "Test sysv5 msg" +#define MSG_TYPE 1 + +#define ANOTHER_TEST_STRING "Yet another test sysv5 msg" +#define ANOTHER_MSG_TYPE 26538 + +struct msgque_data { + key_t key; + int msq_id; + int qbytes; + int qnum; + int mode; + struct msg1 *messages; +}; + +int restore_queue(struct msgque_data *msgque) +{ + int fd, ret, id, i; + char buf[32]; + + fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY); + if (fd == -1) { + printf("Failed to open /proc/sys/kernel/msg_next_id\n"); + return -errno; + } + sprintf(buf, "%d", msgque->msq_id); + + ret = write(fd, buf, strlen(buf)); + if (ret != strlen(buf)) { + printf("Failed to write to /proc/sys/kernel/msg_next_id\n"); + return -errno; + } + + id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL); + if (id == -1) { + printf("Failed to create queue\n"); + return -errno; + } + + if (id != msgque->msq_id) { + printf("Restored queue has wrong id (%d instead of %d)\n", + id, msgque->msq_id); + ret = -EFAULT; + goto destroy; + } + + for (i = 0; i < msgque->qnum; i++) { + if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype, + msgque->messages[i].msize, IPC_NOWAIT) != 0) { + printf("msgsnd failed (%m)\n"); + ret = -errno; + goto destroy; + }; + } + return 0; + +destroy: + if (msgctl(id, IPC_RMID, 0)) + printf("Failed to destroy queue: %d\n", -errno); + return ret; +} + +int check_and_destroy_queue(struct msgque_data *msgque) +{ + struct msg1 message; + int cnt = 0, ret; + + while (1) { + ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE, + 0, IPC_NOWAIT); + if (ret < 0) { + if (errno == ENOMSG) + break; + printf("Failed to read IPC message: %m\n"); + ret = -errno; + goto err; + } + if (ret != msgque->messages[cnt].msize) { + printf("Wrong message size: %d (expected %d)\n", ret, + msgque->messages[cnt].msize); + ret = -EINVAL; + goto err; + } + if (message.mtype != msgque->messages[cnt].mtype) { + printf("Wrong message type\n"); + ret = -EINVAL; + goto err; + } + if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) { + printf("Wrong message content\n"); + ret = -EINVAL; + goto err; + } + cnt++; + } + + if (cnt != msgque->qnum) { + printf("Wrong message number\n"); + ret = -EINVAL; + goto err; + } + + ret = 0; +err: + if (msgctl(msgque->msq_id, IPC_RMID, 0)) { + printf("Failed to destroy queue: %d\n", -errno); + return -errno; + } + return ret; +} + +int dump_queue(struct msgque_data *msgque) +{ + struct msqid64_ds ds; + int kern_id; + int i, ret; + + for (kern_id = 0; kern_id < 256; kern_id++) { + ret = msgctl(kern_id, MSG_STAT, &ds); + if (ret < 0) { + if (errno == -EINVAL) + continue; + printf("Failed to get stats for IPC queue with id %d\n", + kern_id); + return -errno; + } + + if (ret == msgque->msq_id) + break; + } + + msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum); + if (msgque->messages == NULL) { + printf("Failed to get stats for IPC queue\n"); + return -ENOMEM; + } + + msgque->qnum = ds.msg_qnum; + msgque->mode = ds.msg_perm.mode; + msgque->qbytes = ds.msg_qbytes; + + for (i = 0; i < msgque->qnum; i++) { + ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype, + MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY); + if (ret < 0) { + printf("Failed to copy IPC message: %m (%d)\n", errno); + return -errno; + } + msgque->messages[i].msize = ret; + } + return 0; +} + +int fill_msgque(struct msgque_data *msgque) +{ + struct msg1 msgbuf; + + msgbuf.mtype = MSG_TYPE; + memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING)); + if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING), + IPC_NOWAIT) != 0) { + printf("First message send failed (%m)\n"); + return -errno; + }; + + msgbuf.mtype = ANOTHER_MSG_TYPE; + memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); + if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING), + IPC_NOWAIT) != 0) { + printf("Second message send failed (%m)\n"); + return -errno; + }; + return 0; +} + +int main(int argc, char **argv) +{ + int msg, pid, err; + struct msgque_data msgque; + + msgque.key = ftok(argv[0], 822155650); + if (msgque.key == -1) { + printf("Can't make key\n"); + return -errno; + } + + msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); + if (msgque.msq_id == -1) { + printf("Can't create queue\n"); + goto err_out; + } + + err = fill_msgque(&msgque); + if (err) { + printf("Failed to fill queue\n"); + goto err_destroy; + } + + err = dump_queue(&msgque); + if (err) { + printf("Failed to dump queue\n"); + goto err_destroy; + } + + err = check_and_destroy_queue(&msgque); + if (err) { + printf("Failed to check and destroy queue\n"); + goto err_out; + } + + err = restore_queue(&msgque); + if (err) { + printf("Failed to restore queue\n"); + goto err_destroy; + } + + err = check_and_destroy_queue(&msgque); + if (err) { + printf("Failed to test queue\n"); + goto err_out; + } + return 0; + +err_destroy: + if (msgctl(msgque.msq_id, IPC_RMID, 0)) { + printf("Failed to destroy queue: %d\n", -errno); + return -errno; + } +err_out: + return err; +} diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile index dc79b86ea65c..56eb5523dbb8 100644 --- a/tools/testing/selftests/kcmp/Makefile +++ b/tools/testing/selftests/kcmp/Makefile @@ -16,13 +16,13 @@ CFLAGS += -I../../../../arch/x86/include/ all: ifeq ($(ARCH),X86) - gcc $(CFLAGS) kcmp_test.c -o run_test + gcc $(CFLAGS) kcmp_test.c -o kcmp_test else echo "Not an x86 target, can't build kcmp selftest" endif -run-tests: all - ./kcmp_test +run_tests: all + @./kcmp_test || echo "kcmp_test: [FAIL]" clean: rm -fr ./run_test diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c index 358cc6bfa35d..fa4f1b37e045 100644 --- a/tools/testing/selftests/kcmp/kcmp_test.c +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -72,7 +72,8 @@ int main(int argc, char **argv) /* This one should return same fd */ ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1); if (ret) { - printf("FAIL: 0 expected but %d returned\n", ret); + printf("FAIL: 0 expected but %d returned (%s)\n", + ret, strerror(errno)); ret = -1; } else printf("PASS: 0 returned as expected\n"); @@ -80,7 +81,8 @@ int main(int argc, char **argv) /* Compare with self */ ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0); if (ret) { - printf("FAIL: 0 expected but %li returned\n", ret); + printf("FAIL: 0 expected but %li returned (%s)\n", + ret, strerror(errno)); ret = -1; } else printf("PASS: 0 returned as expected\n"); diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index 7c9c20ff578a..0f49c3f5f58d 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -1,6 +1,6 @@ all: run_tests: - ./on-off-test.sh + @./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 54c0aad2b47c..218a122c7951 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -3,8 +3,8 @@ all: gcc -O2 -lrt -lpthread -lpopt -o mq_perf_tests mq_perf_tests.c run_tests: - ./mq_open_tests /test1 - ./mq_perf_tests + @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]" + @./mq_perf_tests || echo "mq_perf_tests: [FAIL]" clean: rm -f mq_open_tests mq_perf_tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index b336b24aa6c0..436d2e81868b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,14 +1,14 @@ # Makefile for vm selftests CC = $(CROSS_COMPILE)gcc -CFLAGS = -Wall -Wextra +CFLAGS = -Wall -all: hugepage-mmap hugepage-shm map_hugetlb +all: hugepage-mmap hugepage-shm map_hugetlb thuge-gen %: %.c $(CC) $(CFLAGS) -o $@ $^ run_tests: all - /bin/sh ./run_vmtests + @/bin/sh ./run_vmtests || echo "vmtests: [FAIL]" clean: $(RM) hugepage-mmap hugepage-shm map_hugetlb diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c new file mode 100644 index 000000000000..c87957295f74 --- /dev/null +++ b/tools/testing/selftests/vm/thuge-gen.c @@ -0,0 +1,254 @@ +/* Test selecting other page sizes for mmap/shmget. + + Before running this huge pages for each huge page size must have been + reserved. + For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. + Also shmmax must be increased. + And you need to run as root to work around some weird permissions in shm. + And nothing using huge pages should run in parallel. + When the program aborts you may need to clean up the shm segments with + ipcrm -m by hand, like this + sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m + (warning this will remove all if someone else uses them) */ + +#define _GNU_SOURCE 1 +#include <sys/mman.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <glob.h> +#include <assert.h> +#include <unistd.h> +#include <stdarg.h> +#include <string.h> + +#define err(x) perror(x), exit(1) + +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f +#define MAP_HUGETLB 0x40000 + +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#define SHM_HUGE_SHIFT 26 +#define SHM_HUGE_MASK 0x3f +#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) + +#define NUM_PAGESIZES 5 + +#define NUM_PAGES 4 + +#define Dprintf(fmt...) // printf(fmt) + +unsigned long page_sizes[NUM_PAGESIZES]; +int num_page_sizes; + +int ilog2(unsigned long v) +{ + int l = 0; + while ((1UL << l) < v) + l++; + return l; +} + +void find_pagesizes(void) +{ + glob_t g; + int i; + glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g); + assert(g.gl_pathc <= NUM_PAGESIZES); + for (i = 0; i < g.gl_pathc; i++) { + sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB", + &page_sizes[i]); + page_sizes[i] <<= 10; + printf("Found %luMB\n", page_sizes[i] >> 20); + } + num_page_sizes = g.gl_pathc; + globfree(&g); +} + +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + free(line); + return hps; +} + +void show(unsigned long ps) +{ + char buf[100]; + if (ps == getpagesize()) + return; + printf("%luMB: ", ps >> 20); + fflush(stdout); + snprintf(buf, sizeof buf, + "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + system(buf); +} + +unsigned long read_sysfs(int warn, char *fmt, ...) +{ + char *line = NULL; + size_t linelen = 0; + char buf[100]; + FILE *f; + va_list ap; + unsigned long val = 0; + + va_start(ap, fmt); + vsnprintf(buf, sizeof buf, fmt, ap); + va_end(ap); + + f = fopen(buf, "r"); + if (!f) { + if (warn) + printf("missing %s\n", buf); + return 0; + } + if (getline(&line, &linelen, f) > 0) { + sscanf(line, "%lu", &val); + } + fclose(f); + free(line); + return val; +} + +unsigned long read_free(unsigned long ps) +{ + return read_sysfs(ps != getpagesize(), + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); +} + +void test_mmap(unsigned long size, unsigned flags) +{ + char *map; + unsigned long before, after; + int err; + + before = read_free(size); + map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, 0, 0); + + if (map == (char *)-1) err("mmap"); + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = munmap(map, size); + assert(!err); +} + +void test_shmget(unsigned long size, unsigned flags) +{ + int id; + unsigned long before, after; + int err; + + before = read_free(size); + id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags); + if (id < 0) err("shmget"); + + struct shm_info i; + if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl"); + Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss); + + + Dprintf("id %d\n", id); + char *map = shmat(id, NULL, 0600); + if (map == (char*)-1) err("shmat"); + + shmctl(id, IPC_RMID, NULL); + + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = shmdt(map); + assert(!err); +} + +void sanity_checks(void) +{ + int i; + unsigned long largest = getpagesize(); + + for (i = 0; i < num_page_sizes; i++) { + if (page_sizes[i] > largest) + largest = page_sizes[i]; + + if (read_free(page_sizes[i]) < NUM_PAGES) { + printf("Not enough huge pages for page size %lu MB, need %u\n", + page_sizes[i] >> 20, + NUM_PAGES); + exit(0); + } + } + + if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) { + printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); + exit(0); + } + +#if defined(__x86_64__) + if (largest != 1U<<30) { + printf("No GB pages available on x86-64\n" + "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); + exit(0); + } +#endif +} + +int main(void) +{ + int i; + unsigned default_hps = default_huge_page_size(); + + find_pagesizes(); + + sanity_checks(); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << MAP_HUGE_SHIFT; + printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg); + test_mmap(ps, MAP_HUGETLB | arg); + } + printf("Testing default huge mmap\n"); + test_mmap(default_hps, SHM_HUGETLB); + + puts("Testing non-huge shmget"); + test_shmget(getpagesize(), 0); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << SHM_HUGE_SHIFT; + printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg); + test_shmget(ps, SHM_HUGETLB | arg); + } + puts("default huge shmget"); + test_shmget(default_hps, SHM_HUGETLB); + + return 0; +} diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index e626fa553c5a..fcc9aa25fd08 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -164,7 +164,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, r = virtqueue_add_buf(vq->vq, &sl, 1, 0, dev->buf + started, GFP_ATOMIC); - if (likely(r >= 0)) { + if (likely(r == 0)) { ++started; virtqueue_kick(vq->vq); } @@ -177,7 +177,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, r = 0; } - } while (r >= 0); + } while (r == 0); if (completed == completed_before) ++spurious; assert(completed <= bufs); @@ -232,7 +232,7 @@ const struct option longopts[] = { } }; -static void help() +static void help(void) { fprintf(stderr, "Usage: virtio_test [--help]" " [--no-indirect]" |