diff options
Diffstat (limited to 'tools')
31 files changed, 1677 insertions, 69 deletions
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 146fd6147e84..d9834b362943 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -701,14 +701,18 @@ int main(void) pfd.fd = fd; while (1) { + struct sockaddr *addr_p = (struct sockaddr *) &addr; + socklen_t addr_l = sizeof(addr); pfd.events = POLLIN; pfd.revents = 0; poll(&pfd, 1, -1); - len = recv(fd, kvp_recv_buffer, sizeof(kvp_recv_buffer), 0); + len = recvfrom(fd, kvp_recv_buffer, sizeof(kvp_recv_buffer), 0, + addr_p, &addr_l); - if (len < 0) { - syslog(LOG_ERR, "recv failed; error:%d", len); + if (len < 0 || addr.nl_pid) { + syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", + addr.nl_pid, errno, strerror(errno)); close(fd); return -1; } diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 5476bc0a1eac..b4b572e8c100 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,4 +1,6 @@ tools/perf +tools/scripts +tools/lib/traceevent include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8c767c6bca91..25249f76329d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -152,7 +152,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, if (symbol_conf.use_callchain) { err = callchain_append(he->callchain, - &evsel->hists.callchain_cursor, + &callchain_cursor, sample->period); if (err) return err; @@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, * so we don't allocated the extra space needed because the stdio * code will not use it. 
*/ - if (al->sym != NULL && use_browser > 0) { + if (he->ms.sym != NULL && use_browser > 0) { struct annotation *notes = symbol__annotation(he->ms.sym); assert(evsel != NULL); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 62ae30d34fa6..07b5c7703dd1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1129,7 +1129,7 @@ static int add_default_attributes(void) return 0; if (!evsel_list->nr_entries) { - if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) return -1; } @@ -1139,21 +1139,21 @@ static int add_default_attributes(void) return 0; /* Append detailed run extra attributes: */ - if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) return -1; if (detailed_run < 2) return 0; /* Append very detailed run extra attributes: */ - if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) return -1; if (detailed_run < 3) return 0; /* Append very, very detailed run extra attributes: */ - return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs); + return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } int cmd_stat(int argc, const char **argv, const char *prefix __used) @@ -1179,6 +1179,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) fprintf(stderr, "cannot use both --output and --log-fd\n"); usage_with_options(stat_usage, options); } + + if (output_fd < 0) { + fprintf(stderr, "argument to --log-fd must be a > 0\n"); + usage_with_options(stat_usage, options); + } + if (!output) { struct timespec tm; mode = append_file ? 
"a" : "w"; @@ -1190,7 +1196,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) } clock_gettime(CLOCK_REALTIME, &tm); fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); - } else if (output_fd != 2) { + } else if (output_fd > 0) { mode = append_file ? "a" : "w"; output = fdopen(output_fd, mode); if (!output) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 871b540293e1..6bb0277b7dfe 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -787,7 +787,7 @@ static void perf_event__process_sample(struct perf_tool *tool, } if (symbol_conf.use_callchain) { - err = callchain_append(he->callchain, &evsel->hists.callchain_cursor, + err = callchain_append(he->callchain, &callchain_cursor, sample->period); if (err) return; diff --git a/tools/perf/design.txt b/tools/perf/design.txt index bd0bb1b1279b..67e5d0cace85 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -409,14 +409,15 @@ Counters can be enabled and disabled in two ways: via ioctl and via prctl. When a counter is disabled, it doesn't count or generate events but does continue to exist and maintain its count value. -An individual counter or counter group can be enabled with +An individual counter can be enabled with - ioctl(fd, PERF_EVENT_IOC_ENABLE); + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); or disabled with - ioctl(fd, PERF_EVENT_IOC_DISABLE); + ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); +For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument. Enabling or disabling the leader of a group enables or disables the whole group; that is, while the group leader is disabled, none of the counters in the group will count. 
Enabling or disabling a member of a diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4deea6aaf927..34b1c46eaf42 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -668,7 +668,7 @@ static int annotate_browser__run(struct annotate_browser *browser, int evidx, "q/ESC/CTRL+C Exit\n\n" "-> Go to target\n" "<- Exit\n" - "h Cycle thru hottest instructions\n" + "H Cycle thru hottest instructions\n" "j Toggle showing jump to target arrows\n" "J Toggle showing number of jump sources on targets\n" "n Search next string\n" diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index ad73300f7bac..95264f304179 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -12,7 +12,7 @@ LF=' # First check if there is a .git to get the version from git describe # otherwise try to get the version from the kernel makefile if test -d ../../.git -o -f ../../.git && - VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f7106a8d9a4..3a6bff47614f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -18,6 +18,8 @@ #include "util.h" #include "callchain.h" +__thread struct callchain_cursor callchain_cursor; + bool ip_callchain__valid(struct ip_callchain *chain, const union perf_event *event) { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 7f9c0f1ae3a9..3bdb407f9cd9 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -76,6 +76,8 @@ struct callchain_cursor { struct callchain_cursor_node *curr; }; +extern __thread struct callchain_cursor callchain_cursor; + static inline void callchain_init(struct callchain_root *root) { INIT_LIST_HEAD(&root->node.siblings); diff --git 
a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4ac5f5ae4ce9..7400fb3fc50c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -159,6 +159,17 @@ out_delete_partial_list: return -1; } +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs) +{ + size_t i; + + for (i = 0; i < nr_attrs; i++) + event_attr_init(attrs + i); + + return perf_evlist__add_attrs(evlist, attrs, nr_attrs); +} + static int trace_event__id(const char *evname) { char *filename, *colon; @@ -263,7 +274,8 @@ void perf_evlist__disable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_DISABLE, 0); } } } @@ -276,7 +288,8 @@ void perf_evlist__enable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_ENABLE, 0); } } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 58abb63ac13a..989bee9624c2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -54,6 +54,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); int perf_evlist__add_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs); int perf_evlist__add_tracepoints(struct perf_evlist *evlist, const char *tracepoints[], size_t nr_tracepoints); int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, @@ -62,6 +64,8 @@ int 
perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, #define perf_evlist__add_attrs_array(evlist, array) \ perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array)) +#define perf_evlist__add_default_attrs(evlist, array) \ + __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) #define perf_evlist__add_tracepoints_array(evlist, array) \ perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array)) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 91d19138f3ec..9f6cebd798ee 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -494,16 +494,24 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, } static int perf_event__parse_id_sample(const union perf_event *event, u64 type, - struct perf_sample *sample) + struct perf_sample *sample, + bool swapped) { const u64 *array = event->sample.array; + union u64_swap u; array += ((event->header.size - sizeof(event->header)) / sizeof(u64)) - 1; if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - sample->cpu = *p; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + } + + sample->cpu = u.val32[0]; array--; } @@ -523,9 +531,16 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_TID) { - u32 *p = (u32 *)array; - sample->pid = p[0]; - sample->tid = p[1]; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } + + sample->pid = u.val32[0]; + sample->tid = u.val32[1]; } return 0; @@ -562,7 +577,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, if (event->header.type != PERF_RECORD_SAMPLE) { if (!sample_id_all) return 0; - return perf_event__parse_id_sample(event, type, data); + return perf_event__parse_id_sample(event, type, data, 
swapped); } array = event->sample.array; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2dd5edf161b7..e909d43cf542 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1942,7 +1942,6 @@ int perf_file_header__read(struct perf_file_header *header, else return -1; } else if (ph->needs_swap) { - unsigned int i; /* * feature bitmap is declared as an array of unsigned longs -- * not good since its size can differ between the host that @@ -1958,14 +1957,17 @@ int perf_file_header__read(struct perf_file_header *header, * file), punt and fallback to the original behavior -- * clearing all feature bits and setting buildid. */ - for (i = 0; i < BITS_TO_LONGS(HEADER_FEAT_BITS); ++i) - header->adds_features[i] = bswap_64(header->adds_features[i]); + mem_bswap_64(&header->adds_features, + BITS_TO_U64(HEADER_FEAT_BITS)); if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { - for (i = 0; i < BITS_TO_LONGS(HEADER_FEAT_BITS); ++i) { - header->adds_features[i] = bswap_64(header->adds_features[i]); - header->adds_features[i] = bswap_32(header->adds_features[i]); - } + /* unswap as u64 */ + mem_bswap_64(&header->adds_features, + BITS_TO_U64(HEADER_FEAT_BITS)); + + /* unswap as u32 */ + mem_bswap_32(&header->adds_features, + BITS_TO_U32(HEADER_FEAT_BITS)); } if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { @@ -2091,6 +2093,35 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? 
-1 : 0; } +static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel) +{ + struct event_format *event = trace_find_event(evsel->attr.config); + char bf[128]; + + if (event == NULL) + return -1; + + snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); + evsel->name = strdup(bf); + if (event->name == NULL) + return -1; + + return 0; +} + +static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist) +{ + struct perf_evsel *pos; + + list_for_each_entry(pos, &evlist->entries, node) { + if (pos->attr.type == PERF_TYPE_TRACEPOINT && + perf_evsel__set_tracepoint_name(pos)) + return -1; + } + + return 0; +} + int perf_session__read_header(struct perf_session *session, int fd) { struct perf_header *header = &session->header; @@ -2172,6 +2203,9 @@ int perf_session__read_header(struct perf_session *session, int fd) lseek(fd, header->data_offset, SEEK_SET); + if (perf_evlist__set_tracepoint_names(session->evlist)) + goto out_delete_evlist; + header->frozen = 1; return 0; out_errno: diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1293b5ebea4d..514e2a4b367d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -378,7 +378,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *hists, +static bool hists__collapse_insert_entry(struct hists *hists __used, struct rb_root *root, struct hist_entry *he) { @@ -397,8 +397,9 @@ static bool hists__collapse_insert_entry(struct hists *hists, iter->period += he->period; iter->nr_events += he->nr_events; if (symbol_conf.use_callchain) { - callchain_cursor_reset(&hists->callchain_cursor); - callchain_merge(&hists->callchain_cursor, iter->callchain, + callchain_cursor_reset(&callchain_cursor); + callchain_merge(&callchain_cursor, + iter->callchain, he->callchain); } hist_entry__free(he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index cfc64e293f90..34bb556d6219 100644 --- 
a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -67,8 +67,6 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; - /* Best would be to reuse the session callchain cursor */ - struct callchain_cursor callchain_cursor; }; struct hist_entry *__hists__add_entry(struct hists *self, diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index f1584833bd22..587a230d2075 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -8,6 +8,8 @@ #define BITS_PER_LONG __WORDSIZE #define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 1915de20dcac..3322b8446e89 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -57,6 +57,10 @@ void setup_pager(void) } if (!pager) pager = getenv("PAGER"); + if (!pager) { + if (!access("/usr/bin/pager", X_OK)) + pager = "/usr/bin/pager"; + } if (!pager) pager = "less"; else if (!*pager || !strcmp(pager, "cat")) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 59dccc98b554..0dda25d82d06 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2164,16 +2164,12 @@ int del_perf_probe_events(struct strlist *dellist) error: if (kfd >= 0) { - if (namelist) - strlist__delete(namelist); - + strlist__delete(namelist); close(kfd); } if (ufd >= 0) { - if (unamelist) - strlist__delete(unamelist); - + strlist__delete(unamelist); close(ufd); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 93d355d27109..c3e399bcf18d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -288,7 
+288,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self, return bi; } -int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, +int machine__resolve_callchain(struct machine *self, + struct perf_evsel *evsel __used, struct thread *thread, struct ip_callchain *chain, struct symbol **parent) @@ -297,7 +298,12 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, unsigned int i; int err; - callchain_cursor_reset(&evsel->hists.callchain_cursor); + callchain_cursor_reset(&callchain_cursor); + + if (chain->nr > PERF_MAX_STACK_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } for (i = 0; i < chain->nr; i++) { u64 ip; @@ -317,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, case PERF_CONTEXT_USER: cpumode = PERF_RECORD_MISC_USER; break; default: - break; + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. 
+ */ + callchain_cursor_reset(&callchain_cursor); + return 0; } continue; } @@ -333,7 +346,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, break; } - err = callchain_cursor_append(&evsel->hists.callchain_cursor, + err = callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym); if (err) return err; @@ -429,6 +442,16 @@ static void perf_tool__fill_defaults(struct perf_tool *tool) tool->finished_round = process_finished_round_stub; } } + +void mem_bswap_32(void *src, int byte_size) +{ + u32 *m = src; + while (byte_size > 0) { + *m = bswap_32(*m); + byte_size -= sizeof(u32); + ++m; + } +} void mem_bswap_64(void *src, int byte_size) { @@ -441,37 +464,65 @@ void mem_bswap_64(void *src, int byte_size) } } -static void perf_event__all64_swap(union perf_event *event) +static void swap_sample_id_all(union perf_event *event, void *data) +{ + void *end = (void *) event + event->header.size; + int size = end - data; + + BUG_ON(size % sizeof(u64)); + mem_bswap_64(data, size); +} + +static void perf_event__all64_swap(union perf_event *event, + bool sample_id_all __used) { struct perf_event_header *hdr = &event->header; mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); } -static void perf_event__comm_swap(union perf_event *event) +static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) { event->comm.pid = bswap_32(event->comm.pid); event->comm.tid = bswap_32(event->comm.tid); + + if (sample_id_all) { + void *data = &event->comm.comm; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__mmap_swap(union perf_event *event) +static void perf_event__mmap_swap(union perf_event *event, + bool sample_id_all) { event->mmap.pid = bswap_32(event->mmap.pid); event->mmap.tid = bswap_32(event->mmap.tid); event->mmap.start = bswap_64(event->mmap.start); event->mmap.len = bswap_64(event->mmap.len); event->mmap.pgoff = bswap_64(event->mmap.pgoff); + + if 
(sample_id_all) { + void *data = &event->mmap.filename; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__task_swap(union perf_event *event) +static void perf_event__task_swap(union perf_event *event, bool sample_id_all) { event->fork.pid = bswap_32(event->fork.pid); event->fork.tid = bswap_32(event->fork.tid); event->fork.ppid = bswap_32(event->fork.ppid); event->fork.ptid = bswap_32(event->fork.ptid); event->fork.time = bswap_64(event->fork.time); + + if (sample_id_all) + swap_sample_id_all(event, &event->fork + 1); } -static void perf_event__read_swap(union perf_event *event) +static void perf_event__read_swap(union perf_event *event, bool sample_id_all) { event->read.pid = bswap_32(event->read.pid); event->read.tid = bswap_32(event->read.tid); @@ -479,6 +530,9 @@ static void perf_event__read_swap(union perf_event *event) event->read.time_enabled = bswap_64(event->read.time_enabled); event->read.time_running = bswap_64(event->read.time_running); event->read.id = bswap_64(event->read.id); + + if (sample_id_all) + swap_sample_id_all(event, &event->read + 1); } static u8 revbyte(u8 b) @@ -530,7 +584,8 @@ void perf_event__attr_swap(struct perf_event_attr *attr) swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); } -static void perf_event__hdr_attr_swap(union perf_event *event) +static void perf_event__hdr_attr_swap(union perf_event *event, + bool sample_id_all __used) { size_t size; @@ -541,18 +596,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event) mem_bswap_64(event->attr.id, size); } -static void perf_event__event_type_swap(union perf_event *event) +static void perf_event__event_type_swap(union perf_event *event, + bool sample_id_all __used) { event->event_type.event_type.event_id = bswap_64(event->event_type.event_type.event_id); } -static void perf_event__tracing_data_swap(union perf_event *event) +static void perf_event__tracing_data_swap(union perf_event *event, + bool 
sample_id_all __used) { event->tracing_data.size = bswap_32(event->tracing_data.size); } -typedef void (*perf_event__swap_op)(union perf_event *event); +typedef void (*perf_event__swap_op)(union perf_event *event, + bool sample_id_all); static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_MMAP] = perf_event__mmap_swap, @@ -986,6 +1044,15 @@ static int perf_session__process_user_event(struct perf_session *session, union } } +static void event_swap(union perf_event *event, bool sample_id_all) +{ + perf_event__swap_op swap; + + swap = perf_event__swap_ops[event->header.type]; + if (swap) + swap(event, sample_id_all); +} + static int perf_session__process_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool, @@ -994,9 +1061,8 @@ static int perf_session__process_event(struct perf_session *session, struct perf_sample sample; int ret; - if (session->header.needs_swap && - perf_event__swap_ops[event->header.type]) - perf_event__swap_ops[event->header.type](event); + if (session->header.needs_swap) + event_swap(event, session->sample_id_all); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; @@ -1428,7 +1494,6 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, int print_sym, int print_dso, int print_symoffset) { struct addr_location al; - struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; struct callchain_cursor_node *node; if (perf_event__preprocess_sample(event, machine, &al, sample, @@ -1446,10 +1511,10 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, error("Failed to resolve callchain. 
Skipping\n"); return; } - callchain_cursor_commit(cursor); + callchain_cursor_commit(&callchain_cursor); while (1) { - node = callchain_cursor_current(cursor); + node = callchain_cursor_current(&callchain_cursor); if (!node) break; @@ -1460,12 +1525,12 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, } if (print_dso) { printf(" ("); - map__fprintf_dsoname(al.map, stdout); + map__fprintf_dsoname(node->map, stdout); printf(")"); } printf("\n"); - callchain_cursor_advance(cursor); + callchain_cursor_advance(&callchain_cursor); } } else { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 7a5434c00565..0c702e3f0a36 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -80,6 +80,7 @@ struct branch_info *machine__resolve_bstack(struct machine *self, bool perf_session__has_traces(struct perf_session *self, const char *msg); void mem_bswap_64(void *src, int byte_size); +void mem_bswap_32(void *src, int byte_size); void perf_event__attr_swap(struct perf_event_attr *attr); int perf_session__create_kernel_maps(struct perf_session *self); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e2ba8858f3e1..3e2e5ea0f03f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -323,6 +323,7 @@ struct dso *dso__new(const char *name) dso->sorted_by_name = 0; dso->has_build_id = 0; dso->kernel = DSO_TYPE_USER; + dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); } @@ -1156,6 +1157,33 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) return -1; } +static int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian = 1; + + dso->needs_swap = DSO_SWAP__NO; + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso->needs_swap = DSO_SWAP__YES; + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. 
*/ + if (*(unsigned char const *)&endian != 0) + dso->needs_swap = DSO_SWAP__YES; + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + static int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, symbol_filter_t filter, int kmodule, int want_symtab) @@ -1187,6 +1215,9 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, goto out_elf_end; } + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + goto out_elf_end; + /* Always reject images with a mismatched build-id: */ if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; @@ -1272,7 +1303,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, if (opdsec && sym.st_shndx == opdidx) { u32 offset = sym.st_value - opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; - sym.st_value = *opd; + sym.st_value = DSO__SWAP(dso, u64, *opd); sym.st_shndx = elf_addr_to_index(elf, sym.st_value); } @@ -2786,8 +2817,11 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type, struct map *dso__new_map(const char *name) { + struct map *map = NULL; struct dso *dso = dso__new(name); - struct map *map = map__new2(0, dso, MAP__FUNCTION); + + if (dso) + map = map__new2(0, dso, MAP__FUNCTION); return map; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5649d63798cb..af0752b1aca1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -9,6 +9,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <stdio.h> +#include <byteswap.h> #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); @@ -160,11 +161,18 @@ enum dso_kernel_type { DSO_TYPE_GUEST_KERNEL }; +enum dso_swap_type { + DSO_SWAP__UNSET, + DSO_SWAP__NO, + DSO_SWAP__YES, +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; enum dso_kernel_type kernel; + enum dso_swap_type needs_swap; u8 
adjust_symbols:1; u8 has_build_id:1; u8 hit:1; @@ -182,6 +190,28 @@ struct dso { char name[0]; }; +#define DSO__SWAP(dso, type, val) \ +({ \ + type ____r = val; \ + BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \ + if (dso->needs_swap == DSO_SWAP__YES) { \ + switch (sizeof(____r)) { \ + case 2: \ + ____r = bswap_16(val); \ + break; \ + case 4: \ + ____r = bswap_32(val); \ + break; \ + case 8: \ + ____r = bswap_64(val); \ + break; \ + default: \ + BUG_ON(1); \ + } \ + } \ + ____r; \ +}) + struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab2f682fd44c..16de7ad4850f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -73,8 +73,8 @@ int backwards_count; char *progname; int num_cpus; -cpu_set_t *cpu_mask; -size_t cpu_mask_size; +cpu_set_t *cpu_present_set, *cpu_mask; +size_t cpu_present_setsize, cpu_mask_size; struct counters { unsigned long long tsc; /* per thread */ @@ -103,6 +103,12 @@ struct timeval tv_even; struct timeval tv_odd; struct timeval tv_delta; +int mark_cpu_present(int pkg, int core, int cpu) +{ + CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); + return 0; +} + /* * cpu_mask_init(ncpus) * @@ -118,6 +124,18 @@ void cpu_mask_init(int ncpus) } cpu_mask_size = CPU_ALLOC_SIZE(ncpus); CPU_ZERO_S(cpu_mask_size, cpu_mask); + + /* + * Allocate and initialize cpu_present_set + */ + cpu_present_set = CPU_ALLOC(ncpus); + if (cpu_present_set == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); + CPU_ZERO_S(cpu_present_setsize, cpu_present_set); + for_all_cpus(mark_cpu_present); } void cpu_mask_uninit() @@ -125,6 +143,9 @@ void cpu_mask_uninit() CPU_FREE(cpu_mask); cpu_mask = NULL; cpu_mask_size = 0; + CPU_FREE(cpu_present_set); + cpu_present_set = NULL; + cpu_present_setsize = 0; } int cpu_migrate(int cpu) @@ -912,6 +933,8 @@ int is_snb(unsigned int 
family, unsigned int model) switch (model) { case 0x2A: case 0x2D: + case 0x3A: /* IVB */ + case 0x3D: /* IVB Xeon */ return 1; } return 0; @@ -1047,6 +1070,9 @@ int fork_it(char **argv) int retval; pid_t child_pid; get_counters(cnt_even); + + /* clear affinity side-effect of get_counters() */ + sched_setaffinity(0, cpu_present_setsize, cpu_present_set); gettimeofday(&tv_even, (struct timezone *)NULL); child_pid = fork(); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 28bc57ee757c..a4162e15c25f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints vm +TARGETS = breakpoints kcmp mqueue vm all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile new file mode 100644 index 000000000000..dc79b86ea65c --- /dev/null +++ b/tools/testing/selftests/kcmp/Makefile @@ -0,0 +1,29 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := X86 + CFLAGS := -DCONFIG_X86_32 -D__i386__ +endif +ifeq ($(ARCH),x86_64) + ARCH := X86 + CFLAGS := -DCONFIG_X86_64 -D__x86_64__ +endif + +CFLAGS += -I../../../../arch/x86/include/generated/ +CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ +CFLAGS += -I../../../../arch/x86/include/ + +all: +ifeq ($(ARCH),X86) + gcc $(CFLAGS) kcmp_test.c -o run_test +else + echo "Not an x86 target, can't build kcmp selftest" +endif + +run-tests: all + ./kcmp_test + +clean: + rm -fr ./run_test + rm -fr ./test-file diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c new file mode 100644 index 000000000000..358cc6bfa35d --- /dev/null +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -0,0 +1,94 @@ +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <limits.h> +#include <unistd.h> +#include 
<errno.h> +#include <string.h> +#include <fcntl.h> + +#include <linux/unistd.h> +#include <linux/kcmp.h> + +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> + +static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2) +{ + return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2); +} + +int main(int argc, char **argv) +{ + const char kpath[] = "kcmp-test-file"; + int pid1, pid2; + int fd1, fd2; + int status; + + fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644); + pid1 = getpid(); + + if (fd1 < 0) { + perror("Can't create file"); + exit(1); + } + + pid2 = fork(); + if (pid2 < 0) { + perror("fork failed"); + exit(1); + } + + if (!pid2) { + int pid2 = getpid(); + int ret; + + fd2 = open(kpath, O_RDWR, 0644); + if (fd2 < 0) { + perror("Can't open file"); + exit(1); + } + + /* An example of output and arguments */ + printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld " + "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld " + "INV: %2ld\n", + pid1, pid2, + sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd2), + sys_kcmp(pid1, pid2, KCMP_FILES, 0, 0), + sys_kcmp(pid1, pid2, KCMP_VM, 0, 0), + sys_kcmp(pid1, pid2, KCMP_FS, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SIGHAND, 0, 0), + sys_kcmp(pid1, pid2, KCMP_IO, 0, 0), + sys_kcmp(pid1, pid2, KCMP_SYSVSEM, 0, 0), + + /* This one should fail */ + sys_kcmp(pid1, pid2, KCMP_TYPES + 1, 0, 0)); + + /* This one should return same fd */ + ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1); + if (ret) { + printf("FAIL: 0 expected but %d returned\n", ret); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + /* Compare with self */ + ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0); + if (ret) { + printf("FAIL: 0 expected but %li returned\n", ret); + ret = -1; + } else + printf("PASS: 0 returned as expected\n"); + + exit(ret); + } + + waitpid(pid2, &status, P_ALL); + + return 0; +} diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore new file 
# Build and run the POSIX message queue selftests.
#
# Libraries are listed after the source files: linkers that resolve
# symbols left to right (or default to --as-needed) drop a library that
# appears before the object that needs it.
.PHONY: all run_tests clean

all:
	gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
	gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt

# Both tests modify system-wide settings and must be run as root.
run_tests: all
	./mq_open_tests /test1
	./mq_perf_tests

clean:
	rm -f mq_open_tests mq_perf_tests
/*
 * __set - best-effort write of an integer to the start of an open stream.
 * @stream:  stream to write to; a NULL stream is ignored
 * @value:   value written in decimal at offset 0
 * @err_msg: prefix handed to perror() if the write fails
 *
 * Unlike set(), a failure here is only reported and never triggers
 * shutdown(), which is what allows shutdown() itself to call this while
 * restoring saved settings.
 */
static inline void __set(FILE *stream, int value, char *err_msg)
{
	if (!stream)
		return;

	rewind(stream);
	/*
	 * stdio buffers the write, so the underlying write() (and any error
	 * from it) only happens on flush.  Flush here so that the failure is
	 * reported with the caller's message.
	 */
	if (fprintf(stream, "%d", value) < 0 || fflush(stream) != 0)
		perror(err_msg);
}
/*
 * set - write an integer to a /proc file and verify that it took effect.
 * @stream: open stream for the /proc entry
 * @value:  value to store
 *
 * The value is written at offset 0 and then read back with get().  If the
 * write fails or the value read back differs, shutdown() is called with
 * exit code 5.  shutdown() returns when it is already in progress, so
 * every failure path still returns to the caller explicitly.
 */
static inline void set(FILE *stream, int value)
{
	int new_value;

	rewind(stream);
	/* Flush so that a write rejected by the kernel is detected here */
	if (fprintf(stream, "%d", value) < 0 || fflush(stream) != 0) {
		shutdown(5, "Failed writing to /proc file", __LINE__ - 1);
		return;
	}

	new_value = get(stream);
	if (new_value != value)
		shutdown(5, "We didn't get what we wrote to /proc back",
			 __LINE__ - 1);
}
values.\n\n"); + set(def_msgs, 10); + cur_def_msgs = 10; + set(def_msgsize, 128); + cur_def_msgsize = 128; + } + } else { + rlim_needed = (cur_max_msgs + 1) * (cur_max_msgsize + 1 + + 2 * sizeof(void *)); + if (rlim_needed > cur_limits.rlim_cur) { + printf("Temporarily lowering maximum queue parameters " + "to something that will work\n" + "with the current rlimit values in case this is " + "a kernel that ties the default\n" + "queue parameters to the maximum queue " + "parameters.\n\n"); + set(max_msgs, 10); + cur_max_msgs = 10; + set(max_msgsize, 128); + cur_max_msgsize = 128; + } + } +} + +/* + * test_queue - Test opening a queue, shutdown if we fail. This should + * only be called in situations that should never fail. We clean up + * after ourselves and return the queue attributes in *result. + */ +static inline void test_queue(struct mq_attr *attr, struct mq_attr *result) +{ + int flags = O_RDWR | O_EXCL | O_CREAT; + int perms = DEFFILEMODE; + + if ((queue = mq_open(queue_path, flags, perms, attr)) == -1) + shutdown(1, "mq_open()", __LINE__); + if (mq_getattr(queue, result)) + shutdown(1, "mq_getattr()", __LINE__); + if (mq_close(queue)) + shutdown(1, "mq_close()", __LINE__); + queue = -1; + if (mq_unlink(queue_path)) + shutdown(1, "mq_unlink()", __LINE__); +} + +/* + * Same as test_queue above, but failure is not fatal. 
+ * Returns: + * 0 - Failed to create a queue + * 1 - Created a queue, attributes in *result + */ +static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result) +{ + int flags = O_RDWR | O_EXCL | O_CREAT; + int perms = DEFFILEMODE; + + if ((queue = mq_open(queue_path, flags, perms, attr)) == -1) + return 0; + if (mq_getattr(queue, result)) + shutdown(1, "mq_getattr()", __LINE__); + if (mq_close(queue)) + shutdown(1, "mq_close()", __LINE__); + queue = -1; + if (mq_unlink(queue_path)) + shutdown(1, "mq_unlink()", __LINE__); + return 1; +} + +int main(int argc, char *argv[]) +{ + struct mq_attr attr, result; + + if (argc != 2) { + fprintf(stderr, "Must pass a valid queue name\n\n"); + fprintf(stderr, usage, argv[0]); + exit(1); + } + + /* + * Although we can create a msg queue with a non-absolute path name, + * unlink will fail. So, if the name doesn't start with a /, add one + * when we save it. + */ + if (*argv[1] == '/') + queue_path = strdup(argv[1]); + else { + queue_path = malloc(strlen(argv[1]) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, argv[1]); + } + + if (getuid() != 0) { + fprintf(stderr, "Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. 
" + "Exiting.\n"); + exit(1); + } + + /* Find out what files there are for us to make tweaks in */ + def_msgs = fopen(DEF_MSGS, "r+"); + def_msgsize = fopen(DEF_MSGSIZE, "r+"); + max_msgs = fopen(MAX_MSGS, "r+"); + max_msgsize = fopen(MAX_MSGSIZE, "r+"); + + if (!max_msgs) + shutdown(2, "Failed to open msg_max", __LINE__); + if (!max_msgsize) + shutdown(2, "Failed to open msgsize_max", __LINE__); + if (def_msgs || def_msgsize) + default_settings = 1; + + /* Load up the current system values for everything we can */ + getr(RLIMIT_MSGQUEUE, &saved_limits); + cur_limits = saved_limits; + if (default_settings) { + saved_def_msgs = cur_def_msgs = get(def_msgs); + saved_def_msgsize = cur_def_msgsize = get(def_msgsize); + } + saved_max_msgs = cur_max_msgs = get(max_msgs); + saved_max_msgsize = cur_max_msgsize = get(max_msgsize); + + /* Tell the user our initial state */ + printf("\nInitial system state:\n"); + printf("\tUsing queue path:\t\t%s\n", queue_path); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t%d\n", saved_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t%d\n", saved_limits.rlim_max); + printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize); + printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs); + if (default_settings) { + printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize); + printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs); + } else { + printf("\tDefault Message Size:\t\tNot Supported\n"); + printf("\tDefault Queue Size:\t\tNot Supported\n"); + } + printf("\n"); + + validate_current_settings(); + + printf("Adjusted system state for testing:\n"); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t%d\n", cur_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t%d\n", cur_limits.rlim_max); + printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize); + printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs); + if (default_settings) { + printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize); + printf("\tDefault Queue Size:\t\t%d\n", 
cur_def_msgs); + } + + printf("\n\nTest series 1, behavior when no attr struct " + "passed to mq_open:\n"); + if (!default_settings) { + test_queue(NULL, &result); + printf("Given sane system settings, mq_open without an attr " + "struct succeeds:\tPASS\n"); + if (result.mq_maxmsg != cur_max_msgs || + result.mq_msgsize != cur_max_msgsize) { + printf("Kernel does not support setting the default " + "mq attributes,\nbut also doesn't tie the " + "defaults to the maximums:\t\t\tPASS\n"); + } else { + set(max_msgs, ++cur_max_msgs); + set(max_msgsize, ++cur_max_msgsize); + test_queue(NULL, &result); + if (result.mq_maxmsg == cur_max_msgs && + result.mq_msgsize == cur_max_msgsize) + printf("Kernel does not support setting the " + "default mq attributes and\n" + "also ties system wide defaults to " + "the system wide maximums:\t\t" + "FAIL\n"); + else + printf("Kernel does not support setting the " + "default mq attributes,\n" + "but also doesn't tie the defaults to " + "the maximums:\t\t\tPASS\n"); + } + } else { + printf("Kernel supports setting defaults separately from " + "maximums:\t\tPASS\n"); + /* + * While we are here, go ahead and test that the kernel + * properly follows the default settings + */ + test_queue(NULL, &result); + printf("Given sane values, mq_open without an attr struct " + "succeeds:\t\tPASS\n"); + if (result.mq_maxmsg != cur_def_msgs || + result.mq_msgsize != cur_def_msgsize) + printf("Kernel supports setting defaults, but does " + "not actually honor them:\tFAIL\n\n"); + else { + set(def_msgs, ++cur_def_msgs); + set(def_msgsize, ++cur_def_msgsize); + /* In case max was the same as the default */ + set(max_msgs, ++cur_max_msgs); + set(max_msgsize, ++cur_max_msgsize); + test_queue(NULL, &result); + if (result.mq_maxmsg != cur_def_msgs || + result.mq_msgsize != cur_def_msgsize) + printf("Kernel supports setting defaults, but " + "does not actually honor them:\t" + "FAIL\n"); + else + printf("Kernel properly honors default setting " + 
"knobs:\t\t\t\tPASS\n"); + } + set(def_msgs, cur_max_msgs + 1); + cur_def_msgs = cur_max_msgs + 1; + set(def_msgsize, cur_max_msgsize + 1); + cur_def_msgsize = cur_max_msgsize + 1; + if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >= + cur_limits.rlim_cur) { + cur_limits.rlim_cur = (cur_def_msgs + 2) * + (cur_def_msgsize + 2 * sizeof(void *)); + cur_limits.rlim_max = 2 * cur_limits.rlim_cur; + setr(RLIMIT_MSGQUEUE, &cur_limits); + } + if (test_queue_fail(NULL, &result)) { + if (result.mq_maxmsg == cur_max_msgs && + result.mq_msgsize == cur_max_msgsize) + printf("Kernel properly limits default values " + "to lesser of default/max:\t\tPASS\n"); + else + printf("Kernel does not properly set default " + "queue parameters when\ndefaults > " + "max:\t\t\t\t\t\t\t\tFAIL\n"); + } else + printf("Kernel fails to open mq because defaults are " + "greater than maximums:\tFAIL\n"); + set(def_msgs, --cur_def_msgs); + set(def_msgsize, --cur_def_msgsize); + cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs * + cur_def_msgsize; + setr(RLIMIT_MSGQUEUE, &cur_limits); + if (test_queue_fail(NULL, &result)) + printf("Kernel creates queue even though defaults " + "would exceed\nrlimit setting:" + "\t\t\t\t\t\t\t\tFAIL\n"); + else + printf("Kernel properly fails to create queue when " + "defaults would\nexceed rlimit:" + "\t\t\t\t\t\t\t\tPASS\n"); + } + + /* + * Test #2 - open with an attr struct that exceeds rlimit + */ + printf("\n\nTest series 2, behavior when attr struct is " + "passed to mq_open:\n"); + cur_max_msgs = 32; + cur_max_msgsize = cur_limits.rlim_max >> 4; + set(max_msgs, cur_max_msgs); + set(max_msgsize, cur_max_msgsize); + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = cur_max_msgsize; + if (test_queue_fail(&attr, &result)) + printf("Queue open in excess of rlimit max when euid = 0 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open in excess of rlimit max when euid = 0 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = cur_max_msgs + 1; + 
attr.mq_msgsize = 10; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_maxmsg > limit when euid = 0 " + "succeeded:\t\tPASS\n"); + else + printf("Queue open with mq_maxmsg > limit when euid = 0 " + "failed:\t\tFAIL\n"); + attr.mq_maxmsg = 1; + attr.mq_msgsize = cur_max_msgsize + 1; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_msgsize > limit when euid = 0 " + "succeeded:\t\tPASS\n"); + else + printf("Queue open with mq_msgsize > limit when euid = 0 " + "failed:\t\tFAIL\n"); + attr.mq_maxmsg = 65536; + attr.mq_msgsize = 65536; + if (test_queue_fail(&attr, &result)) + printf("Queue open with total size > 2GB when euid = 0 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with total size > 2GB when euid = 0 " + "failed:\t\t\tPASS\n"); + seteuid(99); + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = cur_max_msgsize; + if (test_queue_fail(&attr, &result)) + printf("Queue open in excess of rlimit max when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open in excess of rlimit max when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = cur_max_msgs + 1; + attr.mq_msgsize = 10; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_maxmsg > limit when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with mq_maxmsg > limit when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = 1; + attr.mq_msgsize = cur_max_msgsize + 1; + if (test_queue_fail(&attr, &result)) + printf("Queue open with mq_msgsize > limit when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with mq_msgsize > limit when euid = 99 " + "failed:\t\tPASS\n"); + attr.mq_maxmsg = 65536; + attr.mq_msgsize = 65536; + if (test_queue_fail(&attr, &result)) + printf("Queue open with total size > 2GB when euid = 99 " + "succeeded:\t\tFAIL\n"); + else + printf("Queue open with total size > 2GB when euid = 99 " + "failed:\t\t\tPASS\n"); + + shutdown(0,"",0); +} diff --git 
a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c new file mode 100644 index 000000000000..2fadd4b97045 --- /dev/null +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -0,0 +1,741 @@ +/* + * This application is Copyright 2012 Red Hat, Inc. + * Doug Ledford <dledford@redhat.com> + * + * mq_perf_tests is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * mq_perf_tests is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For the full text of the license, see <http://www.gnu.org/licenses/>. + * + * mq_perf_tests.c + * Tests various types of message queue workloads, concentrating on those + * situations that invole large message sizes, large message queue depths, + * or both, and reports back useful metrics about kernel message queue + * performance. + * + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <limits.h> +#include <errno.h> +#include <signal.h> +#include <pthread.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <mqueue.h> +#include <popt.h> + +static char *usage = +"Usage:\n" +" %s [-c #[,#..] -f] path\n" +"\n" +" -c # Skip most tests and go straight to a high queue depth test\n" +" and then run that test continuously (useful for running at\n" +" the same time as some other workload to see how much the\n" +" cache thrashing caused by adding messages to a very deep\n" +" queue impacts the performance of other programs). The number\n" +" indicates which CPU core we should bind the process to during\n" +" the run. 
If you have more than one physical CPU, then you\n" +" will need one copy per physical CPU package, and you should\n" +" specify the CPU cores to pin ourself to via a comma separated\n" +" list of CPU values.\n" +" -f Only usable with continuous mode. Pin ourself to the CPUs\n" +" as requested, then instead of looping doing a high mq\n" +" workload, just busy loop. This will allow us to lock up a\n" +" single CPU just like we normally would, but without actually\n" +" thrashing the CPU cache. This is to make it easier to get\n" +" comparable numbers from some other workload running on the\n" +" other CPUs. One set of numbers with # CPUs locked up running\n" +" an mq workload, and another set of numbers with those same\n" +" CPUs locked away from the test workload, but not doing\n" +" anything to trash the cache like the mq workload might.\n" +" path Path name of the message queue to create\n" +"\n" +" Note: this program must be run as root in order to enable all tests\n" +"\n"; + +char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max"; +char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max"; + +#define min(a, b) ((a) < (b) ? 
(a) : (b)) +#define MAX_CPUS 64 +char *cpu_option_string; +int cpus_to_pin[MAX_CPUS]; +int num_cpus_to_pin; +pthread_t cpu_threads[MAX_CPUS]; +pthread_t main_thread; +cpu_set_t *cpu_set; +int cpu_set_size; +int cpus_online; + +#define MSG_SIZE 16 +#define TEST1_LOOPS 10000000 +#define TEST2_LOOPS 100000 +int continuous_mode; +int continuous_mode_fake; + +struct rlimit saved_limits, cur_limits; +int saved_max_msgs, saved_max_msgsize; +int cur_max_msgs, cur_max_msgsize; +FILE *max_msgs, *max_msgsize; +int cur_nice; +char *queue_path = "/mq_perf_tests"; +mqd_t queue = -1; +struct mq_attr result; +int mq_prio_max; + +const struct poptOption options[] = { + { + .longName = "continuous", + .shortName = 'c', + .argInfo = POPT_ARG_STRING, + .arg = &cpu_option_string, + .val = 'c', + .descrip = "Run continuous tests at a high queue depth in " + "order to test the effects of cache thrashing on " + "other tasks on the system. This test is intended " + "to be run on one core of each physical CPU while " + "some other CPU intensive task is run on all the other " + "cores of that same physical CPU and the other task " + "is timed. It is assumed that the process of adding " + "messages to the message queue in a tight loop will " + "impact that other task to some degree. 
Once the " + "tests are performed in this way, you should then " + "re-run the tests using fake mode in order to check " + "the difference in time required to perform the CPU " + "intensive task", + .argDescrip = "cpu[,cpu]", + }, + { + .longName = "fake", + .shortName = 'f', + .argInfo = POPT_ARG_NONE, + .arg = &continuous_mode_fake, + .val = 0, + .descrip = "Tie up the CPUs that we would normally tie up in" + "continuous mode, but don't actually do any mq stuff, " + "just keep the CPU busy so it can't be used to process " + "system level tasks as this would free up resources on " + "the other CPU cores and skew the comparison between " + "the no-mqueue work and mqueue work tests", + .argDescrip = NULL, + }, + { + .longName = "path", + .shortName = 'p', + .argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT, + .arg = &queue_path, + .val = 'p', + .descrip = "The name of the path to use in the mqueue " + "filesystem for our tests", + .argDescrip = "pathname", + }, + POPT_AUTOHELP + POPT_TABLEEND +}; + +static inline void __set(FILE *stream, int value, char *err_msg); +void shutdown(int exit_val, char *err_cause, int line_no); +void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context); +void sig_action(int signum, siginfo_t *info, void *context); +static inline int get(FILE *stream); +static inline void set(FILE *stream, int value); +static inline int try_set(FILE *stream, int value); +static inline void getr(int type, struct rlimit *rlim); +static inline void setr(int type, struct rlimit *rlim); +static inline void open_queue(struct mq_attr *attr); +void increase_limits(void); + +static inline void __set(FILE *stream, int value, char *err_msg) +{ + rewind(stream); + if (fprintf(stream, "%d", value) < 0) + perror(err_msg); +} + + +void shutdown(int exit_val, char *err_cause, int line_no) +{ + static int in_shutdown = 0; + int errno_at_shutdown = errno; + int i; + + /* In case we get called by multiple threads or from an sighandler */ + if (in_shutdown++) 
/*
 * set - store an integer in a /proc file and confirm it was accepted.
 * @stream: open stream for the /proc entry
 * @value:  value to store
 *
 * Writes the value at offset 0 and reads it back through get().  A failed
 * write or a mismatching read-back calls shutdown() with exit code 5.
 * shutdown() returns when it is already running, so the failure paths
 * return to the caller explicitly instead of falling through.
 */
static inline void set(FILE *stream, int value)
{
	int new_value;

	rewind(stream);
	/* Flush so that a write rejected by the kernel is detected here */
	if (fprintf(stream, "%d", value) < 0 || fflush(stream) != 0) {
		shutdown(5, "Failed writing to /proc file", __LINE__);
		return;
	}

	new_value = get(stream);
	if (new_value != value)
		shutdown(5, "We didn't get what we wrote to /proc back",
			 __LINE__);
}
value; +} + +static inline void getr(int type, struct rlimit *rlim) +{ + if (getrlimit(type, rlim)) + shutdown(6, "getrlimit()", __LINE__); +} + +static inline void setr(int type, struct rlimit *rlim) +{ + if (setrlimit(type, rlim)) + shutdown(7, "setrlimit()", __LINE__); +} + +/** + * open_queue - open the global queue for testing + * @attr - An attr struct specifying the desired queue traits + * @result - An attr struct that lists the actual traits the queue has + * + * This open is not allowed to fail, failure will result in an orderly + * shutdown of the program. The global queue_path is used to set what + * queue to open, the queue descriptor is saved in the global queue + * variable. + */ +static inline void open_queue(struct mq_attr *attr) +{ + int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK; + int perms = DEFFILEMODE; + + queue = mq_open(queue_path, flags, perms, attr); + if (queue == -1) + shutdown(1, "mq_open()", __LINE__); + if (mq_getattr(queue, &result)) + shutdown(1, "mq_getattr()", __LINE__); + printf("\n\tQueue %s created:\n", queue_path); + printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ? 
+ "O_NONBLOCK" : "(null)"); + printf("\t\tmq_maxmsg:\t\t\t%d\n", result.mq_maxmsg); + printf("\t\tmq_msgsize:\t\t\t%d\n", result.mq_msgsize); + printf("\t\tmq_curmsgs:\t\t\t%d\n", result.mq_curmsgs); +} + +void *fake_cont_thread(void *arg) +{ + int i; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i] == pthread_self()) + break; + printf("\tStarted fake continuous mode thread %d on CPU %d\n", i, + cpus_to_pin[i]); + while (1) + ; +} + +void *cont_thread(void *arg) +{ + char buff[MSG_SIZE]; + int i, priority; + + for (i = 0; i < num_cpus_to_pin; i++) + if (cpu_threads[i] == pthread_self()) + break; + printf("\tStarted continuous mode thread %d on CPU %d\n", i, + cpus_to_pin[i]); + while (1) { + while (mq_send(queue, buff, sizeof(buff), 0) == 0) + ; + mq_receive(queue, buff, sizeof(buff), &priority); + } +} + +#define drain_queue() \ + while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE) + +#define do_untimed_send() \ + do { \ + if (mq_send(queue, buff, MSG_SIZE, prio_out)) \ + shutdown(3, "Test send failure", __LINE__); \ + } while (0) + +#define do_send_recv() \ + do { \ + clock_gettime(clock, &start); \ + if (mq_send(queue, buff, MSG_SIZE, prio_out)) \ + shutdown(3, "Test send failure", __LINE__); \ + clock_gettime(clock, &middle); \ + if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \ + shutdown(3, "Test receive failure", __LINE__); \ + clock_gettime(clock, &end); \ + nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \ + (middle.tv_nsec - start.tv_nsec); \ + send_total.tv_nsec += nsec; \ + if (send_total.tv_nsec >= 1000000000) { \ + send_total.tv_sec++; \ + send_total.tv_nsec -= 1000000000; \ + } \ + nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \ + (end.tv_nsec - middle.tv_nsec); \ + recv_total.tv_nsec += nsec; \ + if (recv_total.tv_nsec >= 1000000000) { \ + recv_total.tv_sec++; \ + recv_total.tv_nsec -= 1000000000; \ + } \ + } while (0) + +struct test { + char *desc; + void (*func)(int *); +}; + +void 
const_prio(int *prio) +{ + return; +} + +void inc_prio(int *prio) +{ + if (++*prio == mq_prio_max) + *prio = 0; +} + +void dec_prio(int *prio) +{ + if (--*prio < 0) + *prio = mq_prio_max - 1; +} + +void random_prio(int *prio) +{ + *prio = random() % mq_prio_max; +} + +struct test test2[] = { + {"\n\tTest #2a: Time send/recv message, queue full, constant prio\n", + const_prio}, + {"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n", + inc_prio}, + {"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n", + dec_prio}, + {"\n\tTest #2d: Time send/recv message, queue full, random prio\n", + random_prio}, + {NULL, NULL} +}; + +/** + * Tests to perform (all done with MSG_SIZE messages): + * + * 1) Time to add/remove message with 0 messages on queue + * 1a) with constant prio + * 2) Time to add/remove message when queue close to capacity: + * 2a) with constant prio + * 2b) with increasing prio + * 2c) with decreasing prio + * 2d) with random prio + * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX) + */ +void *perf_test_thread(void *arg) +{ + char buff[MSG_SIZE]; + int prio_out, prio_in; + int i; + clockid_t clock; + pthread_t *t; + struct timespec res, start, middle, end, send_total, recv_total; + unsigned long long nsec; + struct test *cur_test; + + t = &cpu_threads[0]; + printf("\n\tStarted mqueue performance test thread on CPU %d\n", + cpus_to_pin[0]); + mq_prio_max = sysconf(_SC_MQ_PRIO_MAX); + if (mq_prio_max == -1) + shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__); + if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0) + shutdown(2, "pthread_getcpuclockid", __LINE__); + + if (clock_getres(clock, &res)) + shutdown(2, "clock_getres()", __LINE__); + + printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max); + printf("\t\tClock resolution:\t\t%d nsec%s\n", res.tv_nsec, + res.tv_nsec > 1 ? 
"s" : ""); + + + + printf("\n\tTest #1: Time send/recv message, queue empty\n"); + printf("\t\t(%d iterations)\n", TEST1_LOOPS); + prio_out = 0; + send_total.tv_sec = 0; + send_total.tv_nsec = 0; + recv_total.tv_sec = 0; + recv_total.tv_nsec = 0; + for (i = 0; i < TEST1_LOOPS; i++) + do_send_recv(); + printf("\t\tSend msg:\t\t\t%d.%ds total time\n", + send_total.tv_sec, send_total.tv_nsec); + nsec = ((unsigned long long)send_total.tv_sec * 1000000000 + + send_total.tv_nsec) / TEST1_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tRecv msg:\t\t\t%d.%ds total time\n", + recv_total.tv_sec, recv_total.tv_nsec); + nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 + + recv_total.tv_nsec) / TEST1_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + + + for (cur_test = test2; cur_test->desc != NULL; cur_test++) { + printf(cur_test->desc); + printf("\t\t(%d iterations)\n", TEST2_LOOPS); + prio_out = 0; + send_total.tv_sec = 0; + send_total.tv_nsec = 0; + recv_total.tv_sec = 0; + recv_total.tv_nsec = 0; + printf("\t\tFilling queue..."); + fflush(stdout); + clock_gettime(clock, &start); + for (i = 0; i < result.mq_maxmsg - 1; i++) { + do_untimed_send(); + cur_test->func(&prio_out); + } + clock_gettime(clock, &end); + nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) * + 1000000000) + (end.tv_nsec - start.tv_nsec); + printf("done.\t\t%lld.%llds\n", nsec / 1000000000, + nsec % 1000000000); + printf("\t\tTesting..."); + fflush(stdout); + for (i = 0; i < TEST2_LOOPS; i++) { + do_send_recv(); + cur_test->func(&prio_out); + } + printf("done.\n"); + printf("\t\tSend msg:\t\t\t%d.%ds total time\n", + send_total.tv_sec, send_total.tv_nsec); + nsec = ((unsigned long long)send_total.tv_sec * 1000000000 + + send_total.tv_nsec) / TEST2_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tRecv msg:\t\t\t%d.%ds total time\n", + recv_total.tv_sec, recv_total.tv_nsec); + nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 + + 
recv_total.tv_nsec) / TEST2_LOOPS; + printf("\t\t\t\t\t\t%d nsec/msg\n", nsec); + printf("\t\tDraining queue..."); + fflush(stdout); + clock_gettime(clock, &start); + drain_queue(); + clock_gettime(clock, &end); + nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) * + 1000000000) + (end.tv_nsec - start.tv_nsec); + printf("done.\t\t%lld.%llds\n", nsec / 1000000000, + nsec % 1000000000); + } + return 0; +} + +void increase_limits(void) +{ + cur_limits.rlim_cur = RLIM_INFINITY; + cur_limits.rlim_max = RLIM_INFINITY; + setr(RLIMIT_MSGQUEUE, &cur_limits); + while (try_set(max_msgs, cur_max_msgs += 10)) + ; + cur_max_msgs = get(max_msgs); + while (try_set(max_msgsize, cur_max_msgsize += 1024)) + ; + cur_max_msgsize = get(max_msgsize); + if (setpriority(PRIO_PROCESS, 0, -20) != 0) + shutdown(2, "setpriority()", __LINE__); + cur_nice = -20; +} + +int main(int argc, char *argv[]) +{ + struct mq_attr attr; + char *option, *next_option; + int i, cpu; + struct sigaction sa; + poptContext popt_context; + char rc; + void *retval; + + main_thread = pthread_self(); + num_cpus_to_pin = 0; + + if (sysconf(_SC_NPROCESSORS_ONLN) == -1) { + perror("sysconf(_SC_NPROCESSORS_ONLN)"); + exit(1); + } + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); + cpu_set = CPU_ALLOC(cpus_online); + if (cpu_set == NULL) { + perror("CPU_ALLOC()"); + exit(1); + } + cpu_set_size = CPU_ALLOC_SIZE(cpus_online); + CPU_ZERO_S(cpu_set_size, cpu_set); + + popt_context = poptGetContext(NULL, argc, (const char **)argv, + options, 0); + + while ((rc = poptGetNextOpt(popt_context)) > 0) { + switch (rc) { + case 'c': + continuous_mode = 1; + option = cpu_option_string; + do { + next_option = strchr(option, ','); + if (next_option) + *next_option = '\0'; + cpu = atoi(option); + if (cpu >= cpus_online) + fprintf(stderr, "CPU %d exceeds " + "cpus online, ignoring.\n", + cpu); + else + cpus_to_pin[num_cpus_to_pin++] = cpu; + if (next_option) + option = ++next_option; + } while (next_option && 
num_cpus_to_pin < MAX_CPUS); + /* Double check that they didn't give us the same CPU + * more than once */ + for (cpu = 0; cpu < num_cpus_to_pin; cpu++) { + if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size, + cpu_set)) { + fprintf(stderr, "Any given CPU may " + "only be given once.\n"); + exit(1); + } else + CPU_SET_S(cpus_to_pin[cpu], + cpu_set_size, cpu_set); + } + break; + case 'p': + /* + * Although we can create a msg queue with a + * non-absolute path name, unlink will fail. So, + * if the name doesn't start with a /, add one + * when we save it. + */ + option = queue_path; + if (*option != '/') { + queue_path = malloc(strlen(option) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, option); + free(option); + } + break; + } + } + + if (continuous_mode && num_cpus_to_pin == 0) { + fprintf(stderr, "Must pass at least one CPU to continuous " + "mode.\n"); + poptPrintUsage(popt_context, stderr, 0); + exit(1); + } else if (!continuous_mode) { + num_cpus_to_pin = 1; + cpus_to_pin[0] = cpus_online - 1; + } + + if (getuid() != 0) { + fprintf(stderr, "Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. 
" + "Exiting.\n"); + exit(1); + } + + max_msgs = fopen(MAX_MSGS, "r+"); + max_msgsize = fopen(MAX_MSGSIZE, "r+"); + if (!max_msgs) + shutdown(2, "Failed to open msg_max", __LINE__); + if (!max_msgsize) + shutdown(2, "Failed to open msgsize_max", __LINE__); + + /* Load up the current system values for everything we can */ + getr(RLIMIT_MSGQUEUE, &saved_limits); + cur_limits = saved_limits; + saved_max_msgs = cur_max_msgs = get(max_msgs); + saved_max_msgsize = cur_max_msgsize = get(max_msgsize); + errno = 0; + cur_nice = getpriority(PRIO_PROCESS, 0); + if (errno) + shutdown(2, "getpriority()", __LINE__); + + /* Tell the user our initial state */ + printf("\nInitial system state:\n"); + printf("\tUsing queue path:\t\t\t%s\n", queue_path); + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%d\n", saved_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%d\n", saved_limits.rlim_max); + printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize); + printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs); + printf("\tNice value:\t\t\t\t%d\n", cur_nice); + printf("\n"); + + increase_limits(); + + printf("Adjusted system state for testing:\n"); + if (cur_limits.rlim_cur == RLIM_INFINITY) { + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n"); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n"); + } else { + printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%d\n", + cur_limits.rlim_cur); + printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%d\n", + cur_limits.rlim_max); + } + printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize); + printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs); + printf("\tNice value:\t\t\t\t%d\n", cur_nice); + printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ? + (continuous_mode_fake ? 
"fake mode" : "enabled") : + "disabled"); + printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]); + for (cpu = 1; cpu < num_cpus_to_pin; cpu++) + printf(",%d", cpus_to_pin[cpu]); + printf("\n"); + + sa.sa_sigaction = sig_action_SIGUSR1; + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGHUP); + sigaddset(&sa.sa_mask, SIGINT); + sigaddset(&sa.sa_mask, SIGQUIT); + sigaddset(&sa.sa_mask, SIGTERM); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGUSR1)", __LINE__); + sa.sa_sigaction = sig_action; + if (sigaction(SIGHUP, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGHUP)", __LINE__); + if (sigaction(SIGINT, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGINT)", __LINE__); + if (sigaction(SIGQUIT, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGQUIT)", __LINE__); + if (sigaction(SIGTERM, &sa, NULL) == -1) + shutdown(1, "sigaction(SIGTERM)", __LINE__); + + if (!continuous_mode_fake) { + attr.mq_flags = O_NONBLOCK; + attr.mq_maxmsg = cur_max_msgs; + attr.mq_msgsize = MSG_SIZE; + open_queue(&attr); + } + for (i = 0; i < num_cpus_to_pin; i++) { + pthread_attr_t thread_attr; + void *thread_func; + + if (continuous_mode_fake) + thread_func = &fake_cont_thread; + else if (continuous_mode) + thread_func = &cont_thread; + else + thread_func = &perf_test_thread; + + CPU_ZERO_S(cpu_set_size, cpu_set); + CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set); + pthread_attr_init(&thread_attr); + pthread_attr_setaffinity_np(&thread_attr, cpu_set_size, + cpu_set); + if (pthread_create(&cpu_threads[i], &thread_attr, thread_func, + NULL)) + shutdown(1, "pthread_create()", __LINE__); + pthread_attr_destroy(&thread_attr); + } + + if (!continuous_mode) { + pthread_join(cpu_threads[0], &retval); + shutdown((long)retval, "perf_test_thread()", __LINE__); + } else { + while (1) + sleep(1); + } + shutdown(0, "", 0); +} |