Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--  tools/perf/util/machine.c  201
1 file changed, 172 insertions(+), 29 deletions(-)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae858cbcb..61959aba7e27 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -10,6 +10,7 @@
#include "hist.h"
#include "machine.h"
#include "map.h"
+#include "symbol.h"
#include "sort.h"
#include "strlist.h"
#include "thread.h"
@@ -21,6 +22,7 @@
#include "unwind.h"
#include "linux/hash.h"
#include "asm/bug.h"
+#include "bpf-event.h"
#include "sane_ctype.h"
#include <symbol/kallsyms.h>
@@ -41,7 +43,7 @@ static void machine__threads_init(struct machine *machine)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
- threads->entries = RB_ROOT;
+ threads->entries = RB_ROOT_CACHED;
init_rwsem(&threads->lock);
threads->nr = 0;
INIT_LIST_HEAD(&threads->dead);
@@ -137,7 +139,7 @@ struct machine *machine__new_kallsyms(void)
struct machine *machine = machine__new_host();
/*
* FIXME:
- * 1) We should switch to machine__load_kallsyms(), i.e. not explicitely
+ * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
* ask for not using the kcore parsing code, once this one is fixed
* to create a map per module.
*/
@@ -179,7 +181,7 @@ void machine__delete_threads(struct machine *machine)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
down_write(&threads->lock);
- nd = rb_first(&threads->entries);
+ nd = rb_first_cached(&threads->entries);
while (nd) {
struct thread *t = rb_entry(nd, struct thread, rb_node);
@@ -222,7 +224,7 @@ void machine__delete(struct machine *machine)
void machines__init(struct machines *machines)
{
machine__init(&machines->host, "", HOST_KERNEL_ID);
- machines->guests = RB_ROOT;
+ machines->guests = RB_ROOT_CACHED;
}
void machines__exit(struct machines *machines)
@@ -234,9 +236,10 @@ void machines__exit(struct machines *machines)
struct machine *machines__add(struct machines *machines, pid_t pid,
const char *root_dir)
{
- struct rb_node **p = &machines->guests.rb_node;
+ struct rb_node **p = &machines->guests.rb_root.rb_node;
struct rb_node *parent = NULL;
struct machine *pos, *machine = malloc(sizeof(*machine));
+ bool leftmost = true;
if (machine == NULL)
return NULL;
@@ -251,12 +254,14 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
pos = rb_entry(parent, struct machine, rb_node);
if (pid < pos->pid)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&machine->rb_node, parent, p);
- rb_insert_color(&machine->rb_node, &machines->guests);
+ rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost);
return machine;
}
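
The leftmost-tracking insert above is the core idiom of this patch's rb_root to
rb_root_cached conversion, and it repeats below in ____machine__findnew_thread().
A minimal standalone sketch of the pattern, using the kernel rbtree API from
<linux/rbtree.h>; struct item and item__insert are illustrative names, not part
of this patch:

	#include <linux/rbtree.h>

	struct item {
		struct rb_node	rb_node;
		int		key;
	};

	static void item__insert(struct rb_root_cached *root, struct item *new)
	{
		struct rb_node **p = &root->rb_root.rb_node;
		struct rb_node *parent = NULL;
		bool leftmost = true;	/* stays true only if we never descend right */

		while (*p != NULL) {
			struct item *pos = rb_entry(*p, struct item, rb_node);

			parent = *p;
			if (new->key < pos->key) {
				p = &(*p)->rb_left;
			} else {
				p = &(*p)->rb_right;
				leftmost = false;
			}
		}

		rb_link_node(&new->rb_node, parent, p);
		/* The third argument tells the rbtree code whether to update the
		 * cached leftmost pointer, so rb_first_cached() stays O(1). */
		rb_insert_color_cached(&new->rb_node, root, leftmost);
	}

With the root declared as struct rb_root_cached root = RB_ROOT_CACHED,
rb_first_cached(&root) then returns the leftmost node without walking the tree,
which is why every rb_first() call site in this file is converted as well.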
@@ -267,7 +272,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec)
machines->host.comm_exec = comm_exec;
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *machine = rb_entry(nd, struct machine, rb_node);
machine->comm_exec = comm_exec;
@@ -276,7 +281,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec)
struct machine *machines__find(struct machines *machines, pid_t pid)
{
- struct rb_node **p = &machines->guests.rb_node;
+ struct rb_node **p = &machines->guests.rb_root.rb_node;
struct rb_node *parent = NULL;
struct machine *machine;
struct machine *default_machine = NULL;
@@ -339,7 +344,7 @@ void machines__process_guests(struct machines *machines,
{
struct rb_node *nd;
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
process(pos, data);
}
@@ -352,7 +357,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
machines->host.id_hdr_size = id_hdr_size;
- for (node = rb_first(&machines->guests); node; node = rb_next(node)) {
+ for (node = rb_first_cached(&machines->guests); node;
+ node = rb_next(node)) {
machine = rb_entry(node, struct machine, rb_node);
machine->id_hdr_size = id_hdr_size;
}
@@ -465,9 +471,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
pid_t pid, pid_t tid,
bool create)
{
- struct rb_node **p = &threads->entries.rb_node;
+ struct rb_node **p = &threads->entries.rb_root.rb_node;
struct rb_node *parent = NULL;
struct thread *th;
+ bool leftmost = true;
th = threads__get_last_match(threads, machine, pid, tid);
if (th)
@@ -485,8 +492,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (tid < th->tid)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
if (!create)
@@ -495,7 +504,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = thread__new(pid, tid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
- rb_insert_color(&th->rb_node, &threads->entries);
+ rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost);
/*
* We have to initialize map_groups separately
@@ -506,7 +515,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* leader and that would screwed the rb tree.
*/
if (thread__init_map_groups(th, machine)) {
- rb_erase_init(&th->rb_node, &threads->entries);
+ rb_erase_cached(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
thread__put(th);
return NULL;
@@ -681,6 +690,59 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static int machine__process_ksymbol_register(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct symbol *sym;
+ struct map *map;
+
+ map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
+ if (!map) {
+ map = dso__new_map(event->ksymbol_event.name);
+ if (!map)
+ return -ENOMEM;
+
+ map->start = event->ksymbol_event.addr;
+ map->pgoff = map->start;
+ map->end = map->start + event->ksymbol_event.len;
+ map_groups__insert(&machine->kmaps, map);
+ }
+
+ sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len,
+ 0, 0, event->ksymbol_event.name);
+ if (!sym)
+ return -ENOMEM;
+ dso__insert_symbol(map->dso, sym);
+ return 0;
+}
+
+static int machine__process_ksymbol_unregister(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct map *map;
+
+ map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
+ if (map)
+ map_groups__remove(&machine->kmaps, map);
+
+ return 0;
+}
+
+int machine__process_ksymbol(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (dump_trace)
+ perf_event__fprintf_ksymbol(event, stdout);
+
+ if (event->ksymbol_event.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
+ return machine__process_ksymbol_unregister(machine, event,
+ sample);
+ return machine__process_ksymbol_register(machine, event, sample);
+}
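
For reference, the fields dereferenced by these handlers (addr, len, flags,
name) belong to the ksymbol record introduced alongside this code. A sketch of
the layout assumed here, from the tools/perf/util/event.h of the same series;
verify against your tree before relying on it:

	struct ksymbol_event {
		struct perf_event_header header;
		u64 addr;	/* start of the new kernel symbol, e.g. a BPF program */
		u32 len;	/* length of the symbol, in bytes */
		u16 ksym_type;	/* PERF_RECORD_KSYMBOL_TYPE_* */
		u16 flags;	/* PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER on removal */
		char name[KSYM_NAME_LEN];
	};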
+
static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
{
const char *dup_filename;
@@ -744,7 +806,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
struct rb_node *nd;
size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret += __dsos__fprintf(&pos->dsos.head, fp);
}
@@ -764,7 +826,7 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
struct rb_node *nd;
size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
}
@@ -804,7 +866,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
ret = fprintf(fp, "Threads: %u\n", threads->nr);
- for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&threads->entries); nd;
+ nd = rb_next(nd)) {
struct thread *pos = rb_entry(nd, struct thread, rb_node);
ret += thread__fprintf(pos, fp);
@@ -1107,7 +1170,7 @@ failure:
void machines__destroy_kernel_maps(struct machines *machines)
{
- struct rb_node *next = rb_first(&machines->guests);
+ struct rb_node *next = rb_first_cached(&machines->guests);
machine__destroy_kernel_maps(&machines->host);
@@ -1115,7 +1178,7 @@ void machines__destroy_kernel_maps(struct machines *machines)
struct machine *pos = rb_entry(next, struct machine, rb_node);
next = rb_next(&pos->rb_node);
- rb_erase(&pos->rb_node, &machines->guests);
+ rb_erase_cached(&pos->rb_node, &machines->guests);
machine__delete(pos);
}
}
@@ -1680,7 +1743,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
BUG_ON(refcount_read(&th->refcnt) == 0);
if (lock)
down_write(&threads->lock);
- rb_erase_init(&th->rb_node, &threads->entries);
+ rb_erase_cached(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
--threads->nr;
/*
@@ -1708,6 +1771,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
+ bool do_maps_clone = true;
int err = 0;
if (dump_trace)
@@ -1736,9 +1800,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
+ /*
+ * When synthesizing FORK events, we are trying to create thread
+ * objects for the already running tasks on the machine.
+ *
+ * Normally, for a kernel FORK event, we want to clone the parent's
+ * maps because that is what the kernel just did.
+ *
+ * But when synthesizing, this should not be done. If we do, we end up
+ * with overlapping maps as we process the synthesized MMAP2 events that
+ * get delivered shortly thereafter.
+ *
+ * Use the FORK event misc flags in an internal way to signal this
+ * situation, so we can elide the map clone when appropriate.
+ */
+ if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+ do_maps_clone = false;
if (thread == NULL || parent == NULL ||
- thread__fork(thread, parent, sample->time) < 0) {
+ thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
err = -1;
}
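
The synthesis side that sets this flag lives outside this hunk; a hedged sketch
of what it presumably does when fabricating FORK records for already-running
tasks (the PERF_RECORD_MISC_FORK_EXEC macro is the one tested above, but the
helper body here is illustrative, not perf's exact synthesizer):

	static void synthesize_fork(union perf_event *event, pid_t pid,
				    pid_t tid, pid_t ppid, pid_t ptid)
	{
		memset(&event->fork, 0, sizeof(event->fork));
		event->fork.header.type = PERF_RECORD_FORK;
		/* Tag this FORK as synthesized so the processing side
		 * (machine__process_fork_event above) elides the map clone. */
		event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
		event->fork.header.size = sizeof(event->fork);
		event->fork.pid  = pid;
		event->fork.tid  = tid;
		event->fork.ppid = ppid;
		event->fork.ptid = ptid;
	}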
@@ -1795,6 +1875,10 @@ int machine__process_event(struct machine *machine, union perf_event *event,
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
ret = machine__process_switch_event(machine, event); break;
+ case PERF_RECORD_KSYMBOL:
+ ret = machine__process_ksymbol(machine, event, sample); break;
+ case PERF_RECORD_BPF_EVENT:
+ ret = machine__process_bpf_event(machine, event, sample); break;
default:
ret = -1;
break;
@@ -1988,7 +2072,7 @@ static void save_iterations(struct iterations *iter,
{
int i;
- iter->nr_loop_iter = nr;
+ iter->nr_loop_iter++;
iter->cycles = 0;
for (i = 0; i < nr; i++)
@@ -2140,6 +2224,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
return 0;
}
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode, int ent)
+{
+ int err = 0;
+
+ while (--ent >= 0) {
+ u64 ip = chain->ips[ent];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, cpumode, ip,
+ false, NULL, NULL, 0);
+ break;
+ }
+ }
+ return err;
+}
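
Context for find_prev_cpumode(): in a raw kernel callchain, entries at or above
PERF_CONTEXT_MAX are not return addresses but context markers
(PERF_CONTEXT_KERNEL, PERF_CONTEXT_USER, ...) that set the cpumode of the
entries following them; add_callchain_ip() recognizes such an entry and updates
*cpumode instead of adding a frame. In caller (reverse) order the frames are
visited before their marker, so the marker must be found by scanning backwards,
which is what the --ent loop does. An illustrative chain layout (addresses are
made up):

	u64 ips[] = {
		PERF_CONTEXT_KERNEL,	/* cpumode marker for the frames below */
		0xffffffff81123456,	/* kernel frame (innermost) */
		0xffffffff81011234,	/* kernel frame */
		PERF_CONTEXT_USER,	/* switch to user cpumode */
		0x00000000004006a0,	/* user frame */
		0x00007f1234561000,	/* user frame (outermost) */
	};
	/* Iterating caller-first starts at ips[5]; the cpumode marker for that
	 * frame sits at the lower index ips[3], which find_prev_cpumode()
	 * locates by walking backwards. */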
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct perf_evsel *evsel,
@@ -2246,6 +2351,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
}
check_calls:
+ if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+ &cpumode, chain->nr - first_call);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
for (i = first_call, nr_entries = 0;
i < chain_nr && nr_entries < max_stack; i++) {
u64 ip;
@@ -2260,9 +2371,15 @@ check_calls:
continue;
#endif
ip = chain->ips[j];
-
if (ip < PERF_CONTEXT_MAX)
++nr_entries;
+ else if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent,
+ root_al, &cpumode, j);
+ if (err)
+ return (err < 0) ? err : 0;
+ continue;
+ }
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
@@ -2403,7 +2520,8 @@ int machine__for_each_thread(struct machine *machine,
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
threads = &machine->threads[i];
- for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&threads->entries); nd;
+ nd = rb_next(nd)) {
thread = rb_entry(nd, struct thread, rb_node);
rc = fn(thread, priv);
if (rc != 0)
@@ -2430,7 +2548,7 @@ int machines__for_each_thread(struct machines *machines,
if (rc != 0)
return rc;
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *machine = rb_entry(nd, struct machine, rb_node);
rc = machine__for_each_thread(machine, fn, priv);
@@ -2443,15 +2561,13 @@ int machines__for_each_thread(struct machines *machines,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap,
- unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize)
{
if (target__has_task(target))
- return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
+ return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
else if (target__has_cpu(target))
return perf_event__synthesize_threads(tool, process,
machine, data_mmap,
- proc_map_timeout,
nr_threads_synthesize);
/* command specified */
return 0;
@@ -2542,6 +2658,33 @@ int machine__get_kernel_start(struct machine *machine)
return err;
}
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
+{
+ u8 addr_cpumode = cpumode;
+ bool kernel_ip;
+
+ if (!machine->single_address_space)
+ goto out;
+
+ kernel_ip = machine__kernel_ip(machine, addr);
+ switch (cpumode) {
+ case PERF_RECORD_MISC_KERNEL:
+ case PERF_RECORD_MISC_USER:
+ addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ case PERF_RECORD_MISC_GUEST_USER:
+ addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL :
+ PERF_RECORD_MISC_GUEST_USER;
+ break;
+ default:
+ break;
+ }
+out:
+ return addr_cpumode;
+}
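
A hedged usage sketch: on machines flagged single_address_space (for example
when decoding hardware trace where kernel and user code share one address
view), the cpumode recorded with a sample classifies the sampled IP, while
other addresses in the same record may fall on the opposite side of the kernel
boundary, so callers reclassify per address. thread__find_symbol() is a real
perf helper of this era; the wrapper itself is illustrative:

	static void resolve_sample_addr(struct machine *machine,
					struct thread *thread,
					u8 cpumode, u64 addr,
					struct addr_location *al)
	{
		/* Re-derive the cpumode for this particular address before
		 * resolving it to a map and symbol. */
		u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);

		thread__find_symbol(thread, addr_cpumode, addr, al);
	}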
+
struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
{
return dsos__findnew(&machine->dsos, filename);