From 62a038d34db26771756cf3689e36de638bedd2c4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:33 +0530 Subject: hw-breakpoints: introducing generic hardware breakpoint handler interfaces This patch introduces the generic Hardware Breakpoint interfaces for both user and kernel space requests. This core Api handles the hardware breakpoints through new helpers. It handles the user-space breakpoints and kernel breakpoints in front of arch implementation. One can choose kernel wide breakpoints using the following helpers and passing them a generic struct hw_breakpoint: - register_kernel_hw_breakpoint() - unregister_kernel_hw_breakpoint() - modify_kernel_hw_breakpoint() On the other side, you can choose per task breakpoints. - register_user_hw_breakpoint() - unregister_user_hw_breakpoint() - modify_user_hw_breakpoint() [ fweisbec@gmail.com: fix conflict against perfcounter ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- kernel/Makefile | 1 + kernel/hw_breakpoint.c | 378 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 379 insertions(+) create mode 100644 kernel/hw_breakpoint.c (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index a35eee3436de..18ad1110b226 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..c1f64e65a9f3 --- /dev/null +++ b/kernel/hw_breakpoint.c @@ -0,0 +1,378 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) IBM Corporation, 2009 + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + * This file contains the arch-independent routines. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_X86 +#include +#endif +/* + * Spinlock that protects all (un)register operations over kernel/user-space + * breakpoint requests + */ +static DEFINE_SPINLOCK(hw_breakpoint_lock); + +/* Array of kernel-space breakpoint structures */ +struct hw_breakpoint *hbp_kernel[HBP_NUM]; + +/* + * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being + * modified but we need the older copy to handle any hbp exceptions. It will + * sync with hbp_kernel[] value after updation is done through IPIs. + */ +DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); + +/* + * Kernel breakpoints grow downwards, starting from HBP_NUM + * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for + * kernel-space request. We will initialise it here and not in an __init + * routine because load_debug_registers(), which uses this variable can be + * called very early during CPU initialisation. + */ +unsigned int hbp_kernel_pos = HBP_NUM; + +/* + * An array containing refcount of threads using a given bkpt register + * Accesses are synchronised by acquiring hw_breakpoint_lock + */ +unsigned int hbp_user_refcount[HBP_NUM]; + +/* + * Load the debug registers during startup of a CPU. + */ +void load_debug_registers(void) +{ + unsigned long flags; + struct task_struct *tsk = current; + + spin_lock_bh(&hw_breakpoint_lock); + + /* Prevent IPIs for new kernel breakpoint updates */ + local_irq_save(flags); + arch_update_kernel_hw_breakpoint(NULL); + local_irq_restore(flags); + + if (test_tsk_thread_flag(tsk, TIF_DEBUG)) + arch_install_thread_hw_breakpoint(tsk); + + spin_unlock_bh(&hw_breakpoint_lock); +} + +/* + * Erase all the hardware breakpoint info associated with a thread. + * + * If tsk != current then tsk must not be usable (for example, a + * child being cleaned up from a failed fork). + */ +void flush_thread_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *thread = &(tsk->thread); + + spin_lock_bh(&hw_breakpoint_lock); + + /* The thread no longer has any breakpoints associated with it */ + clear_tsk_thread_flag(tsk, TIF_DEBUG); + for (i = 0; i < HBP_NUM; i++) { + if (thread->hbp[i]) { + hbp_user_refcount[i]--; + kfree(thread->hbp[i]); + thread->hbp[i] = NULL; + } + } + + arch_flush_thread_hw_breakpoint(tsk); + + /* Actually uninstall the breakpoints if necessary */ + if (tsk == current) + arch_uninstall_thread_hw_breakpoint(); + spin_unlock_bh(&hw_breakpoint_lock); +} + +/* + * Copy the hardware breakpoint info from a thread to its cloned child. + */ +int copy_thread_hw_breakpoint(struct task_struct *tsk, + struct task_struct *child, unsigned long clone_flags) +{ + /* + * We will assume that breakpoint settings are not inherited + * and the child starts out with no debug registers set. + * But what about CLONE_PTRACE? + */ + clear_tsk_thread_flag(child, TIF_DEBUG); + + /* We will call flush routine since the debugregs are not inherited */ + arch_flush_thread_hw_breakpoint(child); + + return 0; +} + +static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int rc; + + /* Do not overcommit. Fail if kernel has used the hbp registers */ + if (pos >= hbp_kernel_pos) + return -ENOSPC; + + rc = arch_validate_hwbkpt_settings(bp, tsk); + if (rc) + return rc; + + thread->hbp[pos] = bp; + hbp_user_refcount[pos]++; + + arch_update_user_hw_breakpoint(pos, tsk); + /* + * Does it need to be installed right now? + * Otherwise it will get installed the next time tsk runs + */ + if (tsk == current) + arch_install_thread_hw_breakpoint(tsk); + + return rc; +} + +/* + * Modify the address of a hbp register already in use by the task + * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() + */ +static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + + if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) + return -EINVAL; + + if (thread->hbp[pos] == NULL) + return -EINVAL; + + thread->hbp[pos] = bp; + /* + * 'pos' must be that of a hbp register already used by 'tsk' + * Otherwise arch_modify_user_hw_breakpoint() will fail + */ + arch_update_user_hw_breakpoint(pos, tsk); + + if (tsk == current) + arch_install_thread_hw_breakpoint(tsk); + + return 0; +} + +static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) +{ + hbp_user_refcount[pos]--; + tsk->thread.hbp[pos] = NULL; + + arch_update_user_hw_breakpoint(pos, tsk); + + if (tsk == current) + arch_install_thread_hw_breakpoint(tsk); +} + +/** + * register_user_hw_breakpoint - register a hardware breakpoint for user space + * @tsk: pointer to 'task_struct' of the process to which the address belongs + * @bp: the breakpoint structure to register + * + * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and + * @bp->triggered must be set properly before invocation + * + */ +int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int i, rc = -ENOSPC; + + spin_lock_bh(&hw_breakpoint_lock); + + for (i = 0; i < hbp_kernel_pos; i++) { + if (!thread->hbp[i]) { + rc = __register_user_hw_breakpoint(i, tsk, bp); + break; + } + } + if (!rc) + set_tsk_thread_flag(tsk, TIF_DEBUG); + + spin_unlock_bh(&hw_breakpoint_lock); + return rc; +} +EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); + +/** + * modify_user_hw_breakpoint - modify a user-space hardware breakpoint + * @tsk: pointer to 'task_struct' of the process to which the address belongs + * @bp: the breakpoint structure to unregister + * + */ +int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int i, ret = -ENOENT; + + spin_lock_bh(&hw_breakpoint_lock); + for (i = 0; i < hbp_kernel_pos; i++) { + if (bp == thread->hbp[i]) { + ret = __modify_user_hw_breakpoint(i, tsk, bp); + break; + } + } + spin_unlock_bh(&hw_breakpoint_lock); + return ret; +} +EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); + +/** + * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint + * @tsk: pointer to 'task_struct' of the process to which the address belongs + * @bp: the breakpoint structure to unregister + * + */ +void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int i, pos = -1, hbp_counter = 0; + + spin_lock_bh(&hw_breakpoint_lock); + for (i = 0; i < hbp_kernel_pos; i++) { + if (thread->hbp[i]) + hbp_counter++; + if (bp == thread->hbp[i]) + pos = i; + } + if (pos >= 0) { + __unregister_user_hw_breakpoint(pos, tsk); + hbp_counter--; + } + if (!hbp_counter) + clear_tsk_thread_flag(tsk, TIF_DEBUG); + + spin_unlock_bh(&hw_breakpoint_lock); +} +EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); + +/** + * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space + * @bp: the breakpoint structure to register + * + * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and + * @bp->triggered must be set properly before invocation + * + */ +int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) +{ + int rc; + + rc = arch_validate_hwbkpt_settings(bp, NULL); + if (rc) + return rc; + + spin_lock_bh(&hw_breakpoint_lock); + + rc = -ENOSPC; + /* Check if we are over-committing */ + if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { + hbp_kernel_pos--; + hbp_kernel[hbp_kernel_pos] = bp; + on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); + rc = 0; + } + + spin_unlock_bh(&hw_breakpoint_lock); + return rc; +} +EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); + +/** + * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space + * @bp: the breakpoint structure to unregister + * + * Uninstalls and unregisters @bp. + */ +void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) +{ + int i, j; + + spin_lock_bh(&hw_breakpoint_lock); + + /* Find the 'bp' in our list of breakpoints for kernel */ + for (i = hbp_kernel_pos; i < HBP_NUM; i++) + if (bp == hbp_kernel[i]) + break; + + /* Check if we did not find a match for 'bp'. If so return early */ + if (i == HBP_NUM) { + spin_unlock_bh(&hw_breakpoint_lock); + return; + } + + /* + * We'll shift the breakpoints one-level above to compact if + * unregistration creates a hole + */ + for (j = i; j > hbp_kernel_pos; j--) + hbp_kernel[j] = hbp_kernel[j-1]; + + hbp_kernel[hbp_kernel_pos] = NULL; + on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); + hbp_kernel_pos++; + + spin_unlock_bh(&hw_breakpoint_lock); +} +EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); + +static struct notifier_block hw_breakpoint_exceptions_nb = { + .notifier_call = hw_breakpoint_exceptions_notify, + /* we need to be notified first */ + .priority = 0x7fffffff +}; + +static int __init init_hw_breakpoint(void) +{ + return register_die_notifier(&hw_breakpoint_exceptions_nb); +} + +core_initcall(init_hw_breakpoint); -- cgit v1.2.1 From 0722db015c246204044299eae3b02d18d3ca4faf Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:46:40 +0530 Subject: hw-breakpoints: ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces This patch adds an ftrace plugin to detect and profile memory access over kernel variables. It uses HW Breakpoint interfaces to 'watch memory addresses. Signed-off-by: K.Prasad Signed-off-by: Frederic Weisbecker --- kernel/trace/Kconfig | 21 ++ kernel/trace/Makefile | 1 + kernel/trace/trace.h | 23 ++ kernel/trace/trace_ksym.c | 525 ++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_selftest.c | 53 +++++ 5 files changed, 623 insertions(+) create mode 100644 kernel/trace/trace_ksym.c (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a508b9d2adb8..d7f01e6e8ba5 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -314,6 +314,27 @@ config POWER_TRACER power management decisions, specifically the C-state and P-state behavior. +config KSYM_TRACER + bool "Trace read and write access on kernel memory locations" + depends on HAVE_HW_BREAKPOINT + select TRACING + help + This tracer helps find read and write operations on any given kernel + symbol i.e. /proc/kallsyms. + +config PROFILE_KSYM_TRACER + bool "Profile all kernel memory accesses on 'watched' variables" + depends on KSYM_TRACER + help + This tracer profiles kernel accesses on variables watched through the + ksym tracer ftrace plugin. Depending upon the hardware, all read + and write operations on kernel variables can be monitored for + accesses. + + The results will be displayed in: + /debugfs/tracing/profile_ksym + + Say N if unsure. config STACK_TRACER bool "Trace max stack" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 06b85850fab4..658aace8c41e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -51,5 +51,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6e735d4771f8..7d5cc37b8fca 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -15,6 +15,10 @@ #include #include +#ifdef CONFIG_KSYM_TRACER +#include +#endif + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -40,6 +44,7 @@ enum trace_type { TRACE_KMEM_FREE, TRACE_POWER, TRACE_BLK, + TRACE_KSYM, __TRACE_LAST_TYPE, }; @@ -207,6 +212,21 @@ struct syscall_trace_exit { unsigned long ret; }; +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" +extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); + +struct trace_ksym { + struct trace_entry ent; + struct hw_breakpoint *ksym_hbp; + unsigned long ksym_addr; + unsigned long ip; +#ifdef CONFIG_PROFILE_KSYM_TRACER + unsigned long counter; +#endif + struct hlist_node ksym_hlist; + char ksym_name[KSYM_NAME_LEN]; + char p_name[TASK_COMM_LEN]; +}; /* * trace_flag_type is an enumeration that holds different @@ -323,6 +343,7 @@ extern void __ftrace_bad_type(void); TRACE_SYSCALL_ENTER); \ IF_ASSIGN(var, ent, struct syscall_trace_exit, \ TRACE_SYSCALL_EXIT); \ + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \ __ftrace_bad_type(); \ } while (0) @@ -540,6 +561,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_ksym(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c new file mode 100644 index 000000000000..11c74f6404cc --- /dev/null +++ b/kernel/trace/trace_ksym.c @@ -0,0 +1,525 @@ +/* + * trace_ksym.c - Kernel Symbol Tracer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include +#include +#include +#include +#include +#include + +#include "trace_output.h" +#include "trace_stat.h" +#include "trace.h" + +/* For now, let us restrict the no. of symbols traced simultaneously to number + * of available hardware breakpoint registers. + */ +#define KSYM_TRACER_MAX HBP_NUM + +#define KSYM_TRACER_OP_LEN 3 /* rw- */ +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1) + +static struct trace_array *ksym_trace_array; + +static unsigned int ksym_filter_entry_count; +static unsigned int ksym_tracing_enabled; + +static HLIST_HEAD(ksym_filter_head); + +#ifdef CONFIG_PROFILE_KSYM_TRACER + +#define MAX_UL_INT 0xffffffff + +static DEFINE_MUTEX(ksym_tracer_mutex); + +void ksym_collect_stats(unsigned long hbp_hit_addr) +{ + struct hlist_node *node; + struct trace_ksym *entry; + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { + if ((entry->ksym_addr == hbp_hit_addr) && + (entry->counter <= MAX_UL_INT)) { + entry->counter++; + break; + } + } + rcu_read_unlock(); +} +#endif /* CONFIG_PROFILE_KSYM_TRACER */ + +void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) +{ + struct ring_buffer_event *event; + struct trace_array *tr; + struct trace_ksym *entry; + int pc; + + if (!ksym_tracing_enabled) + return; + + tr = ksym_trace_array; + pc = preempt_count(); + + event = trace_buffer_lock_reserve(tr, TRACE_KSYM, + sizeof(*entry), 0, pc); + if (!event) + return; + + entry = ring_buffer_event_data(event); + strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); + entry->ksym_hbp = hbp; + entry->ip = instruction_pointer(regs); + strlcpy(entry->p_name, current->comm, TASK_COMM_LEN); +#ifdef CONFIG_PROFILE_KSYM_TRACER + ksym_collect_stats(hbp->info.address); +#endif /* CONFIG_PROFILE_KSYM_TRACER */ + + trace_buffer_unlock_commit(tr, event, 0, pc); +} + +/* Valid access types are represented as + * + * rw- : Set Read/Write Access Breakpoint + * -w- : Set Write Access Breakpoint + * --- : Clear Breakpoints + * --x : Set Execution Break points (Not available yet) + * + */ +static int ksym_trace_get_access_type(char *access_str) +{ + int pos, access = 0; + + for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) { + switch (access_str[pos]) { + case 'r': + access += (pos == 0) ? 4 : -1; + break; + case 'w': + access += (pos == 1) ? 2 : -1; + break; + case '-': + break; + default: + return -EINVAL; + } + } + + switch (access) { + case 6: + access = HW_BREAKPOINT_RW; + break; + case 2: + access = HW_BREAKPOINT_WRITE; + break; + case 0: + access = 0; + } + + return access; +} + +/* + * There can be several possible malformed requests and we attempt to capture + * all of them. We enumerate some of the rules + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. + * i.e. multiple ':' symbols disallowed. Possible uses are of the form + * ::. + * 2. No delimiter symbol ':' in the input string + * 3. Spurious operator symbols or symbols not in their respective positions + * 4. :--- i.e. clear breakpoint request when ksym_name not in file + * 5. Kernel symbol not a part of /proc/kallsyms + * 6. Duplicate requests + */ +static int parse_ksym_trace_str(char *input_string, char **ksymname, + unsigned long *addr) +{ + char *delimiter = ":"; + int ret; + + ret = -EINVAL; + *ksymname = strsep(&input_string, delimiter); + *addr = kallsyms_lookup_name(*ksymname); + + /* Check for malformed request: (2), (1) and (5) */ + if ((!input_string) || + (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) || + (*addr == 0)) + goto return_code; + ret = ksym_trace_get_access_type(input_string); + +return_code: + return ret; +} + +int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) +{ + struct trace_ksym *entry; + int ret; + + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" + " new requests for tracing can be accepted now.\n", + KSYM_TRACER_MAX); + return -ENOSPC; + } + + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (!entry->ksym_hbp) { + kfree(entry); + return -ENOMEM; + } + + entry->ksym_hbp->info.name = ksymname; + entry->ksym_hbp->info.type = op; + entry->ksym_addr = entry->ksym_hbp->info.address = addr; +#ifdef CONFIG_X86 + entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; +#endif + entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; + + ret = register_kernel_hw_breakpoint(entry->ksym_hbp); + if (ret < 0) { + printk(KERN_INFO "ksym_tracer request failed. Try again" + " later!!\n"); + kfree(entry->ksym_hbp); + kfree(entry); + return -EAGAIN; + } + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); + ksym_filter_entry_count++; + + return 0; +} + +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct trace_ksym *entry; + struct hlist_node *node; + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX]; + ssize_t ret, cnt = 0; + + mutex_lock(&ksym_tracer_mutex); + + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:", + entry->ksym_hbp->info.name); + if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, + "-w-\n"); + else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, + "rw-\n"); + } + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf)); + mutex_unlock(&ksym_tracer_mutex); + + return ret; +} + +static ssize_t ksym_trace_filter_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct trace_ksym *entry; + struct hlist_node *node; + char *input_string, *ksymname = NULL; + unsigned long ksym_addr = 0; + int ret, op, changed = 0; + + /* Ignore echo "" > ksym_trace_filter */ + if (count == 0) + return 0; + + input_string = kzalloc(count, GFP_KERNEL); + if (!input_string) + return -ENOMEM; + + if (copy_from_user(input_string, buffer, count)) { + kfree(input_string); + return -EFAULT; + } + + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); + if (ret < 0) { + kfree(input_string); + return ret; + } + + mutex_lock(&ksym_tracer_mutex); + + ret = -EINVAL; + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { + if (entry->ksym_addr == ksym_addr) { + /* Check for malformed request: (6) */ + if (entry->ksym_hbp->info.type != op) + changed = 1; + else + goto err_ret; + break; + } + } + if (changed) { + unregister_kernel_hw_breakpoint(entry->ksym_hbp); + entry->ksym_hbp->info.type = op; + if (op > 0) { + ret = register_kernel_hw_breakpoint(entry->ksym_hbp); + if (ret == 0) { + ret = count; + goto unlock_ret_path; + } + } + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + kfree(entry->ksym_hbp); + kfree(entry); + ret = count; + goto err_ret; + } else { + /* Check for malformed request: (4) */ + if (op == 0) + goto err_ret; + ret = process_new_ksym_entry(ksymname, op, ksym_addr); + if (ret) + goto err_ret; + } + ret = count; + goto unlock_ret_path; + +err_ret: + kfree(input_string); + +unlock_ret_path: + mutex_unlock(&ksym_tracer_mutex); + return ret; +} + +static const struct file_operations ksym_tracing_fops = { + .open = tracing_open_generic, + .read = ksym_trace_filter_read, + .write = ksym_trace_filter_write, +}; + +static void ksym_trace_reset(struct trace_array *tr) +{ + struct trace_ksym *entry; + struct hlist_node *node, *node1; + + ksym_tracing_enabled = 0; + + mutex_lock(&ksym_tracer_mutex); + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, + ksym_hlist) { + unregister_kernel_hw_breakpoint(entry->ksym_hbp); + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + /* Free the 'input_string' only if reset + * after startup self-test + */ +#ifdef CONFIG_FTRACE_SELFTEST + if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY, + strlen(KSYM_SELFTEST_ENTRY)) != 0) +#endif /* CONFIG_FTRACE_SELFTEST*/ + kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp); + kfree(entry); + } + mutex_unlock(&ksym_tracer_mutex); +} + +static int ksym_trace_init(struct trace_array *tr) +{ + int cpu, ret = 0; + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + ksym_tracing_enabled = 1; + ksym_trace_array = tr; + + return ret; +} + +static void ksym_trace_print_header(struct seq_file *m) +{ + + seq_puts(m, + "# TASK-PID CPU# Symbol Type " + "Function \n"); + seq_puts(m, + "# | | | | " + "| \n"); +} + +static enum print_line_t ksym_trace_output(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct trace_ksym *field; + char str[KSYM_SYMBOL_LEN]; + int ret; + + if (entry->type != TRACE_KSYM) + return TRACE_TYPE_UNHANDLED; + + trace_assign_type(field, entry); + + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name, + entry->pid, iter->cpu, field->ksym_name); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + switch (field->ksym_hbp->info.type) { + case HW_BREAKPOINT_WRITE: + ret = trace_seq_printf(s, " W "); + break; + case HW_BREAKPOINT_RW: + ret = trace_seq_printf(s, " RW "); + break; + default: + return TRACE_TYPE_PARTIAL_LINE; + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + sprint_symbol(str, field->ip); + ret = trace_seq_printf(s, "%-20s\n", str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +struct tracer ksym_tracer __read_mostly = +{ + .name = "ksym_tracer", + .init = ksym_trace_init, + .reset = ksym_trace_reset, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_ksym, +#endif + .print_header = ksym_trace_print_header, + .print_line = ksym_trace_output +}; + +__init static int init_ksym_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + ksym_filter_entry_count = 0; + + entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, + NULL, &ksym_tracing_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'ksym_trace_filter' file\n"); + + return register_tracer(&ksym_tracer); +} +device_initcall(init_ksym_trace); + + +#ifdef CONFIG_PROFILE_KSYM_TRACER +static int ksym_tracer_stat_headers(struct seq_file *m) +{ + seq_printf(m, " Access type "); + seq_printf(m, " Symbol Counter \n"); + return 0; +} + +static int ksym_tracer_stat_show(struct seq_file *m, void *v) +{ + struct hlist_node *stat = v; + struct trace_ksym *entry; + int access_type = 0; + char fn_name[KSYM_NAME_LEN]; + + entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); + + if (entry->ksym_hbp) + access_type = entry->ksym_hbp->info.type; + + switch (access_type) { + case HW_BREAKPOINT_WRITE: + seq_printf(m, " W "); + break; + case HW_BREAKPOINT_RW: + seq_printf(m, " RW "); + break; + default: + seq_printf(m, " NA "); + } + + if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) + seq_printf(m, " %s ", fn_name); + else + seq_printf(m, " "); + + seq_printf(m, "%15lu\n", entry->counter); + return 0; +} + +static void *ksym_tracer_stat_start(struct tracer_stat *trace) +{ + return &(ksym_filter_head.first); +} + +static void * +ksym_tracer_stat_next(void *v, int idx) +{ + struct hlist_node *stat = v; + + return stat->next; +} + +static struct tracer_stat ksym_tracer_stats = { + .name = "ksym_tracer", + .stat_start = ksym_tracer_stat_start, + .stat_next = ksym_tracer_stat_next, + .stat_headers = ksym_tracer_stat_headers, + .stat_show = ksym_tracer_stat_show +}; + +__init static int ksym_tracer_stat_init(void) +{ + int ret; + + ret = register_stat_tracer(&ksym_tracer_stats); + if (ret) { + printk(KERN_WARNING "Warning: could not register " + "ksym tracer stats\n"); + return 1; + } + + return 0; +} +fs_initcall(ksym_tracer_stat_init); +#endif /* CONFIG_PROFILE_KSYM_TRACER */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 00dd6485bdd7..71f2edb0fd84 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: case TRACE_HW_BRANCHES: + case TRACE_KSYM: return 1; } return 0; @@ -807,3 +808,55 @@ trace_selftest_startup_hw_branches(struct tracer *trace, return ret; } #endif /* CONFIG_HW_BRANCH_TRACER */ + +#ifdef CONFIG_KSYM_TRACER +static int ksym_selftest_dummy; + +int +trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + + ksym_selftest_dummy = 0; + /* Register the read-write tracing request */ + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, + (unsigned long)(&ksym_selftest_dummy)); + + if (ret < 0) { + printk(KERN_CONT "ksym_trace read-write startup test failed\n"); + goto ret_path; + } + /* Perform a read and a write operation over the dummy variable to + * trigger the tracer + */ + if (ksym_selftest_dummy == 0) + ksym_selftest_dummy++; + + /* stop the tracing. */ + tracing_stop(); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + tracing_start(); + + /* read & write operations - one each is performed on the dummy variable + * triggering two entries in the trace buffer + */ + if (!ret && count != 2) { + printk(KERN_CONT "Ksym tracer startup test failed"); + ret = -1; + } + +ret_path: + return ret; +} +#endif /* CONFIG_KSYM_TRACER */ + -- cgit v1.2.1 From 73874005cd8800440be4299bd095387fff4b90ac Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 3 Jun 2009 01:43:38 +0200 Subject: hw-breakpoints: fix undeclared ksym_tracer_mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ksym_tracer_mutex is declared inside an #ifdef CONFIG_PROFILE_KSYM_TRACER section. This makes it unavailable for the hardware breakpoint tracer if it is configured without the breakpoint profiler. This patch fixes the following build error: kernel/trace/trace_ksym.c: In function ‘ksym_trace_filter_read’: kernel/trace/trace_ksym.c:226: erreur: ‘ksym_tracer_mutex’ undeclared (first use in this function) kernel/trace/trace_ksym.c:226: erreur: (Each undeclared identifier is reported only once kernel/trace/trace_ksym.c:226: erreur: for each function it appears in.) kernel/trace/trace_ksym.c: In function ‘ksym_trace_filter_write’: kernel/trace/trace_ksym.c:273: erreur: ‘ksym_tracer_mutex’ undeclared (first use in this function) kernel/trace/trace_ksym.c: In function ‘ksym_trace_reset’: kernel/trace/trace_ksym.c:335: erreur: ‘ksym_tracer_mutex’ undeclared (first use in this function) make[1]: *** [kernel/trace/trace_ksym.o] Erreur 1 [ Impact: fix a build error ] Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_ksym.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 11c74f6404cc..eef97e7c8db7 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -44,12 +44,12 @@ static unsigned int ksym_tracing_enabled; static HLIST_HEAD(ksym_filter_head); +static DEFINE_MUTEX(ksym_tracer_mutex); + #ifdef CONFIG_PROFILE_KSYM_TRACER #define MAX_UL_INT 0xffffffff -static DEFINE_MUTEX(ksym_tracer_mutex); - void ksym_collect_stats(unsigned long hbp_hit_addr) { struct hlist_node *node; -- cgit v1.2.1 From db59504d89db1462a5281fb55b1d962cb74a398f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:52:36 +0800 Subject: ksym_tracer: Extract trace entry from struct trace_ksym struct trace_ksym is used as an entry in hbp list, and is also used as trace_entry stored in ring buffer. This is not necessary and is a waste of memory in ring buffer. There is also a bug that dereferencing field->ksym_hbp in ksym_trace_output() can be invalid. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2A4.4050007@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 13 ++++--------- kernel/trace/trace_ksym.c | 26 ++++++++++++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7d5cc37b8fca..ff1ef411a176 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -215,17 +215,12 @@ struct syscall_trace_exit { #define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); -struct trace_ksym { +struct ksym_trace_entry { struct trace_entry ent; - struct hw_breakpoint *ksym_hbp; - unsigned long ksym_addr; unsigned long ip; -#ifdef CONFIG_PROFILE_KSYM_TRACER - unsigned long counter; -#endif - struct hlist_node ksym_hlist; + unsigned char type; char ksym_name[KSYM_NAME_LEN]; - char p_name[TASK_COMM_LEN]; + char cmd[TASK_COMM_LEN]; }; /* @@ -343,7 +338,7 @@ extern void __ftrace_bad_type(void); TRACE_SYSCALL_ENTER); \ IF_ASSIGN(var, ent, struct syscall_trace_exit, \ TRACE_SYSCALL_EXIT); \ - IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \ + IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index eef97e7c8db7..085ff055fdfa 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -37,6 +37,15 @@ #define KSYM_TRACER_OP_LEN 3 /* rw- */ #define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1) +struct trace_ksym { + struct hw_breakpoint *ksym_hbp; + unsigned long ksym_addr; +#ifdef CONFIG_PROFILE_KSYM_TRACER + unsigned long counter; +#endif + struct hlist_node ksym_hlist; +}; + static struct trace_array *ksym_trace_array; static unsigned int ksym_filter_entry_count; @@ -71,7 +80,7 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) { struct ring_buffer_event *event; struct trace_array *tr; - struct trace_ksym *entry; + struct ksym_trace_entry *entry; int pc; if (!ksym_tracing_enabled) @@ -85,11 +94,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) if (!event) return; - entry = ring_buffer_event_data(event); + entry = ring_buffer_event_data(event); + entry->ip = instruction_pointer(regs); + entry->type = hbp->info.type; strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); - entry->ksym_hbp = hbp; - entry->ip = instruction_pointer(regs); - strlcpy(entry->p_name, current->comm, TASK_COMM_LEN); + strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); + #ifdef CONFIG_PROFILE_KSYM_TRACER ksym_collect_stats(hbp->info.address); #endif /* CONFIG_PROFILE_KSYM_TRACER */ @@ -380,7 +390,7 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; struct trace_seq *s = &iter->seq; - struct trace_ksym *field; + struct ksym_trace_entry *field; char str[KSYM_SYMBOL_LEN]; int ret; @@ -389,12 +399,12 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name, + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->cmd, entry->pid, iter->cpu, field->ksym_name); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - switch (field->ksym_hbp->info.type) { + switch (field->type) { case HW_BREAKPOINT_WRITE: ret = trace_seq_printf(s, " W "); break; -- cgit v1.2.1 From be9742e6cb107fe1d77db7a081ea4eb25e79e1ad Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:52:52 +0800 Subject: ksym_tracer: Rewrite ksym_trace_filter_read() Reading ksym_trace_filter gave me some arbitrary characters, when it should show nothing. It's because buf is not initialized when there's no filter. Also reduce stack usage by about 512 bytes. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2B4.6030706@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 085ff055fdfa..b6710d31bdf0 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -35,7 +35,6 @@ #define KSYM_TRACER_MAX HBP_NUM #define KSYM_TRACER_OP_LEN 3 /* rw- */ -#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1) struct trace_ksym { struct hw_breakpoint *ksym_hbp; @@ -230,25 +229,33 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, { struct trace_ksym *entry; struct hlist_node *node; - char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX]; - ssize_t ret, cnt = 0; + struct trace_seq *s; + ssize_t cnt = 0; + int ret; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + trace_seq_init(s); mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:", - entry->ksym_hbp->info.name); + ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) - cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, - "-w-\n"); + ret = trace_seq_puts(s, "-w-\n"); else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) - cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, - "rw-\n"); + ret = trace_seq_puts(s, "rw-\n"); + WARN_ON_ONCE(!ret); } - ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf)); + + cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); + mutex_unlock(&ksym_tracer_mutex); - return ret; + kfree(s); + + return cnt; } static ssize_t ksym_trace_filter_write(struct file *file, -- cgit v1.2.1 From f088e5471297cc78d7465e1fd997cb1a91a48019 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:53:18 +0800 Subject: ksym_tracer: Fix validation of access type # echo 'pid_max:rw-' > ksym_trace_filter # cat ksym_trace_filter pid_max:rw- # echo 'pid_max:ww-' > ksym_trace_filter (should return -EINVAL) # cat ksym_trace_filter (but it ended up removing filter entry) Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2CE.6080409@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index b6710d31bdf0..955600929907 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -114,24 +114,22 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) * --x : Set Execution Break points (Not available yet) * */ -static int ksym_trace_get_access_type(char *access_str) +static int ksym_trace_get_access_type(char *str) { - int pos, access = 0; + int access = 0; - for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) { - switch (access_str[pos]) { - case 'r': - access += (pos == 0) ? 4 : -1; - break; - case 'w': - access += (pos == 1) ? 2 : -1; - break; - case '-': - break; - default: - return -EINVAL; - } - } + if (str[0] == 'r') + access += 4; + else if (str[0] != '-') + return -EINVAL; + + if (str[1] == 'w') + access += 2; + else if (str[1] != '-') + return -EINVAL; + + if (str[2] != '-') + return -EINVAL; switch (access) { case 6: @@ -140,8 +138,6 @@ static int ksym_trace_get_access_type(char *access_str) case 2: access = HW_BREAKPOINT_WRITE; break; - case 0: - access = 0; } return access; -- cgit v1.2.1 From 92cf9f8f7e89c6bdbb1a724f879b8b18fc0dfe0f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:53:47 +0800 Subject: ksym_tracer: Fix validation of length of access type Don't take newline into account, otherwise: # echo 'pid_max:-w-' > ksym_trace_filter # echo -n 'pid_max:rw-' > ksym_trace_filter bash: echo: write error: Invalid argument Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2EB.9070503@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 955600929907..72fcb46c39c0 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -158,21 +158,21 @@ static int ksym_trace_get_access_type(char *str) static int parse_ksym_trace_str(char *input_string, char **ksymname, unsigned long *addr) { - char *delimiter = ":"; int ret; - ret = -EINVAL; - *ksymname = strsep(&input_string, delimiter); + strstrip(input_string); + + *ksymname = strsep(&input_string, ":"); *addr = kallsyms_lookup_name(*ksymname); /* Check for malformed request: (2), (1) and (5) */ if ((!input_string) || - (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) || - (*addr == 0)) - goto return_code; + (strlen(input_string) != KSYM_TRACER_OP_LEN) || + (*addr == 0)) + return -EINVAL;; + ret = ksym_trace_get_access_type(input_string); -return_code: return ret; } -- cgit v1.2.1 From 011ed56853e07e30653d6f1bfddc56b396218664 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:54:08 +0800 Subject: ksym_tracer: NIL-terminate user input filter Make sure the user input string is NULL-terminated. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E300.7020601@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 72fcb46c39c0..8cbed5a6286f 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -264,11 +264,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, unsigned long ksym_addr = 0; int ret, op, changed = 0; - /* Ignore echo "" > ksym_trace_filter */ - if (count == 0) - return 0; - - input_string = kzalloc(count, GFP_KERNEL); + input_string = kzalloc(count + 1, GFP_KERNEL); if (!input_string) return -ENOMEM; @@ -276,6 +272,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, kfree(input_string); return -EFAULT; } + input_string[count] = '\0'; ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); if (ret < 0) { -- cgit v1.2.1 From 0d109c8f70eab8b9f693bd5caea23012394e4876 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:54:28 +0800 Subject: ksym_tracer: Report error when failed to re-register hbp When access type is changed, the hw break point will be unregistered and then be registered again with new access type. But the registration may fail, in this case, -errno should be returned. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E314.7070004@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 8cbed5a6286f..891e3b86b3f6 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -302,13 +302,13 @@ static ssize_t ksym_trace_filter_write(struct file *file, ret = count; goto unlock_ret_path; } - } + } else + ret = count; ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); kfree(entry->ksym_hbp); kfree(entry); - ret = count; goto err_ret; } else { /* Check for malformed request: (4) */ -- cgit v1.2.1 From 558df6c8f74ac4a0b9026ef85b0028280f364d96 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:54:48 +0800 Subject: ksym_tracer: Fix memory leak - When remove a filter, we leak entry->ksym_hbp->info.name. - With CONFIG_FTRAC_SELFTEST enabled, we leak ->info.name: # echo ksym_tracer > current_tracer # echo 'ksym_selftest_dummy:rw-' > ksym_trace_filter # echo nop > current_tracer Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E328.8010200@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 61 +++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 891e3b86b3f6..7d349d34a0d1 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -179,7 +179,7 @@ static int parse_ksym_trace_str(char *input_string, char **ksymname, int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) { struct trace_ksym *entry; - int ret; + int ret = -ENOMEM; if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" @@ -193,12 +193,13 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) return -ENOMEM; entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (!entry->ksym_hbp) { - kfree(entry); - return -ENOMEM; - } + if (!entry->ksym_hbp) + goto err; + + entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); + if (!entry->ksym_hbp->info.name) + goto err; - entry->ksym_hbp->info.name = ksymname; entry->ksym_hbp->info.type = op; entry->ksym_addr = entry->ksym_hbp->info.address = addr; #ifdef CONFIG_X86 @@ -210,14 +211,18 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (ret < 0) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); - kfree(entry->ksym_hbp); - kfree(entry); - return -EAGAIN; + ret = -EAGAIN; + goto err; } hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); ksym_filter_entry_count++; - return 0; +err: + if (entry->ksym_hbp) + kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp); + kfree(entry); + return ret; } static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, @@ -289,7 +294,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, if (entry->ksym_hbp->info.type != op) changed = 1; else - goto err_ret; + goto out; break; } } @@ -298,34 +303,29 @@ static ssize_t ksym_trace_filter_write(struct file *file, entry->ksym_hbp->info.type = op; if (op > 0) { ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret == 0) { - ret = count; - goto unlock_ret_path; - } - } else - ret = count; + if (ret == 0) + goto out; + } ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); + kfree(entry->ksym_hbp->info.name); kfree(entry->ksym_hbp); kfree(entry); - goto err_ret; + goto out; } else { /* Check for malformed request: (4) */ if (op == 0) - goto err_ret; + goto out; ret = process_new_ksym_entry(ksymname, op, ksym_addr); - if (ret) - goto err_ret; } - ret = count; - goto unlock_ret_path; +out: + mutex_unlock(&ksym_tracer_mutex); -err_ret: kfree(input_string); -unlock_ret_path: - mutex_unlock(&ksym_tracer_mutex); + if (!ret) + ret = count; return ret; } @@ -349,14 +349,7 @@ static void ksym_trace_reset(struct trace_array *tr) ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - /* Free the 'input_string' only if reset - * after startup self-test - */ -#ifdef CONFIG_FTRACE_SELFTEST - if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY, - strlen(KSYM_SELFTEST_ENTRY)) != 0) -#endif /* CONFIG_FTRACE_SELFTEST*/ - kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp->info.name); kfree(entry->ksym_hbp); kfree(entry); } -- cgit v1.2.1 From 9d7e934408b52cd53dd85270eb36941a6a318cc5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:55:18 +0800 Subject: ksym_tracer: Fix the output of stat tracing - make ksym_tracer_stat_start() return head->first instead of &head->first - make the output properly aligned Before: Access type Symbol Counter NA 0 RW pid_max 0 After: Access Type Symbol Counter ----------- ------ ------- RW pid_max 0 Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E346.5050608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 7d349d34a0d1..1256a6e8ee24 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -453,8 +453,10 @@ device_initcall(init_ksym_trace); #ifdef CONFIG_PROFILE_KSYM_TRACER static int ksym_tracer_stat_headers(struct seq_file *m) { - seq_printf(m, " Access type "); - seq_printf(m, " Symbol Counter \n"); + seq_puts(m, " Access Type "); + seq_puts(m, " Symbol Counter\n"); + seq_puts(m, " ----------- "); + seq_puts(m, " ------ -------\n"); return 0; } @@ -472,27 +474,27 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) switch (access_type) { case HW_BREAKPOINT_WRITE: - seq_printf(m, " W "); + seq_puts(m, " W "); break; case HW_BREAKPOINT_RW: - seq_printf(m, " RW "); + seq_puts(m, " RW "); break; default: - seq_printf(m, " NA "); + seq_puts(m, " NA "); } if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) - seq_printf(m, " %s ", fn_name); + seq_printf(m, " %-36s", fn_name); else - seq_printf(m, " "); + seq_printf(m, " %-36s", ""); + seq_printf(m, " %15lu\n", entry->counter); - seq_printf(m, "%15lu\n", entry->counter); return 0; } static void *ksym_tracer_stat_start(struct tracer_stat *trace) { - return &(ksym_filter_head.first); + return ksym_filter_head.first; } static void * -- cgit v1.2.1 From d857ace143df3884954887e1899a65831ca72ece Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 22 Jul 2009 11:21:31 +0800 Subject: tracing/ksym_tracer: fix the output of ksym tracer Fix the output format of ksym tracer, make it properly aligned Befor patch: # tracer: ksym_tracer # # TASK-PID CPU# Symbol Type Function # | | | | | bash 1378 1 ksym_tracer_mutex W mutex_lock+0x11/0x27 bash 1378 1 ksym_filter_head W process_new_ksym_entry+0xd2/0x10c bash 1378 1 ksym_tracer_mutex W mutex_unlock+0x12/0x1b cat 1429 0 ksym_tracer_mutex W mutex_lock+0x11/0x27 After patch: # tracer: ksym_tracer # # TASK-PID CPU# Symbol Type Function # | | | | | cat-1423 [000] ksym_tracer_mutex RW mutex_lock+0x11/0x27 cat-1423 [000] ksym_filter_head RW ksym_trace_filter_read+0x6e/0x10d cat-1423 [000] ksym_tracer_mutex RW mutex_unlock+0x12/0x1b cat-1423 [000] ksym_tracer_mutex RW mutex_lock+0x11/0x27 cat-1423 [000] ksym_filter_head RW ksym_trace_filter_read+0x6e/0x10d cat-1423 [000] ksym_tracer_mutex RW mutex_unlock+0x12/0x1b Signed-off-by: Xiao Guangrong LKML-Reference: <4A6685BB.2090809@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_ksym.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 1256a6e8ee24..fbf3a8e13bc5 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -370,13 +370,12 @@ static int ksym_trace_init(struct trace_array *tr) static void ksym_trace_print_header(struct seq_file *m) { - seq_puts(m, - "# TASK-PID CPU# Symbol Type " - "Function \n"); + "# TASK-PID CPU# Symbol " + "Type Function\n"); seq_puts(m, - "# | | | | " - "| \n"); + "# | | | " + " | |\n"); } static enum print_line_t ksym_trace_output(struct trace_iterator *iter) @@ -392,7 +391,7 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->cmd, + ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, entry->pid, iter->cpu, field->ksym_name); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -412,7 +411,7 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) return TRACE_TYPE_PARTIAL_LINE; sprint_symbol(str, field->ip); - ret = trace_seq_printf(s, "%-20s\n", str); + ret = trace_seq_printf(s, "%s\n", str); if (!ret) return TRACE_TYPE_PARTIAL_LINE; -- cgit v1.2.1 From 8e068542a8d9efec55126284d2f5cb32f003d507 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 22 Jul 2009 11:23:41 +0800 Subject: tracing/ksym_tracer: fix write operation of ksym_trace_filter This patch fix 2 bugs: - fix the return value of ksym_trace_filter_write() when we want to clear symbol in ksym_trace_filter file for example: # echo global_trace:rw- > /debug/tracing/ksym_trace_filter # echo global_trace:--- > /debug/tracing/ksym_trace_filter -bash: echo: write error: Invalid argument # cat /debug/tracing/ksym_trace_filter # We want to clear 'global_trace' in ksym_trace_filter, it complain with "Invalid argument", but the operation is successful - the "r--" access types is not allowed, but ksym_trace_filter file think it OK for example: # echo ksym_tracer_mutex:r-- > ksym_trace_filter -bash: echo: write error: Resource temporarily unavailable # dmesg ksym_tracer request failed. Try again later!! The error occur at register_kernel_hw_breakpoint(), but It's should at access types parser Signed-off-by: Xiao Guangrong LKML-Reference: <4A66863D.5090802@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_ksym.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index fbf3a8e13bc5..cd5cb656c3d2 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -135,6 +135,9 @@ static int ksym_trace_get_access_type(char *str) case 6: access = HW_BREAKPOINT_RW; break; + case 4: + access = -EINVAL; + break; case 2: access = HW_BREAKPOINT_WRITE; break; @@ -312,6 +315,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, kfree(entry->ksym_hbp->info.name); kfree(entry->ksym_hbp); kfree(entry); + ret = 0; goto out; } else { /* Check for malformed request: (4) */ -- cgit v1.2.1 From 75e33751ca8bbb72dd6f1a74d2810ddc8cbe4bdf Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 23 Jul 2009 12:01:22 +0800 Subject: tracing/ksym_tracer: support quick clear for ksym_trace_filter -- v2 It's rather boring to clear symbol one by one in ksym_trace_filter file, so, this patch will let ksym_trace_filter file support quickly clear all break points. We can write "0" to this file and it will clear all symbols for example: # cat ksym_trace_filter ksym_filter_head:rw- global_trace:rw- # echo 0 > ksym_trace_filter # cat ksym_trace_filter # Changelog v1->v2: Add other ways to clear all breakpoints by writing NULL or "*:---" to ksym_trace_filter file base on K.Prasad's suggestion Signed-off-by: Xiao Guangrong LKML-Reference: <4A67E092.3080202@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_ksym.c | 53 +++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index cd5cb656c3d2..2fde875ead4c 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -163,8 +163,6 @@ static int parse_ksym_trace_str(char *input_string, char **ksymname, { int ret; - strstrip(input_string); - *ksymname = strsep(&input_string, ":"); *addr = kallsyms_lookup_name(*ksymname); @@ -262,6 +260,25 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, return cnt; } +static void __ksym_trace_reset(void) +{ + struct trace_ksym *entry; + struct hlist_node *node, *node1; + + mutex_lock(&ksym_tracer_mutex); + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, + ksym_hlist) { + unregister_kernel_hw_breakpoint(entry->ksym_hbp); + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp); + kfree(entry); + } + mutex_unlock(&ksym_tracer_mutex); +} + static ssize_t ksym_trace_filter_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) @@ -282,6 +299,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, } input_string[count] = '\0'; + strstrip(input_string); + + /* + * Clear all breakpoints if: + * 1: echo > ksym_trace_filter + * 2: echo 0 > ksym_trace_filter + * 3: echo "*:---" > ksym_trace_filter + */ + if (!input_string[0] || !strcmp(input_string, "0") || + !strcmp(input_string, "*:---")) { + __ksym_trace_reset(); + kfree(input_string); + return count; + } + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); if (ret < 0) { kfree(input_string); @@ -341,23 +373,8 @@ static const struct file_operations ksym_tracing_fops = { static void ksym_trace_reset(struct trace_array *tr) { - struct trace_ksym *entry; - struct hlist_node *node, *node1; - ksym_tracing_enabled = 0; - - mutex_lock(&ksym_tracer_mutex); - hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, - ksym_hlist) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); - ksym_filter_entry_count--; - hlist_del_rcu(&(entry->ksym_hlist)); - synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); - kfree(entry); - } - mutex_unlock(&ksym_tracer_mutex); + __ksym_trace_reset(); } static int ksym_trace_init(struct trace_array *tr) -- cgit v1.2.1 From d6a65dffb30d8636b1e5d4c201564ef401a246cf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Sep 2009 03:23:20 +0200 Subject: tracing: Fix ring-buffer and ksym tracer merge interaction The compiler warns us about: kernel/trace/trace_ksym.c: In function ksym_hbp_handler: kernel/trace/trace_ksym.c:92: attention : passing argument 1 of trace_buffer_lock_reserve from incompatible pointer type kernel/trace/trace_ksym.c:106: attention : passing argument 1 of trace_buffer_unlock_commit from incompatible pointer type Commit "e77405ad" (tracing: pass around ring buffer instead of tracer) has changed the central tracing APIs. And this change has updated every callsites of these APIs except those that aren't in tracing/core, such as the ksym tracer. Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 2fde875ead4c..6d5609c67378 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -78,17 +78,18 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) { struct ring_buffer_event *event; - struct trace_array *tr; struct ksym_trace_entry *entry; + struct ring_buffer *buffer; int pc; if (!ksym_tracing_enabled) return; - tr = ksym_trace_array; + buffer = ksym_trace_array->buffer; + pc = preempt_count(); - event = trace_buffer_lock_reserve(tr, TRACE_KSYM, + event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, sizeof(*entry), 0, pc); if (!event) return; @@ -103,7 +104,7 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) ksym_collect_stats(hbp->info.address); #endif /* CONFIG_PROFILE_KSYM_TRACER */ - trace_buffer_unlock_commit(tr, event, 0, pc); + trace_buffer_unlock_commit(buffer, event, 0, pc); } /* Valid access types are represented as -- cgit v1.2.1 From fb0459d75c1d0a4ba3cafdd2c754e7486968a676 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 25 Sep 2009 12:25:56 +0200 Subject: perf/core: Provide a kernel-internal interface to get to performance counters There are reasons for kernel code to ask for, and use, performance counters. For example, in CPU freq governors this tends to be a good idea, but there are other examples possible as well of course. This patch adds the needed bits to do enable this functionality; they have been tested in an experimental cpufreq driver that I'm working on, and the changes are all that I needed to access counters properly. [fweisbec@gmail.com: added pid to perf_event_create_kernel_counter so that we can profile a particular task too TODO: Have a better error reporting, don't just return NULL in fail case.] v2: Remove the wrong comment about the fact perf_event_create_kernel_counter must be called from a kernel thread. Signed-off-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt Cc: Jan Kiszka Cc: Avi Kivity LKML-Reference: <20090925122556.2f8bd939@infradead.org> Signed-off-by: Frederic Weisbecker --- kernel/perf_event.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 12b5ec39bf97..02d4ff041b01 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1725,6 +1725,26 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +int perf_event_release_kernel(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_event_remove_from_context(event); + mutex_unlock(&ctx->mutex); + + mutex_lock(&event->owner->perf_event_mutex); + list_del_init(&event->owner_entry); + mutex_unlock(&event->owner->perf_event_mutex); + put_task_struct(event->owner); + + free_event(event); + + return 0; +} +EXPORT_SYMBOL_GPL(perf_event_release_kernel); + static int perf_event_read_size(struct perf_event *event) { int entry = sizeof(u64); /* value */ @@ -1750,7 +1770,7 @@ static int perf_event_read_size(struct perf_event *event) return size; } -static u64 perf_event_read_value(struct perf_event *event) +u64 perf_event_read_value(struct perf_event *event) { struct perf_event *child; u64 total = 0; @@ -1761,6 +1781,7 @@ static u64 perf_event_read_value(struct perf_event *event) return total; } +EXPORT_SYMBOL_GPL(perf_event_read_value); static int perf_event_read_entry(struct perf_event *event, u64 read_format, char __user *buf) @@ -4638,6 +4659,58 @@ err_put_context: return err; } +/** + * perf_event_create_kernel_counter + * + * @attr: attributes of the counter to create + * @cpu: cpu in which the counter is bound + * @pid: task to profile + */ +struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, + pid_t pid) +{ + struct perf_event *event; + struct perf_event_context *ctx; + int err; + + /* + * Get the target context (task or percpu): + */ + + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) + return NULL ; + + event = perf_event_alloc(attr, cpu, ctx, NULL, + NULL, GFP_KERNEL); + err = PTR_ERR(event); + if (IS_ERR(event)) + goto err_put_context; + + event->filp = NULL; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_install_in_context(ctx, event, cpu); + ++ctx->generation; + mutex_unlock(&ctx->mutex); + + event->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); + list_add_tail(&event->owner_entry, ¤t->perf_event_list); + mutex_unlock(¤t->perf_event_mutex); + + return event; + +err_put_context: + if (err < 0) + put_ctx(ctx); + + return NULL; +} +EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); + /* * inherit a event from parent task to child task: */ -- cgit v1.2.1 From 97eaf5300b9d0cd99c310bf8c4a0f2f3296d88a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Oct 2009 15:33:50 +0200 Subject: perf/core: Add a callback to perf events A simple callback in a perf event can be used for multiple purposes. For example it is useful for triggered based events like hardware breakpoints that need a callback to dispatch a triggered breakpoint event. v2: Simplify a bit the callback attribution as suggested by Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Paul Mackerras Cc: Mike Galbraith Cc: Paul Mundt --- kernel/perf_event.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 02d4ff041b01..5087125e2a00 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4293,6 +4293,7 @@ perf_event_alloc(struct perf_event_attr *attr, struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, + perf_callback_t callback, gfp_t gfpflags) { const struct pmu *pmu; @@ -4335,6 +4336,11 @@ perf_event_alloc(struct perf_event_attr *attr, event->state = PERF_EVENT_STATE_INACTIVE; + if (!callback && parent_event) + callback = parent_event->callback; + + event->callback = callback; + if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -4611,7 +4617,7 @@ SYSCALL_DEFINE5(perf_event_open, } event = perf_event_alloc(&attr, cpu, ctx, group_leader, - NULL, GFP_KERNEL); + NULL, NULL, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4668,7 +4674,7 @@ err_put_context: */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid) + pid_t pid, perf_callback_t callback) { struct perf_event *event; struct perf_event_context *ctx; @@ -4683,7 +4689,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, return NULL ; event = perf_event_alloc(attr, cpu, ctx, NULL, - NULL, GFP_KERNEL); + NULL, callback, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4736,7 +4742,7 @@ inherit_event(struct perf_event *parent_event, child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, child_ctx, group_leader, parent_event, - GFP_KERNEL); + NULL, GFP_KERNEL); if (IS_ERR(child_event)) return child_event; get_ctx(child_ctx); -- cgit v1.2.1 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression have been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every cases of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. - Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- kernel/exit.c | 5 + kernel/hw_breakpoint.c | 424 +++++++++++++++++------------------------- kernel/perf_event.c | 53 +++++- kernel/trace/trace.h | 5 +- kernel/trace/trace_entries.h | 6 +- kernel/trace/trace_ksym.c | 126 ++++++------- kernel/trace/trace_selftest.c | 3 +- 7 files changed, 293 insertions(+), 329 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index e61891f80123..266f8920628a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -979,6 +980,10 @@ NORET_TYPE void do_exit(long code) proc_exit_connector(tsk); + /* + * FIXME: do that only when needed, using sched_exit tracepoint + */ + flush_ptrace_hw_breakpoint(tsk); /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c1f64e65a9f3..08f6d0163201 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 + * Copyright (C) 2009, Frederic Weisbecker */ /* @@ -35,334 +36,242 @@ #include #include -#include +#include + #include #ifdef CONFIG_X86 #include #endif -/* - * Spinlock that protects all (un)register operations over kernel/user-space - * breakpoint requests - */ -static DEFINE_SPINLOCK(hw_breakpoint_lock); - -/* Array of kernel-space breakpoint structures */ -struct hw_breakpoint *hbp_kernel[HBP_NUM]; - -/* - * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being - * modified but we need the older copy to handle any hbp exceptions. It will - * sync with hbp_kernel[] value after updation is done through IPIs. - */ -DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); - -/* - * Kernel breakpoints grow downwards, starting from HBP_NUM - * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for - * kernel-space request. We will initialise it here and not in an __init - * routine because load_debug_registers(), which uses this variable can be - * called very early during CPU initialisation. - */ -unsigned int hbp_kernel_pos = HBP_NUM; -/* - * An array containing refcount of threads using a given bkpt register - * Accesses are synchronised by acquiring hw_breakpoint_lock - */ -unsigned int hbp_user_refcount[HBP_NUM]; +static atomic_t bp_slot; -/* - * Load the debug registers during startup of a CPU. - */ -void load_debug_registers(void) +int reserve_bp_slot(struct perf_event *bp) { - unsigned long flags; - struct task_struct *tsk = current; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Prevent IPIs for new kernel breakpoint updates */ - local_irq_save(flags); - arch_update_kernel_hw_breakpoint(NULL); - local_irq_restore(flags); - - if (test_tsk_thread_flag(tsk, TIF_DEBUG)) - arch_install_thread_hw_breakpoint(tsk); - - spin_unlock_bh(&hw_breakpoint_lock); -} + if (atomic_inc_return(&bp_slot) == HBP_NUM) { + atomic_dec(&bp_slot); -/* - * Erase all the hardware breakpoint info associated with a thread. - * - * If tsk != current then tsk must not be usable (for example, a - * child being cleaned up from a failed fork). - */ -void flush_thread_hw_breakpoint(struct task_struct *tsk) -{ - int i; - struct thread_struct *thread = &(tsk->thread); - - spin_lock_bh(&hw_breakpoint_lock); - - /* The thread no longer has any breakpoints associated with it */ - clear_tsk_thread_flag(tsk, TIF_DEBUG); - for (i = 0; i < HBP_NUM; i++) { - if (thread->hbp[i]) { - hbp_user_refcount[i]--; - kfree(thread->hbp[i]); - thread->hbp[i] = NULL; - } + return -ENOSPC; } - arch_flush_thread_hw_breakpoint(tsk); - - /* Actually uninstall the breakpoints if necessary */ - if (tsk == current) - arch_uninstall_thread_hw_breakpoint(); - spin_unlock_bh(&hw_breakpoint_lock); + return 0; } -/* - * Copy the hardware breakpoint info from a thread to its cloned child. - */ -int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags) +void release_bp_slot(struct perf_event *bp) { - /* - * We will assume that breakpoint settings are not inherited - * and the child starts out with no debug registers set. - * But what about CLONE_PTRACE? - */ - clear_tsk_thread_flag(child, TIF_DEBUG); - - /* We will call flush routine since the debugregs are not inherited */ - arch_flush_thread_hw_breakpoint(child); - - return 0; + atomic_dec(&bp_slot); } -static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +int __register_perf_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int rc; + int ret; - /* Do not overcommit. Fail if kernel has used the hbp registers */ - if (pos >= hbp_kernel_pos) - return -ENOSPC; + ret = reserve_bp_slot(bp); + if (ret) + return ret; - rc = arch_validate_hwbkpt_settings(bp, tsk); - if (rc) - return rc; + if (!bp->attr.disabled) + ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); - thread->hbp[pos] = bp; - hbp_user_refcount[pos]++; + return ret; +} - arch_update_user_hw_breakpoint(pos, tsk); - /* - * Does it need to be installed right now? - * Otherwise it will get installed the next time tsk runs - */ - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); +int register_perf_hw_breakpoint(struct perf_event *bp) +{ + bp->callback = perf_bp_event; - return rc; + return __register_perf_hw_breakpoint(bp); } /* - * Modify the address of a hbp register already in use by the task - * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() + * Register a breakpoint bound to a task and a given cpu. + * If cpu is -1, the breakpoint is active for the task in every cpu + * If the task is -1, the breakpoint is active for every tasks in the given + * cpu. */ -static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +static struct perf_event * +register_user_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + pid_t pid, + int cpu, + bool active) { - struct thread_struct *thread = &(tsk->thread); - - if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) - return -EINVAL; - - if (thread->hbp[pos] == NULL) - return -EINVAL; - - thread->hbp[pos] = bp; + struct perf_event_attr *attr; + struct perf_event *bp; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return ERR_PTR(-ENOMEM); + + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->bp_addr = addr; + attr->bp_len = len; + attr->bp_type = type; /* - * 'pos' must be that of a hbp register already used by 'tsk' - * Otherwise arch_modify_user_hw_breakpoint() will fail + * Such breakpoints are used by debuggers to trigger signals when + * we hit the excepted memory op. We can't miss such events, they + * must be pinned. */ - arch_update_user_hw_breakpoint(pos, tsk); + attr->pinned = 1; - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + if (!active) + attr->disabled = 1; - return 0; -} - -static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) -{ - hbp_user_refcount[pos]--; - tsk->thread.hbp[pos] = NULL; + bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); + kfree(attr); - arch_update_user_hw_breakpoint(pos, tsk); - - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + return bp; } /** * register_user_hw_breakpoint - register a hardware breakpoint for user space + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * @active: should we activate it while registering it * */ -int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, rc = -ENOSPC; - - spin_lock_bh(&hw_breakpoint_lock); - - for (i = 0; i < hbp_kernel_pos; i++) { - if (!thread->hbp[i]) { - rc = __register_user_hw_breakpoint(i, tsk, bp); - break; - } - } - if (!rc) - set_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + tsk->pid, -1, active); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint + * @bp: the breakpoint structure to modify + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to unregister - * + * @active: should we activate it while registering it */ -int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) +struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, ret = -ENOENT; + /* + * FIXME: do it without unregistering + * - We don't want to lose our slot + * - If the new bp is incorrect, don't lose the older one + */ + unregister_hw_breakpoint(bp); - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (bp == thread->hbp[i]) { - ret = __modify_user_hw_breakpoint(i, tsk, bp); - break; - } - } - spin_unlock_bh(&hw_breakpoint_lock); - return ret; + return register_user_hw_breakpoint(addr, len, type, triggered, + tsk, active); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); /** - * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs + * unregister_hw_breakpoint - unregister a user-space hardware breakpoint * @bp: the breakpoint structure to unregister - * */ -void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +void unregister_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int i, pos = -1, hbp_counter = 0; - - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (thread->hbp[i]) - hbp_counter++; - if (bp == thread->hbp[i]) - pos = i; - } - if (pos >= 0) { - __unregister_user_hw_breakpoint(pos, tsk); - hbp_counter--; - } - if (!hbp_counter) - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); + if (!bp) + return; + perf_event_release_kernel(bp); +} +EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); + +static struct perf_event * +register_kernel_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) +{ + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + -1, cpu, active); } -EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); /** - * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * register_wide_hw_breakpoint - register a wide breakpoint in the kernel + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint + * @active: should we activate it while registering it * + * @return a set of per_cpu pointers to perf events */ -int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) +struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { - int rc; + struct perf_event **cpu_events, **pevent, *bp; + long err; + int cpu; + + cpu_events = alloc_percpu(typeof(*cpu_events)); + if (!cpu_events) + return ERR_PTR(-ENOMEM); - rc = arch_validate_hwbkpt_settings(bp, NULL); - if (rc) - return rc; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + bp = register_kernel_hw_breakpoint_cpu(addr, len, type, + triggered, cpu, active); - spin_lock_bh(&hw_breakpoint_lock); + *pevent = bp; - rc = -ENOSPC; - /* Check if we are over-committing */ - if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { - hbp_kernel_pos--; - hbp_kernel[hbp_kernel_pos] = bp; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - rc = 0; + if (IS_ERR(bp) || !bp) { + err = PTR_ERR(bp); + goto fail; + } } - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return cpu_events; + +fail: + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + if (IS_ERR(*pevent) || !*pevent) + break; + unregister_hw_breakpoint(*pevent); + } + free_percpu(cpu_events); + /* return the error if any */ + return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); /** - * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space - * @bp: the breakpoint structure to unregister - * - * Uninstalls and unregisters @bp. + * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel + * @cpu_events: the per cpu set of events to unregister */ -void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) +void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { - int i, j; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Find the 'bp' in our list of breakpoints for kernel */ - for (i = hbp_kernel_pos; i < HBP_NUM; i++) - if (bp == hbp_kernel[i]) - break; + int cpu; + struct perf_event **pevent; - /* Check if we did not find a match for 'bp'. If so return early */ - if (i == HBP_NUM) { - spin_unlock_bh(&hw_breakpoint_lock); - return; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + unregister_hw_breakpoint(*pevent); } - - /* - * We'll shift the breakpoints one-level above to compact if - * unregistration creates a hole - */ - for (j = i; j > hbp_kernel_pos; j--) - hbp_kernel[j] = hbp_kernel[j-1]; - - hbp_kernel[hbp_kernel_pos] = NULL; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - hbp_kernel_pos++; - - spin_unlock_bh(&hw_breakpoint_lock); + free_percpu(cpu_events); } -EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); + static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, @@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void) { return register_die_notifier(&hw_breakpoint_exceptions_nb); } - core_initcall(init_hw_breakpoint); + + +struct pmu perf_ops_bp = { + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, + .unthrottle = hw_breakpoint_pmu_unthrottle +}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 5087125e2a00..98dc56b2ebe4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_PROFILE */ +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + int err; + /* + * The breakpoint is already filled if we haven't created the counter + * through perf syscall + * FIXME: manage to get trigerred to NULL if it comes from syscalls + */ + if (!bp->callback) + err = register_perf_hw_breakpoint(bp); + else + err = __register_perf_hw_breakpoint(bp); + if (err) + return ERR_PTR(err); + + bp->destroy = bp_perf_event_destroy; + + return &perf_ops_bp; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ + /* TODO */ +} +#else +static void bp_perf_event_destroy(struct perf_event *event) +{ +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + return NULL; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ +} +#endif + atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) @@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr, pmu = tp_perf_event_init(event); break; + case PERF_TYPE_BREAKPOINT: + pmu = bp_perf_event_init(event); + break; + + default: break; } @@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) - return NULL ; + return NULL; event = perf_event_alloc(attr, cpu, ctx, NULL, NULL, callback, GFP_KERNEL); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 91c3d0e9a5a1..d72f06ff263f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -11,14 +11,11 @@ #include #include #include +#include #include #include -#ifdef CONFIG_KSYM_TRACER -#include -#endif - enum trace_type { __TRACE_FIRST_TYPE = 0, diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e19747d4f860..c16a08f399df 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry, F_STRUCT( __field( unsigned long, ip ) __field( unsigned char, type ) - __array( char , ksym_name, KSYM_NAME_LEN ) __array( char , cmd, TASK_COMM_LEN ) + __field( unsigned long, addr ) ), - F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", + F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", (void *)__entry->ip, (unsigned int)__entry->type, - __entry->ksym_name, __entry->cmd) + (void *)__entry->addr, __entry->cmd) ); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 6d5609c67378..fea83eeeef09 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -29,7 +29,11 @@ #include "trace_stat.h" #include "trace.h" -/* For now, let us restrict the no. of symbols traced simultaneously to number +#include +#include + +/* + * For now, let us restrict the no. of symbols traced simultaneously to number * of available hardware breakpoint registers. */ #define KSYM_TRACER_MAX HBP_NUM @@ -37,8 +41,10 @@ #define KSYM_TRACER_OP_LEN 3 /* rw- */ struct trace_ksym { - struct hw_breakpoint *ksym_hbp; + struct perf_event **ksym_hbp; unsigned long ksym_addr; + int type; + int len; #ifdef CONFIG_PROFILE_KSYM_TRACER unsigned long counter; #endif @@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) } #endif /* CONFIG_PROFILE_KSYM_TRACER */ -void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) +void ksym_hbp_handler(struct perf_event *hbp, void *data) { struct ring_buffer_event *event; struct ksym_trace_entry *entry; + struct pt_regs *regs = data; struct ring_buffer *buffer; int pc; @@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) entry = ring_buffer_event_data(event); entry->ip = instruction_pointer(regs); - entry->type = hbp->info.type; - strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); + entry->type = hw_breakpoint_type(hbp); + entry->addr = hw_breakpoint_addr(hbp); strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); #ifdef CONFIG_PROFILE_KSYM_TRACER - ksym_collect_stats(hbp->info.address); + ksym_collect_stats(hw_breakpoint_addr(hbp)); #endif /* CONFIG_PROFILE_KSYM_TRACER */ trace_buffer_unlock_commit(buffer, event, 0, pc); @@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str) int access = 0; if (str[0] == 'r') - access += 4; - else if (str[0] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_R; if (str[1] == 'w') - access += 2; - else if (str[1] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_W; - if (str[2] != '-') - return -EINVAL; + if (str[2] == 'x') + access |= HW_BREAKPOINT_X; switch (access) { - case 6: - access = HW_BREAKPOINT_RW; - break; - case 4: - access = -EINVAL; - break; - case 2: - access = HW_BREAKPOINT_WRITE; - break; + case HW_BREAKPOINT_W: + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + return access; + default: + return -EINVAL; } - - return access; } /* @@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (!entry) return -ENOMEM; - entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (!entry->ksym_hbp) - goto err; - - entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); - if (!entry->ksym_hbp->info.name) - goto err; - - entry->ksym_hbp->info.type = op; - entry->ksym_addr = entry->ksym_hbp->info.address = addr; -#ifdef CONFIG_X86 - entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; -#endif - entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; + entry->type = op; + entry->ksym_addr = addr; + entry->len = HW_BREAKPOINT_LEN_4; + + ret = -EAGAIN; + entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) { + entry->ksym_hbp = NULL; + ret = PTR_ERR(entry->ksym_hbp); + } - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret < 0) { + if (!entry->ksym_hbp) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); - ret = -EAGAIN; goto err; } + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); ksym_filter_entry_count++; + return 0; + err: - if (entry->ksym_hbp) - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); + return ret; } @@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); - if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) + ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); + if (entry->type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); - else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) + else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); WARN_ON_ONCE(!ret); } @@ -269,12 +263,10 @@ static void __ksym_trace_reset(void) mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, ksym_hlist) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); + unregister_wide_hw_breakpoint(entry->ksym_hbp); ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); } mutex_unlock(&ksym_tracer_mutex); @@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { if (entry->ksym_addr == ksym_addr) { /* Check for malformed request: (6) */ - if (entry->ksym_hbp->info.type != op) + if (entry->type != op) changed = 1; else goto out; @@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, } } if (changed) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); - entry->ksym_hbp->info.type = op; + unregister_wide_hw_breakpoint(entry->ksym_hbp); + entry->type = op; if (op > 0) { - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret == 0) + entry->ksym_hbp = + register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) + entry->ksym_hbp = NULL; + if (!entry->ksym_hbp) goto out; } ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); ret = 0; goto out; @@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, - entry->pid, iter->cpu, field->ksym_name); + ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, + entry->pid, iter->cpu, (char *)field->addr); if (!ret) return TRACE_TYPE_PARTIAL_LINE; switch (field->type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: ret = trace_seq_printf(s, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: ret = trace_seq_printf(s, " RW "); break; default: @@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); - if (entry->ksym_hbp) - access_type = entry->ksym_hbp->info.type; + access_type = entry->type; switch (access_type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: seq_puts(m, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: seq_puts(m, " RW "); break; default: diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 7179c12e4f0f..27c5072c2e6b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) ksym_selftest_dummy = 0; /* Register the read-write tracing request */ - ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, + HW_BREAKPOINT_R | HW_BREAKPOINT_W, (unsigned long)(&ksym_selftest_dummy)); if (ret < 0) { -- cgit v1.2.1 From ba1c813a6b9a0ef14d7112daf51270eff326f037 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 10 Sep 2009 09:26:21 +0200 Subject: hw-breakpoints: Arbitrate access to pmu following registers constraints Allow or refuse to build a counter using the breakpoints pmu following given constraints. We keep track of the pmu users by using three per cpu variables: - nr_cpu_bp_pinned stores the number of pinned cpu breakpoints counters in the given cpu - nr_bp_flexible stores the number of non-pinned breakpoints counters in the given cpu. - task_bp_pinned stores the number of pinned task breakpoints in a cpu The latter is not a simple counter but gathers the number of tasks that have n pinned breakpoints. Considering HBP_NUM the number of available breakpoint address registers: task_bp_pinned[0] is the number of tasks having 1 breakpoint task_bp_pinned[1] is the number of tasks having 2 breakpoints [...] task_bp_pinned[HBP_NUM - 1] is the number of tasks having the maximum number of registers (HBP_NUM). When a breakpoint counter is created and wants an access to the pmu, we evaluate the following constraints: == Non-pinned counter == - If attached to a single cpu, check: (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM -> If there are already non-pinned counters in this cpu, it means there is already a free slot for them. Otherwise, we check that the maximum number of per task breakpoints (for this cpu) plus the number of per cpu breakpoint (for this cpu) doesn't cover every registers. - If attached to every cpus, check: (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM -> This is roughly the same, except we check the number of per cpu bp for every cpu and we keep the max one. Same for the per tasks breakpoints. == Pinned counter == - If attached to a single cpu, check: ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM -> Same checks as before. But now the nr_bp_flexible, if any, must keep one register at least (or flexible breakpoints will never be be fed). - If attached to every cpus, check: ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) + max(per_cpu(task_bp_pinned, *))) < HBP_NUM Changes in v2: - Counter -> event rename Changes in v5: - Fix unreleased non-pinned task-bound-only counters. We only released it in the first cpu. (Thanks to Paul Mackerras for reporting that) Changes in v6: - Currently, events scheduling are done in this order: cpu context pinned + cpu context non-pinned + task context pinned + task context non-pinned events. Then our current constraints are right theoretically but not in practice, because non-pinned counters may be scheduled before we can apply every possible pinned counters. So consider non-pinned counters as pinned for now. Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- kernel/hw_breakpoint.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 205 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 08f6d0163201..e662dc991c96 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -16,6 +16,8 @@ * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 * Copyright (C) 2009, Frederic Weisbecker + * + * Thanks to Ingo Molnar for his many suggestions. */ /* @@ -44,24 +46,221 @@ #include #endif -static atomic_t bp_slot; +/* + * Constraints data + */ + +/* Number of pinned cpu breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); -int reserve_bp_slot(struct perf_event *bp) +/* Number of pinned task breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); + +/* Number of non-pinned cpu/task breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); + +/* Gather the number of total pinned and un-pinned bp in a cpuset */ +struct bp_busy_slots { + unsigned int pinned; + unsigned int flexible; +}; + +/* Serialize accesses to the above constraints */ +static DEFINE_MUTEX(nr_bp_mutex); + +/* + * Report the maximum number of pinned breakpoints a task + * have in this cpu + */ +static unsigned int max_task_bp_pinned(int cpu) { - if (atomic_inc_return(&bp_slot) == HBP_NUM) { - atomic_dec(&bp_slot); + int i; + unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); - return -ENOSPC; + for (i = HBP_NUM -1; i >= 0; i--) { + if (tsk_pinned[i] > 0) + return i + 1; } return 0; } +/* + * Report the number of pinned/un-pinned breakpoints we have in + * a given cpu (cpu > -1) or in all of them (cpu = -1). + */ +static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) +{ + if (cpu >= 0) { + slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); + slots->pinned += max_task_bp_pinned(cpu); + slots->flexible = per_cpu(nr_bp_flexible, cpu); + + return; + } + + for_each_online_cpu(cpu) { + unsigned int nr; + + nr = per_cpu(nr_cpu_bp_pinned, cpu); + nr += max_task_bp_pinned(cpu); + + if (nr > slots->pinned) + slots->pinned = nr; + + nr = per_cpu(nr_bp_flexible, cpu); + + if (nr > slots->flexible) + slots->flexible = nr; + } +} + +/* + * Add a pinned breakpoint for the given task in our constraint table + */ +static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) +{ + int count = 0; + struct perf_event *bp; + struct perf_event_context *ctx = tsk->perf_event_ctxp; + unsigned int *task_bp_pinned; + struct list_head *list; + unsigned long flags; + + if (WARN_ONCE(!ctx, "No perf context for this task")) + return; + + list = &ctx->event_list; + + spin_lock_irqsave(&ctx->lock, flags); + + /* + * The current breakpoint counter is not included in the list + * at the open() callback time + */ + list_for_each_entry(bp, list, event_entry) { + if (bp->attr.type == PERF_TYPE_BREAKPOINT) + count++; + } + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) + return; + + task_bp_pinned = per_cpu(task_bp_pinned, cpu); + if (enable) { + task_bp_pinned[count]++; + if (count > 0) + task_bp_pinned[count-1]--; + } else { + task_bp_pinned[count]--; + if (count > 0) + task_bp_pinned[count-1]++; + } +} + +/* + * Add/remove the given breakpoint in our constraint table + */ +static void toggle_bp_slot(struct perf_event *bp, bool enable) +{ + int cpu = bp->cpu; + struct task_struct *tsk = bp->ctx->task; + + /* Pinned counter task profiling */ + if (tsk) { + if (cpu >= 0) { + toggle_bp_task_slot(tsk, cpu, enable); + return; + } + + for_each_online_cpu(cpu) + toggle_bp_task_slot(tsk, cpu, enable); + return; + } + + /* Pinned counter cpu profiling */ + if (enable) + per_cpu(nr_cpu_bp_pinned, bp->cpu)++; + else + per_cpu(nr_cpu_bp_pinned, bp->cpu)--; +} + +/* + * Contraints to check before allowing this new breakpoint counter: + * + * == Non-pinned counter == (Considered as pinned for now) + * + * - If attached to a single cpu, check: + * + * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) + * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM + * + * -> If there are already non-pinned counters in this cpu, it means + * there is already a free slot for them. + * Otherwise, we check that the maximum number of per task + * breakpoints (for this cpu) plus the number of per cpu breakpoint + * (for this cpu) doesn't cover every registers. + * + * - If attached to every cpus, check: + * + * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) + * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM + * + * -> This is roughly the same, except we check the number of per cpu + * bp for every cpu and we keep the max one. Same for the per tasks + * breakpoints. + * + * + * == Pinned counter == + * + * - If attached to a single cpu, check: + * + * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) + * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM + * + * -> Same checks as before. But now the nr_bp_flexible, if any, must keep + * one register at least (or they will never be fed). + * + * - If attached to every cpus, check: + * + * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) + * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM + */ +int reserve_bp_slot(struct perf_event *bp) +{ + struct bp_busy_slots slots = {0}; + int ret = 0; + + mutex_lock(&nr_bp_mutex); + + fetch_bp_busy_slots(&slots, bp->cpu); + + /* Flexible counters need to keep at least one slot */ + if (slots.pinned + (!!slots.flexible) == HBP_NUM) { + ret = -ENOSPC; + goto end; + } + + toggle_bp_slot(bp, true); + +end: + mutex_unlock(&nr_bp_mutex); + + return ret; +} + void release_bp_slot(struct perf_event *bp) { - atomic_dec(&bp_slot); + mutex_lock(&nr_bp_mutex); + + toggle_bp_slot(bp, false); + + mutex_unlock(&nr_bp_mutex); } + int __register_perf_hw_breakpoint(struct perf_event *bp) { int ret; -- cgit v1.2.1 From 30ff21e31fe5c8b7b1b7d30cc41e32bc4ee9f175 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 10 Sep 2009 09:35:20 +0800 Subject: ksym_tracer: Remove KSYM_SELFTEST_ENTRY The macro used to be used in both trace_selftest.c and trace_ksym.c, but no longer, so remove it from header file. Signed-off-by: Li Zefan Cc: Prasad Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.h | 1 - kernel/trace/trace_selftest.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d72f06ff263f..ee00475742eb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -367,7 +367,6 @@ int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); int is_tracing_stopped(void); -#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); extern unsigned long nsecs_to_usecs(unsigned long nsecs); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 27c5072c2e6b..dc98309e839a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) ksym_selftest_dummy = 0; /* Register the read-write tracing request */ - ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, + + ret = process_new_ksym_entry("ksym_selftest_dummy", HW_BREAKPOINT_R | HW_BREAKPOINT_W, (unsigned long)(&ksym_selftest_dummy)); -- cgit v1.2.1 From 676c0dbe6e514fdd8e434a9e623c781aa9b40b15 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 9 Nov 2009 17:37:34 +0900 Subject: ksym_tracer: Support read accesses independent of read/write. All of the infrastructure already exists to support read accesses for platforms that support a read access independently of read/write (such as in the case of the SuperH UBC). This just trivially hooks up the read case by itself. Signed-off-by: Paul Mundt Cc: Ingo Molnar Cc: Li Zefan Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Jan Kiszka Cc: Jiri Slaby Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Arjan van de Ven LKML-Reference: <20091109083733.GA25848@linux-sh.org> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_ksym.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index fea83eeeef09..11935b53a6cb 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -136,6 +136,7 @@ static int ksym_trace_get_access_type(char *str) access |= HW_BREAKPOINT_X; switch (access) { + case HW_BREAKPOINT_R: case HW_BREAKPOINT_W: case HW_BREAKPOINT_W | HW_BREAKPOINT_R: return access; @@ -239,7 +240,9 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); - if (entry->type == HW_BREAKPOINT_W) + if (entry->type == HW_BREAKPOINT_R) + ret = trace_seq_puts(s, "r--\n"); + else if (entry->type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); @@ -414,6 +417,9 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) return TRACE_TYPE_PARTIAL_LINE; switch (field->type) { + case HW_BREAKPOINT_R: + ret = trace_seq_printf(s, " R "); + break; case HW_BREAKPOINT_W: ret = trace_seq_printf(s, " W "); break; @@ -488,6 +494,9 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) access_type = entry->type; switch (access_type) { + case HW_BREAKPOINT_R: + seq_puts(m, " R "); + break; case HW_BREAKPOINT_W: seq_puts(m, " W "); break; -- cgit v1.2.1 From f60d24d2ad04977b0bd9e3eb35dba2d2fa569af9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Nov 2009 10:17:07 +0100 Subject: hw-breakpoints: Fix broken hw-breakpoint sample module The hw-breakpoint sample module has been broken during the hw-breakpoint internals refactoring. Propagate the changes to it. Reported-by: "K. Prasad" Signed-off-by: Frederic Weisbecker --- kernel/hw_breakpoint.c | 3 ++- kernel/kallsyms.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index e662dc991c96..9ea9414e0e58 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -454,6 +454,7 @@ fail: /* return the error if any */ return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); /** * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel @@ -470,7 +471,7 @@ void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) } free_percpu(cpu_events); } - +EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 8b6b8b697c68..8e5288a8a355 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name) } return module_kallsyms_lookup_name(name); } +EXPORT_SYMBOL_GPL(kallsyms_lookup_name); int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), -- cgit v1.2.1