diff options
author | Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | 2008-02-13 15:03:37 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-13 16:21:20 -0800 |
commit | fb40bd78b0f91b274879cf5db8facd1e04b6052e (patch) | |
tree | 2347ccb5ad07f58ab5a4eb41174bb7b54d5f0c5b /kernel | |
parent | 9170d2f6e1dc4d79650fbf492d1cd45291c66504 (diff) | |
download | blackbird-op-linux-fb40bd78b0f91b274879cf5db8facd1e04b6052e.tar.gz blackbird-op-linux-fb40bd78b0f91b274879cf5db8facd1e04b6052e.zip |
Linux Kernel Markers: support multiple probes
RCU style multiple probes support for the Linux Kernel Markers. Common case
(one probe) is still fast and does not require dynamic allocation or a
supplementary pointer dereference on the fast path.
- Move preempt disable from the marker site to the callback.
Since we now have an internal callback, move the preempt disable/enable to the
callback instead of the marker site.
Since the callback change is done asynchronously (passing from a handler that
supports arguments to a handler that does not setup the arguments is no
arguments are passed), we can safely update it even if it is outside the
preempt disable section.
- Move probe arm to probe connection. Now, a connected probe is automatically
armed.
Remove MARK_MAX_FORMAT_LEN, unused.
This patch modifies the Linux Kernel Markers API : it removes the probe
"arm/disarm" and changes the probe function prototype : it now expects a
va_list * instead of a "...".
If we want to have more than one probe connected to a marker at a given
time (LTTng, or blktrace, ssytemtap) then we need this patch. Without it,
connecting a second probe handler to a marker will fail.
It allow us, for instance, to do interesting combinations :
Do standard tracing with LTTng and, eventually, to compute statistics
with SystemTAP, or to have a special trigger on an event that would call
a systemtap script which would stop flight recorder tracing.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Mike Mason <mmlnx@us.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: David Smith <dsmith@redhat.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/marker.c | 677 | ||||
-rw-r--r-- | kernel/module.c | 7 |
2 files changed, 508 insertions, 176 deletions
diff --git a/kernel/marker.c b/kernel/marker.c index 5323cfaedbce..c4c2cd8b61f5 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -27,35 +27,42 @@ extern struct marker __start___markers[]; extern struct marker __stop___markers[]; +/* Set to 1 to enable marker debug output */ +const int marker_debug; + /* * markers_mutex nests inside module_mutex. Markers mutex protects the builtin - * and module markers, the hash table and deferred_sync. + * and module markers and the hash table. */ static DEFINE_MUTEX(markers_mutex); /* - * Marker deferred synchronization. - * Upon marker probe_unregister, we delay call to synchronize_sched() to - * accelerate mass unregistration (only when there is no more reference to a - * given module do we call synchronize_sched()). However, we need to make sure - * every critical region has ended before we re-arm a marker that has been - * unregistered and then registered back with a different probe data. - */ -static int deferred_sync; - -/* * Marker hash table, containing the active markers. * Protected by module_mutex. */ #define MARKER_HASH_BITS 6 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) +/* + * Note about RCU : + * It is used to make sure every handler has finished using its private data + * between two consecutive operation (add or remove) on a given marker. It is + * also used to delay the free of multiple probes array until a quiescent state + * is reached. + * marker entries modifications are protected by the markers_mutex. + */ struct marker_entry { struct hlist_node hlist; char *format; - marker_probe_func *probe; - void *private; + void (*call)(const struct marker *mdata, /* Probe wrapper */ + void *call_private, const char *fmt, ...); + struct marker_probe_closure single; + struct marker_probe_closure *multi; int refcount; /* Number of times armed. 0 if disarmed. */ + struct rcu_head rcu; + void *oldptr; + char rcu_pending:1; + char ptype:1; char name[0]; /* Contains name'\0'format'\0' */ }; @@ -63,7 +70,8 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; /** * __mark_empty_function - Empty probe callback - * @mdata: pointer of type const struct marker + * @probe_private: probe private data + * @call_private: call site private data * @fmt: format string * @...: variable argument list * @@ -72,13 +80,267 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; * though the function pointer change and the marker enabling are two distinct * operations that modifies the execution flow of preemptible code. */ -void __mark_empty_function(const struct marker *mdata, void *private, - const char *fmt, ...) +void __mark_empty_function(void *probe_private, void *call_private, + const char *fmt, va_list *args) { } EXPORT_SYMBOL_GPL(__mark_empty_function); /* + * marker_probe_cb Callback that prepares the variable argument list for probes. + * @mdata: pointer of type struct marker + * @call_private: caller site private data + * @fmt: format string + * @...: Variable argument list. + * + * Since we do not use "typical" pointer based RCU in the 1 argument case, we + * need to put a full smp_rmb() in this branch. This is why we do not use + * rcu_dereference() for the pointer read. + */ +void marker_probe_cb(const struct marker *mdata, void *call_private, + const char *fmt, ...) +{ + va_list args; + char ptype; + + /* + * disabling preemption to make sure the teardown of the callbacks can + * be done correctly when they are in modules and they insure RCU read + * coherency. + */ + preempt_disable(); + ptype = ACCESS_ONCE(mdata->ptype); + if (likely(!ptype)) { + marker_probe_func *func; + /* Must read the ptype before ptr. They are not data dependant, + * so we put an explicit smp_rmb() here. */ + smp_rmb(); + func = ACCESS_ONCE(mdata->single.func); + /* Must read the ptr before private data. They are not data + * dependant, so we put an explicit smp_rmb() here. */ + smp_rmb(); + va_start(args, fmt); + func(mdata->single.probe_private, call_private, fmt, &args); + va_end(args); + } else { + struct marker_probe_closure *multi; + int i; + /* + * multi points to an array, therefore accessing the array + * depends on reading multi. However, even in this case, + * we must insure that the pointer is read _before_ the array + * data. Same as rcu_dereference, but we need a full smp_rmb() + * in the fast path, so put the explicit barrier here. + */ + smp_read_barrier_depends(); + multi = ACCESS_ONCE(mdata->multi); + for (i = 0; multi[i].func; i++) { + va_start(args, fmt); + multi[i].func(multi[i].probe_private, call_private, fmt, + &args); + va_end(args); + } + } + preempt_enable(); +} +EXPORT_SYMBOL_GPL(marker_probe_cb); + +/* + * marker_probe_cb Callback that does not prepare the variable argument list. + * @mdata: pointer of type struct marker + * @call_private: caller site private data + * @fmt: format string + * @...: Variable argument list. + * + * Should be connected to markers "MARK_NOARGS". + */ +void marker_probe_cb_noarg(const struct marker *mdata, + void *call_private, const char *fmt, ...) +{ + va_list args; /* not initialized */ + char ptype; + + preempt_disable(); + ptype = ACCESS_ONCE(mdata->ptype); + if (likely(!ptype)) { + marker_probe_func *func; + /* Must read the ptype before ptr. They are not data dependant, + * so we put an explicit smp_rmb() here. */ + smp_rmb(); + func = ACCESS_ONCE(mdata->single.func); + /* Must read the ptr before private data. They are not data + * dependant, so we put an explicit smp_rmb() here. */ + smp_rmb(); + func(mdata->single.probe_private, call_private, fmt, &args); + } else { + struct marker_probe_closure *multi; + int i; + /* + * multi points to an array, therefore accessing the array + * depends on reading multi. However, even in this case, + * we must insure that the pointer is read _before_ the array + * data. Same as rcu_dereference, but we need a full smp_rmb() + * in the fast path, so put the explicit barrier here. + */ + smp_read_barrier_depends(); + multi = ACCESS_ONCE(mdata->multi); + for (i = 0; multi[i].func; i++) + multi[i].func(multi[i].probe_private, call_private, fmt, + &args); + } + preempt_enable(); +} +EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); + +static void free_old_closure(struct rcu_head *head) +{ + struct marker_entry *entry = container_of(head, + struct marker_entry, rcu); + kfree(entry->oldptr); + /* Make sure we free the data before setting the pending flag to 0 */ + smp_wmb(); + entry->rcu_pending = 0; +} + +static void debug_print_probes(struct marker_entry *entry) +{ + int i; + + if (!marker_debug) + return; + + if (!entry->ptype) { + printk(KERN_DEBUG "Single probe : %p %p\n", + entry->single.func, + entry->single.probe_private); + } else { + for (i = 0; entry->multi[i].func; i++) + printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, + entry->multi[i].func, + entry->multi[i].probe_private); + } +} + +static struct marker_probe_closure * +marker_entry_add_probe(struct marker_entry *entry, + marker_probe_func *probe, void *probe_private) +{ + int nr_probes = 0; + struct marker_probe_closure *old, *new; + + WARN_ON(!probe); + + debug_print_probes(entry); + old = entry->multi; + if (!entry->ptype) { + if (entry->single.func == probe && + entry->single.probe_private == probe_private) + return ERR_PTR(-EBUSY); + if (entry->single.func == __mark_empty_function) { + /* 0 -> 1 probes */ + entry->single.func = probe; + entry->single.probe_private = probe_private; + entry->refcount = 1; + entry->ptype = 0; + debug_print_probes(entry); + return NULL; + } else { + /* 1 -> 2 probes */ + nr_probes = 1; + old = NULL; + } + } else { + /* (N -> N+1), (N != 0, 1) probes */ + for (nr_probes = 0; old[nr_probes].func; nr_probes++) + if (old[nr_probes].func == probe + && old[nr_probes].probe_private + == probe_private) + return ERR_PTR(-EBUSY); + } + /* + 2 : one for new probe, one for NULL func */ + new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), + GFP_KERNEL); + if (new == NULL) + return ERR_PTR(-ENOMEM); + if (!old) + new[0] = entry->single; + else + memcpy(new, old, + nr_probes * sizeof(struct marker_probe_closure)); + new[nr_probes].func = probe; + new[nr_probes].probe_private = probe_private; + entry->refcount = nr_probes + 1; + entry->multi = new; + entry->ptype = 1; + debug_print_probes(entry); + return old; +} + +static struct marker_probe_closure * +marker_entry_remove_probe(struct marker_entry *entry, + marker_probe_func *probe, void *probe_private) +{ + int nr_probes = 0, nr_del = 0, i; + struct marker_probe_closure *old, *new; + + old = entry->multi; + + debug_print_probes(entry); + if (!entry->ptype) { + /* 0 -> N is an error */ + WARN_ON(entry->single.func == __mark_empty_function); + /* 1 -> 0 probes */ + WARN_ON(probe && entry->single.func != probe); + WARN_ON(entry->single.probe_private != probe_private); + entry->single.func = __mark_empty_function; + entry->refcount = 0; + entry->ptype = 0; + debug_print_probes(entry); + return NULL; + } else { + /* (N -> M), (N > 1, M >= 0) probes */ + for (nr_probes = 0; old[nr_probes].func; nr_probes++) { + if ((!probe || old[nr_probes].func == probe) + && old[nr_probes].probe_private + == probe_private) + nr_del++; + } + } + + if (nr_probes - nr_del == 0) { + /* N -> 0, (N > 1) */ + entry->single.func = __mark_empty_function; + entry->refcount = 0; + entry->ptype = 0; + } else if (nr_probes - nr_del == 1) { + /* N -> 1, (N > 1) */ + for (i = 0; old[i].func; i++) + if ((probe && old[i].func != probe) || + old[i].probe_private != probe_private) + entry->single = old[i]; + entry->refcount = 1; + entry->ptype = 0; + } else { + int j = 0; + /* N -> M, (N > 1, M > 1) */ + /* + 1 for NULL */ + new = kzalloc((nr_probes - nr_del + 1) + * sizeof(struct marker_probe_closure), GFP_KERNEL); + if (new == NULL) + return ERR_PTR(-ENOMEM); + for (i = 0; old[i].func; i++) + if ((probe && old[i].func != probe) || + old[i].probe_private != probe_private) + new[j++] = old[i]; + entry->refcount = nr_probes - nr_del; + entry->ptype = 1; + entry->multi = new; + } + debug_print_probes(entry); + return old; +} + +/* * Get marker if the marker is present in the marker hash table. * Must be called with markers_mutex held. * Returns NULL if not present. @@ -102,8 +364,7 @@ static struct marker_entry *get_marker(const char *name) * Add the marker to the marker hash table. Must be called with markers_mutex * held. */ -static int add_marker(const char *name, const char *format, - marker_probe_func *probe, void *private) +static struct marker_entry *add_marker(const char *name, const char *format) { struct hlist_head *head; struct hlist_node *node; @@ -118,9 +379,8 @@ static int add_marker(const char *name, const char *format, hlist_for_each_entry(e, node, head, hlist) { if (!strcmp(name, e->name)) { printk(KERN_NOTICE - "Marker %s busy, probe %p already installed\n", - name, e->probe); - return -EBUSY; /* Already there */ + "Marker %s busy\n", name); + return ERR_PTR(-EBUSY); /* Already there */ } } /* @@ -130,34 +390,42 @@ static int add_marker(const char *name, const char *format, e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, GFP_KERNEL); if (!e) - return -ENOMEM; + return ERR_PTR(-ENOMEM); memcpy(&e->name[0], name, name_len); if (format) { e->format = &e->name[name_len]; memcpy(e->format, format, format_len); + if (strcmp(e->format, MARK_NOARGS) == 0) + e->call = marker_probe_cb_noarg; + else + e->call = marker_probe_cb; trace_mark(core_marker_format, "name %s format %s", e->name, e->format); - } else + } else { e->format = NULL; - e->probe = probe; - e->private = private; + e->call = marker_probe_cb; + } + e->single.func = __mark_empty_function; + e->single.probe_private = NULL; + e->multi = NULL; + e->ptype = 0; e->refcount = 0; + e->rcu_pending = 0; hlist_add_head(&e->hlist, head); - return 0; + return e; } /* * Remove the marker from the marker hash table. Must be called with mutex_lock * held. */ -static void *remove_marker(const char *name) +static int remove_marker(const char *name) { struct hlist_head *head; struct hlist_node *node; struct marker_entry *e; int found = 0; size_t len = strlen(name) + 1; - void *private = NULL; u32 hash = jhash(name, len-1, 0); head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; @@ -167,12 +435,16 @@ static void *remove_marker(const char *name) break; } } - if (found) { - private = e->private; - hlist_del(&e->hlist); - kfree(e); - } - return private; + if (!found) + return -ENOENT; + if (e->single.func != __mark_empty_function) + return -EBUSY; + hlist_del(&e->hlist); + /* Make sure the call_rcu has been executed */ + if (e->rcu_pending) + rcu_barrier(); + kfree(e); + return 0; } /* @@ -184,6 +456,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format) size_t name_len = strlen((*entry)->name) + 1; size_t format_len = strlen(format) + 1; + e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, GFP_KERNEL); if (!e) @@ -191,11 +464,20 @@ static int marker_set_format(struct marker_entry **entry, const char *format) memcpy(&e->name[0], (*entry)->name, name_len); e->format = &e->name[name_len]; memcpy(e->format, format, format_len); - e->probe = (*entry)->probe; - e->private = (*entry)->private; + if (strcmp(e->format, MARK_NOARGS) == 0) + e->call = marker_probe_cb_noarg; + else + e->call = marker_probe_cb; + e->single = (*entry)->single; + e->multi = (*entry)->multi; + e->ptype = (*entry)->ptype; e->refcount = (*entry)->refcount; + e->rcu_pending = 0; hlist_add_before(&e->hlist, &(*entry)->hlist); hlist_del(&(*entry)->hlist); + /* Make sure the call_rcu has been executed */ + if ((*entry)->rcu_pending) + rcu_barrier(); kfree(*entry); *entry = e; trace_mark(core_marker_format, "name %s format %s", @@ -206,7 +488,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format) /* * Sets the probe callback corresponding to one marker. */ -static int set_marker(struct marker_entry **entry, struct marker *elem) +static int set_marker(struct marker_entry **entry, struct marker *elem, + int active) { int ret; WARN_ON(strcmp((*entry)->name, elem->name) != 0); @@ -226,9 +509,43 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) if (ret) return ret; } - elem->call = (*entry)->probe; - elem->private = (*entry)->private; - elem->state = 1; + + /* + * probe_cb setup (statically known) is done here. It is + * asynchronous with the rest of execution, therefore we only + * pass from a "safe" callback (with argument) to an "unsafe" + * callback (does not set arguments). + */ + elem->call = (*entry)->call; + /* + * Sanity check : + * We only update the single probe private data when the ptr is + * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) + */ + WARN_ON(elem->single.func != __mark_empty_function + && elem->single.probe_private + != (*entry)->single.probe_private && + !elem->ptype); + elem->single.probe_private = (*entry)->single.probe_private; + /* + * Make sure the private data is valid when we update the + * single probe ptr. + */ + smp_wmb(); + elem->single.func = (*entry)->single.func; + /* + * We also make sure that the new probe callbacks array is consistent + * before setting a pointer to it. + */ + rcu_assign_pointer(elem->multi, (*entry)->multi); + /* + * Update the function or multi probe array pointer before setting the + * ptype. + */ + smp_wmb(); + elem->ptype = (*entry)->ptype; + elem->state = active; + return 0; } @@ -240,8 +557,12 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) */ static void disable_marker(struct marker *elem) { + /* leave "call" as is. It is known statically. */ elem->state = 0; - elem->call = __mark_empty_function; + elem->single.func = __mark_empty_function; + /* Update the function before setting the ptype */ + smp_wmb(); + elem->ptype = 0; /* single probe */ /* * Leave the private data and id there, because removal is racy and * should be done only after a synchronize_sched(). These are never used @@ -253,14 +574,11 @@ static void disable_marker(struct marker *elem) * marker_update_probe_range - Update a probe range * @begin: beginning of the range * @end: end of the range - * @probe_module: module address of the probe being updated - * @refcount: number of references left to the given probe_module (out) * * Updates the probe callback corresponding to a range of markers. */ void marker_update_probe_range(struct marker *begin, - struct marker *end, struct module *probe_module, - int *refcount) + struct marker *end) { struct marker *iter; struct marker_entry *mark_entry; @@ -268,15 +586,12 @@ void marker_update_probe_range(struct marker *begin, mutex_lock(&markers_mutex); for (iter = begin; iter < end; iter++) { mark_entry = get_marker(iter->name); - if (mark_entry && mark_entry->refcount) { - set_marker(&mark_entry, iter); + if (mark_entry) { + set_marker(&mark_entry, iter, + !!mark_entry->refcount); /* * ignore error, continue */ - if (probe_module) - if (probe_module == - __module_text_address((unsigned long)mark_entry->probe)) - (*refcount)++; } else { disable_marker(iter); } @@ -289,20 +604,27 @@ void marker_update_probe_range(struct marker *begin, * Issues a synchronize_sched() when no reference to the module passed * as parameter is found in the probes so the probe module can be * safely unloaded from now on. + * + * Internal callback only changed before the first probe is connected to it. + * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 + * transitions. All other transitions will leave the old private data valid. + * This makes the non-atomicity of the callback/private data updates valid. + * + * "special case" updates : + * 0 -> 1 callback + * 1 -> 0 callback + * 1 -> 2 callbacks + * 2 -> 1 callbacks + * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. + * Site effect : marker_set_format may delete the marker entry (creating a + * replacement). */ -static void marker_update_probes(struct module *probe_module) +static void marker_update_probes(void) { - int refcount = 0; - /* Core kernel markers */ - marker_update_probe_range(__start___markers, - __stop___markers, probe_module, &refcount); + marker_update_probe_range(__start___markers, __stop___markers); /* Markers in modules. */ - module_update_markers(probe_module, &refcount); - if (probe_module && refcount == 0) { - synchronize_sched(); - deferred_sync = 0; - } + module_update_markers(); } /** @@ -310,33 +632,49 @@ static void marker_update_probes(struct module *probe_module) * @name: marker name * @format: format string * @probe: probe handler - * @private: probe private data + * @probe_private: probe private data * * private data must be a valid allocated memory address, or NULL. * Returns 0 if ok, error value on error. + * The probe address must at least be aligned on the architecture pointer size. */ int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *private) + marker_probe_func *probe, void *probe_private) { struct marker_entry *entry; int ret = 0; + struct marker_probe_closure *old; mutex_lock(&markers_mutex); entry = get_marker(name); - if (entry && entry->refcount) { - ret = -EBUSY; - goto end; - } - if (deferred_sync) { - synchronize_sched(); - deferred_sync = 0; + if (!entry) { + entry = add_marker(name, format); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + goto end; + } } - ret = add_marker(name, format, probe, private); - if (ret) + /* + * If we detect that a call_rcu is pending for this marker, + * make sure it's executed now. + */ + if (entry->rcu_pending) + rcu_barrier(); + old = marker_entry_add_probe(entry, probe, probe_private); + if (IS_ERR(old)) { + ret = PTR_ERR(old); goto end; + } mutex_unlock(&markers_mutex); - marker_update_probes(NULL); - return ret; + marker_update_probes(); /* may update entry */ + mutex_lock(&markers_mutex); + entry = get_marker(name); + WARN_ON(!entry); + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); + call_rcu(&entry->rcu, free_old_closure); end: mutex_unlock(&markers_mutex); return ret; @@ -346,171 +684,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register); /** * marker_probe_unregister - Disconnect a probe from a marker * @name: marker name + * @probe: probe function pointer + * @probe_private: probe private data * * Returns the private data given to marker_probe_register, or an ERR_PTR(). + * We do not need to call a synchronize_sched to make sure the probes have + * finished running before doing a module unload, because the module unload + * itself uses stop_machine(), which insures that every preempt disabled section + * have finished. */ -void *marker_probe_unregister(const char *name) +int marker_probe_unregister(const char *name, + marker_probe_func *probe, void *probe_private) { - struct module *probe_module; struct marker_entry *entry; - void *private; + struct marker_probe_closure *old; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); if (!entry) { - private = ERR_PTR(-ENOENT); + ret = -ENOENT; goto end; } - entry->refcount = 0; - /* In what module is the probe handler ? */ - probe_module = __module_text_address((unsigned long)entry->probe); - private = remove_marker(name); - deferred_sync = 1; + if (entry->rcu_pending) + rcu_barrier(); + old = marker_entry_remove_probe(entry, probe, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(probe_module); - return private; + marker_update_probes(); /* may update entry */ + mutex_lock(&markers_mutex); + entry = get_marker(name); + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); + call_rcu(&entry->rcu, free_old_closure); + remove_marker(name); /* Ignore busy error message */ end: mutex_unlock(&markers_mutex); - return private; + return ret; } EXPORT_SYMBOL_GPL(marker_probe_unregister); -/** - * marker_probe_unregister_private_data - Disconnect a probe from a marker - * @private: probe private data - * - * Unregister a marker by providing the registered private data. - * Returns the private data given to marker_probe_register, or an ERR_PTR(). - */ -void *marker_probe_unregister_private_data(void *private) +static struct marker_entry * +get_marker_from_private_data(marker_probe_func *probe, void *probe_private) { - struct module *probe_module; - struct hlist_head *head; - struct hlist_node *node; struct marker_entry *entry; - int found = 0; unsigned int i; + struct hlist_head *head; + struct hlist_node *node; - mutex_lock(&markers_mutex); for (i = 0; i < MARKER_TABLE_SIZE; i++) { head = &marker_table[i]; hlist_for_each_entry(entry, node, head, hlist) { - if (entry->private == private) { - found = 1; - goto iter_end; + if (!entry->ptype) { + if (entry->single.func == probe + && entry->single.probe_private + == probe_private) + return entry; + } else { + struct marker_probe_closure *closure; + closure = entry->multi; + for (i = 0; closure[i].func; i++) { + if (closure[i].func == probe && + closure[i].probe_private + == probe_private) + return entry; + } } } } -iter_end: - if (!found) { - private = ERR_PTR(-ENOENT); - goto end; - } - entry->refcount = 0; - /* In what module is the probe handler ? */ - probe_module = __module_text_address((unsigned long)entry->probe); - private = remove_marker(entry->name); - deferred_sync = 1; - mutex_unlock(&markers_mutex); - marker_update_probes(probe_module); - return private; -end: - mutex_unlock(&markers_mutex); - return private; + return NULL; } -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); /** - * marker_arm - Arm a marker - * @name: marker name + * marker_probe_unregister_private_data - Disconnect a probe from a marker + * @probe: probe function + * @probe_private: probe private data * - * Activate a marker. It keeps a reference count of the number of - * arming/disarming done. - * Returns 0 if ok, error value on error. + * Unregister a probe by providing the registered private data. + * Only removes the first marker found in hash table. + * Return 0 on success or error value. + * We do not need to call a synchronize_sched to make sure the probes have + * finished running before doing a module unload, because the module unload + * itself uses stop_machine(), which insures that every preempt disabled section + * have finished. */ -int marker_arm(const char *name) +int marker_probe_unregister_private_data(marker_probe_func *probe, + void *probe_private) { struct marker_entry *entry; int ret = 0; + struct marker_probe_closure *old; mutex_lock(&markers_mutex); - entry = get_marker(name); + entry = get_marker_from_private_data(probe, probe_private); if (!entry) { ret = -ENOENT; goto end; } - /* - * Only need to update probes when refcount passes from 0 to 1. - */ - if (entry->refcount++) - goto end; -end: + if (entry->rcu_pending) + rcu_barrier(); + old = marker_entry_remove_probe(entry, NULL, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(NULL); - return ret; -} -EXPORT_SYMBOL_GPL(marker_arm); - -/** - * marker_disarm - Disarm a marker - * @name: marker name - * - * Disarm a marker. It keeps a reference count of the number of arming/disarming - * done. - * Returns 0 if ok, error value on error. - */ -int marker_disarm(const char *name) -{ - struct marker_entry *entry; - int ret = 0; - + marker_update_probes(); /* may update entry */ mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) { - ret = -ENOENT; - goto end; - } - /* - * Only permit decrement refcount if higher than 0. - * Do probe update only on 1 -> 0 transition. - */ - if (entry->refcount) { - if (--entry->refcount) - goto end; - } else { - ret = -EPERM; - goto end; - } + entry = get_marker_from_private_data(probe, probe_private); + WARN_ON(!entry); + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); + call_rcu(&entry->rcu, free_old_closure); + remove_marker(entry->name); /* Ignore busy error message */ end: mutex_unlock(&markers_mutex); - marker_update_probes(NULL); return ret; } -EXPORT_SYMBOL_GPL(marker_disarm); +EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); /** * marker_get_private_data - Get a marker's probe private data * @name: marker name + * @probe: probe to match + * @num: get the nth matching probe's private data * + * Returns the nth private data pointer (starting from 0) matching, or an + * ERR_PTR. * Returns the private data pointer, or an ERR_PTR. * The private data pointer should _only_ be dereferenced if the caller is the * owner of the data, or its content could vanish. This is mostly used to * confirm that a caller is the owner of a registered probe. */ -void *marker_get_private_data(const char *name) +void *marker_get_private_data(const char *name, marker_probe_func *probe, + int num) { struct hlist_head *head; struct hlist_node *node; struct marker_entry *e; size_t name_len = strlen(name) + 1; u32 hash = jhash(name, name_len-1, 0); - int found = 0; + int i; head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; hlist_for_each_entry(e, node, head, hlist) { if (!strcmp(name, e->name)) { - found = 1; - return e->private; + if (!e->ptype) { + if (num == 0 && e->single.func == probe) + return e->single.probe_private; + else + break; + } else { + struct marker_probe_closure *closure; + int match = 0; + closure = e->multi; + for (i = 0; closure[i].func; i++) { + if (closure[i].func != probe) + continue; + if (match++ == num) + return closure[i].probe_private; + } + } } } return ERR_PTR(-ENOENT); diff --git a/kernel/module.c b/kernel/module.c index 4202da97a1da..92595bad3812 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2038,7 +2038,7 @@ static struct module *load_module(void __user *umod, #ifdef CONFIG_MARKERS if (!mod->taints) marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers, NULL, NULL); + mod->markers + mod->num_markers); #endif err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2564,7 +2564,7 @@ EXPORT_SYMBOL(struct_module); #endif #ifdef CONFIG_MARKERS -void module_update_markers(struct module *probe_module, int *refcount) +void module_update_markers(void) { struct module *mod; @@ -2572,8 +2572,7 @@ void module_update_markers(struct module *probe_module, int *refcount) list_for_each_entry(mod, &modules, list) if (!mod->taints) marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers, - probe_module, refcount); + mod->markers + mod->num_markers); mutex_unlock(&module_mutex); } #endif |