Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c           |   84
-rw-r--r--  kernel/exit.c              |    3
-rw-r--r--  kernel/fork.c              |    2
-rw-r--r--  kernel/futex.c             |   21
-rw-r--r--  kernel/kmod.c              |   89
-rw-r--r--  kernel/kprobes.c           |    9
-rw-r--r--  kernel/ksysfs.c            |   28
-rw-r--r--  kernel/lockdep.c           | 1497
-rw-r--r--  kernel/lockdep_proc.c      |  301
-rw-r--r--  kernel/mutex.c             |    8
-rw-r--r--  kernel/power/Kconfig       |   29
-rw-r--r--  kernel/power/disk.c        |  251
-rw-r--r--  kernel/power/main.c        |  108
-rw-r--r--  kernel/power/power.h       |   29
-rw-r--r--  kernel/power/process.c     |   90
-rw-r--r--  kernel/power/swap.c        |   20
-rw-r--r--  kernel/power/user.c        |  154
-rw-r--r--  kernel/ptrace.c            |    2
-rw-r--r--  kernel/relay.c             |   13
-rw-r--r--  kernel/rwsem.c             |    8
-rw-r--r--  kernel/sched.c             |    2
-rw-r--r--  kernel/spinlock.c          |   32
-rw-r--r--  kernel/sys.c               |   33
-rw-r--r--  kernel/sysctl.c            |   46
-rw-r--r--  kernel/time/timekeeping.c  |    4
-rw-r--r--  kernel/timer.c             |   24
26 files changed, 1921 insertions, 966 deletions
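The kernel/kmod.c hunk below gates call_usermodehelper_exec() behind usermodehelper_disabled and drains helpers already in flight by counting them in running_helpers and sleeping on running_helpers_waitq until the count reaches zero. A minimal user-space sketch of that drain pattern, assuming pthreads and C11 atomics as stand-ins for the kernel's wait queue and atomic ops (the timeout handling is omitted and every name here is illustrative, not the kernel's):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int running_helpers;
static atomic_int helpers_disabled;
static pthread_mutex_t waitq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  waitq      = PTHREAD_COND_INITIALIZER;

/* ~ helper_unlock(): the last helper out wakes the drain waiter */
static void helper_finish(void)
{
        if (atomic_fetch_sub(&running_helpers, 1) == 1) {
                pthread_mutex_lock(&waitq_lock);
                pthread_cond_broadcast(&waitq);
                pthread_mutex_unlock(&waitq_lock);
        }
}

/* ~ helper_lock() plus the usermodehelper_disabled check in _exec() */
static int helper_start(void)
{
        atomic_fetch_add(&running_helpers, 1);
        if (atomic_load(&helpers_disabled)) {
                helper_finish();
                return -1;              /* ~ -EBUSY */
        }
        return 0;
}

/* ~ the PM_SUSPEND_PREPARE branch of usermodehelper_pm_callback() */
static void disable_helpers_and_drain(void)
{
        atomic_store(&helpers_disabled, 1);
        pthread_mutex_lock(&waitq_lock);
        while (atomic_load(&running_helpers) != 0)
                pthread_cond_wait(&waitq, &waitq_lock);
        pthread_mutex_unlock(&waitq_lock);
        puts("all helpers drained");
}

int main(void)
{
        if (helper_start() == 0) {
                /* ... helper work would run here ... */
                helper_finish();
        }
        disable_helpers_and_drain();
        return 0;
}

As in the patch, the increment happens before the disabled check and the flag is set before the drain wait, so once the waiter has observed the count at zero, any helper that starts later sees the flag and backs out.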
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b7640a5f382a..145cbb79c4b9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -153,7 +153,7 @@ struct audit_aux_data_execve { struct audit_aux_data d; int argc; int envc; - char mem[0]; + struct mm_struct *mm; }; struct audit_aux_data_socketcall { @@ -831,6 +831,55 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, return rc; } +static void audit_log_execve_info(struct audit_buffer *ab, + struct audit_aux_data_execve *axi) +{ + int i; + long len, ret; + const char __user *p = (const char __user *)axi->mm->arg_start; + char *buf; + + if (axi->mm != current->mm) + return; /* execve failed, no additional info */ + + for (i = 0; i < axi->argc; i++, p += len) { + len = strnlen_user(p, MAX_ARG_STRLEN); + /* + * We just created this mm, if we can't find the strings + * we just copied into it something is _very_ wrong. Similar + * for strings that are too long, we should not have created + * any. + */ + if (!len || len > MAX_ARG_STRLEN) { + WARN_ON(1); + send_sig(SIGKILL, current, 0); + } + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + audit_panic("out of memory for argv string\n"); + break; + } + + ret = copy_from_user(buf, p, len); + /* + * There is no reason for this copy to be short. We just + * copied them here, and the mm hasn't been exposed to user- + * space yet. + */ + if (!ret) { + WARN_ON(1); + send_sig(SIGKILL, current, 0); + } + + audit_log_format(ab, "a%d=", i); + audit_log_untrustedstring(ab, buf); + audit_log_format(ab, "\n"); + + kfree(buf); + } +} + static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { int i, call_panic = 0; @@ -971,13 +1020,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts case AUDIT_EXECVE: { struct audit_aux_data_execve *axi = (void *)aux; - int i; - const char *p; - for (i = 0, p = axi->mem; i < axi->argc; i++) { - audit_log_format(ab, "a%d=", i); - p = audit_log_untrustedstring(ab, p); - audit_log_format(ab, "\n"); - } + audit_log_execve_info(ab, axi); break; } case AUDIT_SOCKETCALL: { @@ -1821,32 +1864,31 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode return 0; } +int audit_argv_kb = 32; + int audit_bprm(struct linux_binprm *bprm) { struct audit_aux_data_execve *ax; struct audit_context *context = current->audit_context; - unsigned long p, next; - void *to; if (likely(!audit_enabled || !context || context->dummy)) return 0; - ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, - GFP_KERNEL); + /* + * Even though the stack code doesn't limit the arg+env size any more, + * the audit code requires that _all_ arguments be logged in a single + * netlink skb. 
Hence cap it :-( + */ + if (bprm->argv_len > (audit_argv_kb << 10)) + return -E2BIG; + + ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) return -ENOMEM; ax->argc = bprm->argc; ax->envc = bprm->envc; - for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) { - struct page *page = bprm->page[p / PAGE_SIZE]; - void *kaddr = kmap(page); - next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1); - memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p); - to += next - p; - kunmap(page); - } - + ax->mm = bprm->mm; ax->d.type = AUDIT_EXECVE; ax->d.next = context->aux; context->aux = (void *)ax; diff --git a/kernel/exit.c b/kernel/exit.c index e8af8d0c2483..464c2b172f07 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -45,6 +45,7 @@ #include <linux/resource.h> #include <linux/blkdev.h> #include <linux/task_io_accounting_ops.h> +#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -594,6 +595,8 @@ static void exit_mm(struct task_struct * tsk) tsk->mm = NULL; up_read(&mm->mmap_sem); enter_lazy_tlb(mm, current); + /* We don't want this task to be frozen prematurely */ + clear_freeze_flag(tsk); task_unlock(tsk); mmput(mm); } diff --git a/kernel/fork.c b/kernel/fork.c index ba39bdb2a7b8..469838998220 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -334,6 +334,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); + mm->flags = (current->mm) ? current->mm->flags + : MMF_DUMP_FILTER_DEFAULT; mm->core_waiters = 0; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); diff --git a/kernel/futex.c b/kernel/futex.c index 5c3f45d07c53..a12425051ee9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -346,15 +346,20 @@ static int futex_handle_fault(unsigned long address, vma = find_vma(mm, address); if (vma && address >= vma->vm_start && (vma->vm_flags & VM_WRITE)) { - switch (handle_mm_fault(mm, vma, address, 1)) { - case VM_FAULT_MINOR: - ret = 0; - current->min_flt++; - break; - case VM_FAULT_MAJOR: + int fault; + fault = handle_mm_fault(mm, vma, address, 1); + if (unlikely((fault & VM_FAULT_ERROR))) { +#if 0 + /* XXX: let's do this when we verify it is OK */ + if (ret & VM_FAULT_OOM) + ret = -ENOMEM; +#endif + } else { ret = 0; - current->maj_flt++; - break; + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; } } if (!fshared) diff --git a/kernel/kmod.c b/kernel/kmod.c index 78d365c524ed..beedbdc64608 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -33,6 +33,8 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/resource.h> +#include <linux/notifier.h> +#include <linux/suspend.h> #include <asm/uaccess.h> extern int max_threads; @@ -265,6 +267,88 @@ static void __call_usermodehelper(struct work_struct *work) } } +#ifdef CONFIG_PM +/* + * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY + * (used for preventing user land processes from being created after the user + * land has been frozen during a system-wide hibernation or suspend operation). + */ +static int usermodehelper_disabled; + +/* Number of helpers running */ +static atomic_t running_helpers = ATOMIC_INIT(0); + +/* + * Wait queue head used by usermodehelper_pm_callback() to wait for all running + * helpers to finish. 
+ */ +static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); + +/* + * Time to wait for running_helpers to become zero before the setting of + * usermodehelper_disabled in usermodehelper_pm_callback() fails + */ +#define RUNNING_HELPERS_TIMEOUT (5 * HZ) + +static int usermodehelper_pm_callback(struct notifier_block *nfb, + unsigned long action, + void *ignored) +{ + long retval; + + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + usermodehelper_disabled = 1; + smp_mb(); + /* + * From now on call_usermodehelper_exec() won't start any new + * helpers, so it is sufficient if running_helpers turns out to + * be zero at one point (it may be increased later, but that + * doesn't matter). + */ + retval = wait_event_timeout(running_helpers_waitq, + atomic_read(&running_helpers) == 0, + RUNNING_HELPERS_TIMEOUT); + if (retval) { + return NOTIFY_OK; + } else { + usermodehelper_disabled = 0; + return NOTIFY_BAD; + } + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + usermodehelper_disabled = 0; + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static void helper_lock(void) +{ + atomic_inc(&running_helpers); + smp_mb__after_atomic_inc(); +} + +static void helper_unlock(void) +{ + if (atomic_dec_and_test(&running_helpers)) + wake_up(&running_helpers_waitq); +} + +static void register_pm_notifier_callback(void) +{ + pm_notifier(usermodehelper_pm_callback, 0); +} +#else /* CONFIG_PM */ +#define usermodehelper_disabled 0 + +static inline void helper_lock(void) {} +static inline void helper_unlock(void) {} +static inline void register_pm_notifier_callback(void) {} +#endif /* CONFIG_PM */ + /** * call_usermodehelper_setup - prepare to call a usermode helper * @path - path to usermode executable @@ -369,12 +453,13 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, DECLARE_COMPLETION_ONSTACK(done); int retval; + helper_lock(); if (sub_info->path[0] == '\0') { retval = 0; goto out; } - if (!khelper_wq) { + if (!khelper_wq || usermodehelper_disabled) { retval = -EBUSY; goto out; } @@ -390,6 +475,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, out: call_usermodehelper_freeinfo(sub_info); + helper_unlock(); return retval; } EXPORT_SYMBOL(call_usermodehelper_exec); @@ -431,4 +517,5 @@ void __init usermodehelper_init(void) { khelper_wq = create_singlethread_workqueue("khelper"); BUG_ON(!khelper_wq); + register_pm_notifier_callback(); } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9e47d8c493f3..3e9f513a728d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -675,9 +675,18 @@ static struct notifier_block kprobe_exceptions_nb = { .priority = 0x7fffffff /* we need to be notified first */ }; +unsigned long __weak arch_deref_entry_point(void *entry) +{ + return (unsigned long)entry; +} int __kprobes register_jprobe(struct jprobe *jp) { + unsigned long addr = arch_deref_entry_point(jp->entry); + + if (!kernel_text_address(addr)) + return -EINVAL; + /* Todo: Verify probepoint is a function entry point */ jp->kp.pre_handler = setjmp_pre_handler; jp->kp.break_handler = longjmp_break_handler; diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 559deca5ed15..2565e1b6dd7b 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -62,6 +62,28 @@ static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page) KERNEL_ATTR_RO(kexec_crash_loaded); #endif /* CONFIG_KEXEC */ +/* + * Make /sys/kernel/notes give the raw contents of our kernel .notes section. 
+ */ +extern const char __start_notes __attribute__((weak)); +extern const char __stop_notes __attribute__((weak)); +#define notes_size (&__stop_notes - &__start_notes) + +static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + memcpy(buf, &__start_notes + off, count); + return count; +} + +static struct bin_attribute notes_attr = { + .attr = { + .name = "notes", + .mode = S_IRUGO, + }, + .read = ¬es_read, +}; + decl_subsys(kernel, NULL, NULL); EXPORT_SYMBOL_GPL(kernel_subsys); @@ -88,6 +110,12 @@ static int __init ksysfs_init(void) error = sysfs_create_group(&kernel_subsys.kobj, &kernel_attr_group); + if (!error && notes_size > 0) { + notes_attr.size = notes_size; + error = sysfs_create_bin_file(&kernel_subsys.kobj, + ¬es_attr); + } + return error; } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index edba2ffb43de..734da579ad13 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -5,7 +5,8 @@ * * Started by Ingo Molnar: * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * * this code maps all the lock dependencies as they occur in a live kernel * and will warn about the following classes of locking bugs: @@ -37,11 +38,26 @@ #include <linux/debug_locks.h> #include <linux/irqflags.h> #include <linux/utsname.h> +#include <linux/hash.h> #include <asm/sections.h> #include "lockdep_internals.h" +#ifdef CONFIG_PROVE_LOCKING +int prove_locking = 1; +module_param(prove_locking, int, 0644); +#else +#define prove_locking 0 +#endif + +#ifdef CONFIG_LOCK_STAT +int lock_stat = 1; +module_param(lock_stat, int, 0644); +#else +#define lock_stat 0 +#endif + /* * lockdep_lock: protects the lockdep graph, the hashes and the * class/list/hash allocators. @@ -96,23 +112,6 @@ unsigned long nr_list_entries; static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; /* - * Allocate a lockdep entry. (assumes the graph_lock held, returns - * with NULL on failure) - */ -static struct lock_list *alloc_list_entry(void) -{ - if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { - if (!debug_locks_off_graph_unlock()) - return NULL; - - printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); - printk("turning off the locking correctness validator.\n"); - return NULL; - } - return list_entries + nr_list_entries++; -} - -/* * All data structures here are protected by the global debug_lock. 
* * Mutex key structs only get allocated, once during bootup, and never @@ -121,6 +120,117 @@ static struct lock_list *alloc_list_entry(void) unsigned long nr_lock_classes; static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; +#ifdef CONFIG_LOCK_STAT +static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); + +static int lock_contention_point(struct lock_class *class, unsigned long ip) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + if (class->contention_point[i] == 0) { + class->contention_point[i] = ip; + break; + } + if (class->contention_point[i] == ip) + break; + } + + return i; +} + +static void lock_time_inc(struct lock_time *lt, s64 time) +{ + if (time > lt->max) + lt->max = time; + + if (time < lt->min || !lt->min) + lt->min = time; + + lt->total += time; + lt->nr++; +} + +static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) +{ + dst->min += src->min; + dst->max += src->max; + dst->total += src->total; + dst->nr += src->nr; +} + +struct lock_class_stats lock_stats(struct lock_class *class) +{ + struct lock_class_stats stats; + int cpu, i; + + memset(&stats, 0, sizeof(struct lock_class_stats)); + for_each_possible_cpu(cpu) { + struct lock_class_stats *pcs = + &per_cpu(lock_stats, cpu)[class - lock_classes]; + + for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) + stats.contention_point[i] += pcs->contention_point[i]; + + lock_time_add(&pcs->read_waittime, &stats.read_waittime); + lock_time_add(&pcs->write_waittime, &stats.write_waittime); + + lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); + lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); + + for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) + stats.bounces[i] += pcs->bounces[i]; + } + + return stats; +} + +void clear_lock_stats(struct lock_class *class) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct lock_class_stats *cpu_stats = + &per_cpu(lock_stats, cpu)[class - lock_classes]; + + memset(cpu_stats, 0, sizeof(struct lock_class_stats)); + } + memset(class->contention_point, 0, sizeof(class->contention_point)); +} + +static struct lock_class_stats *get_lock_stats(struct lock_class *class) +{ + return &get_cpu_var(lock_stats)[class - lock_classes]; +} + +static void put_lock_stats(struct lock_class_stats *stats) +{ + put_cpu_var(lock_stats); +} + +static void lock_release_holdtime(struct held_lock *hlock) +{ + struct lock_class_stats *stats; + s64 holdtime; + + if (!lock_stat) + return; + + holdtime = sched_clock() - hlock->holdtime_stamp; + + stats = get_lock_stats(hlock->class); + if (hlock->read) + lock_time_inc(&stats->read_holdtime, holdtime); + else + lock_time_inc(&stats->write_holdtime, holdtime); + put_lock_stats(stats); +} +#else +static inline void lock_release_holdtime(struct held_lock *hlock) +{ +} +#endif + /* * We keep a global list of all lock classes. The list only grows, * never shrinks. 
The list is only accessed with the lockdep @@ -133,24 +243,18 @@ LIST_HEAD(all_lock_classes); */ #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) -#define CLASSHASH_MASK (CLASSHASH_SIZE - 1) -#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK) +#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) #define classhashentry(key) (classhash_table + __classhashfn((key))) static struct list_head classhash_table[CLASSHASH_SIZE]; -unsigned long nr_lock_chains; -static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; - /* * We put the lock dependency chains into a hash-table as well, to cache * their existence: */ #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) -#define CHAINHASH_MASK (CHAINHASH_SIZE - 1) -#define __chainhashfn(chain) \ - (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK) +#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) static struct list_head chainhash_table[CHAINHASH_SIZE]; @@ -223,26 +327,6 @@ static int verbose(struct lock_class *class) return 0; } -#ifdef CONFIG_TRACE_IRQFLAGS - -static int hardirq_verbose(struct lock_class *class) -{ -#if HARDIRQ_VERBOSE - return class_filter(class); -#endif - return 0; -} - -static int softirq_verbose(struct lock_class *class) -{ -#if SOFTIRQ_VERBOSE - return class_filter(class); -#endif - return 0; -} - -#endif - /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the graph_lock. @@ -291,6 +375,11 @@ unsigned int max_recursion_depth; * about it later on, in lockdep_info(). */ static int lockdep_init_error; +static unsigned long lockdep_init_trace_data[20]; +static struct stack_trace lockdep_init_trace = { + .max_entries = ARRAY_SIZE(lockdep_init_trace_data), + .entries = lockdep_init_trace_data, +}; /* * Various lockdep statistics: @@ -482,6 +571,262 @@ static void print_lock_dependencies(struct lock_class *class, int depth) } } +static void print_kernel_version(void) +{ + printk("%s %.*s\n", init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); +} + +static int very_verbose(struct lock_class *class) +{ +#if VERY_VERBOSE + return class_filter(class); +#endif + return 0; +} + +/* + * Is this the address of a static object: + */ +static int static_obj(void *obj) +{ + unsigned long start = (unsigned long) &_stext, + end = (unsigned long) &_end, + addr = (unsigned long) obj; +#ifdef CONFIG_SMP + int i; +#endif + + /* + * static variable? + */ + if ((addr >= start) && (addr < end)) + return 1; + +#ifdef CONFIG_SMP + /* + * percpu var? + */ + for_each_possible_cpu(i) { + start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); + end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM + + per_cpu_offset(i); + + if ((addr >= start) && (addr < end)) + return 1; + } +#endif + + /* + * module var? 
+ */ + return is_module_address(addr); +} + +/* + * To make lock name printouts unique, we calculate a unique + * class->name_version generation counter: + */ +static int count_matching_names(struct lock_class *new_class) +{ + struct lock_class *class; + int count = 0; + + if (!new_class->name) + return 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (new_class->key - new_class->subclass == class->key) + return class->name_version; + if (class->name && !strcmp(class->name, new_class->name)) + count = max(count, class->name_version); + } + + return count + 1; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * If the architecture calls into lockdep before initializing + * the hashes then we'll warn about it later. (we cannot printk + * right now) + */ + if (unlikely(!lockdep_initialized)) { + lockdep_init(); + lockdep_init_error = 1; + save_stack_trace(&lockdep_init_trace); + } +#endif + + /* + * Static locks do not have their class-keys yet - for them the key + * is the lock object itself: + */ + if (unlikely(!lock->key)) + lock->key = (void *)lock; + + /* + * NOTE: the class-key must be unique. For dynamic locks, a static + * lock_class_key variable is passed in through the mutex_init() + * (or spin_lock_init()) call - which acts as the key. For static + * locks we use the lock object itself as the key. + */ + BUILD_BUG_ON(sizeof(struct lock_class_key) > + sizeof(struct lockdep_map)); + + key = lock->key->subkeys + subclass; + + hash_head = classhashentry(key); + + /* + * We can walk the hash lockfree, because the hash only + * grows, and we are careful when adding entries to the end: + */ + list_for_each_entry(class, hash_head, hash_entry) { + if (class->key == key) { + WARN_ON_ONCE(class->name != lock->name); + return class; + } + } + + return NULL; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + unsigned long flags; + + class = look_up_lock_class(lock, subclass); + if (likely(class)) + return class; + + /* + * Debug-check: all keys must be persistent! 
+ */ + if (!static_obj(lock->key)) { + debug_locks_off(); + printk("INFO: trying to register non-static key.\n"); + printk("the code is fine but needs lockdep annotation.\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + + return NULL; + } + + key = lock->key->subkeys + subclass; + hash_head = classhashentry(key); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + /* + * We have to do the hash-walk again, to avoid races + * with another CPU: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_unlock_set; + /* + * Allocate a new key from the static array, and add it to + * the hash: + */ + if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { + if (!debug_locks_off_graph_unlock()) { + raw_local_irq_restore(flags); + return NULL; + } + raw_local_irq_restore(flags); + + printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + class = lock_classes + nr_lock_classes++; + debug_atomic_inc(&nr_unused_locks); + class->key = key; + class->name = lock->name; + class->subclass = subclass; + INIT_LIST_HEAD(&class->lock_entry); + INIT_LIST_HEAD(&class->locks_before); + INIT_LIST_HEAD(&class->locks_after); + class->name_version = count_matching_names(class); + /* + * We use RCU's safe list-add method to make + * parallel walking of the hash-list safe: + */ + list_add_tail_rcu(&class->hash_entry, hash_head); + + if (verbose(class)) { + graph_unlock(); + raw_local_irq_restore(flags); + + printk("\nnew class %p: %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + } +out_unlock_set: + graph_unlock(); + raw_local_irq_restore(flags); + + if (!subclass || force) + lock->class_cache = class; + + if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) + return NULL; + + return class; +} + +#ifdef CONFIG_PROVE_LOCKING +/* + * Allocate a lockdep entry. 
(assumes the graph_lock held, returns + * with NULL on failure) + */ +static struct lock_list *alloc_list_entry(void) +{ + if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { + if (!debug_locks_off_graph_unlock()) + return NULL; + + printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + return list_entries + nr_list_entries++; +} + /* * Add a new dependency to the head of the list: */ @@ -542,13 +887,6 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth) return 0; } -static void print_kernel_version(void) -{ - printk("%s %.*s\n", init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); -} - /* * When a circular dependency is detected, print the * header first: @@ -640,15 +978,7 @@ check_noncircular(struct lock_class *source, unsigned int depth) return 1; } -static int very_verbose(struct lock_class *class) -{ -#if VERY_VERBOSE - return class_filter(class); -#endif - return 0; -} #ifdef CONFIG_TRACE_IRQFLAGS - /* * Forwards and backwards subgraph searching, for the purposes of * proving that two subgraphs can be connected by a new dependency @@ -821,6 +1151,78 @@ check_usage(struct task_struct *curr, struct held_lock *prev, bit_backwards, bit_forwards, irqclass); } +static int +check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + /* + * Prove that the new dependency does not connect a hardirq-safe + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, + LOCK_ENABLED_HARDIRQS, "hard")) + return 0; + + /* + * Prove that the new dependency does not connect a hardirq-safe-read + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, + LOCK_ENABLED_HARDIRQS, "hard-read")) + return 0; + + /* + * Prove that the new dependency does not connect a softirq-safe + * lock with a softirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; + /* + * Prove that the new dependency does not connect a softirq-safe-read + * lock with a softirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; + + return 1; +} + +static void inc_chains(void) +{ + if (current->hardirq_context) + nr_hardirq_chains++; + else { + if (current->softirq_context) + nr_softirq_chains++; + else + nr_process_chains++; + } +} + +#else + +static inline int +check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + return 1; +} + +static inline void inc_chains(void) +{ + nr_process_chains++; +} + #endif static int @@ -922,47 +1324,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (!(check_noncircular(next->class, 0))) return print_circular_bug_tail(); -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * Prove that the new dependency does not connect a hardirq-safe - * lock with a 
hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, - LOCK_ENABLED_HARDIRQS, "hard")) + if (!check_prev_add_irq(curr, prev, next)) return 0; /* - * Prove that the new dependency does not connect a hardirq-safe-read - * lock with a hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, - LOCK_ENABLED_HARDIRQS, "hard-read")) - return 0; - - /* - * Prove that the new dependency does not connect a softirq-safe - * lock with a softirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, - LOCK_ENABLED_SOFTIRQS, "soft")) - return 0; - /* - * Prove that the new dependency does not connect a softirq-safe-read - * lock with a softirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, - LOCK_ENABLED_SOFTIRQS, "soft")) - return 0; -#endif - /* * For recursive read-locks we do all the dependency checks, * but we dont store read-triggered dependencies (only * write-triggered dependencies). This ensures that only the @@ -1088,224 +1453,8 @@ out_bug: return 0; } - -/* - * Is this the address of a static object: - */ -static int static_obj(void *obj) -{ - unsigned long start = (unsigned long) &_stext, - end = (unsigned long) &_end, - addr = (unsigned long) obj; -#ifdef CONFIG_SMP - int i; -#endif - - /* - * static variable? - */ - if ((addr >= start) && (addr < end)) - return 1; - -#ifdef CONFIG_SMP - /* - * percpu var? - */ - for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); - - if ((addr >= start) && (addr < end)) - return 1; - } -#endif - - /* - * module var? - */ - return is_module_address(addr); -} - -/* - * To make lock name printouts unique, we calculate a unique - * class->name_version generation counter: - */ -static int count_matching_names(struct lock_class *new_class) -{ - struct lock_class *class; - int count = 0; - - if (!new_class->name) - return 0; - - list_for_each_entry(class, &all_lock_classes, lock_entry) { - if (new_class->key - new_class->subclass == class->key) - return class->name_version; - if (class->name && !strcmp(class->name, new_class->name)) - count = max(count, class->name_version); - } - - return count + 1; -} - -/* - * Register a lock's class in the hash-table, if the class is not present - * yet. Otherwise we look it up. We cache the result in the lock object - * itself, so actual lookup of the hash should be once per lock object. - */ -static inline struct lock_class * -look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) -{ - struct lockdep_subclass_key *key; - struct list_head *hash_head; - struct lock_class *class; - -#ifdef CONFIG_DEBUG_LOCKDEP - /* - * If the architecture calls into lockdep before initializing - * the hashes then we'll warn about it later. 
(we cannot printk - * right now) - */ - if (unlikely(!lockdep_initialized)) { - lockdep_init(); - lockdep_init_error = 1; - } -#endif - - /* - * Static locks do not have their class-keys yet - for them the key - * is the lock object itself: - */ - if (unlikely(!lock->key)) - lock->key = (void *)lock; - - /* - * NOTE: the class-key must be unique. For dynamic locks, a static - * lock_class_key variable is passed in through the mutex_init() - * (or spin_lock_init()) call - which acts as the key. For static - * locks we use the lock object itself as the key. - */ - BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class)); - - key = lock->key->subkeys + subclass; - - hash_head = classhashentry(key); - - /* - * We can walk the hash lockfree, because the hash only - * grows, and we are careful when adding entries to the end: - */ - list_for_each_entry(class, hash_head, hash_entry) - if (class->key == key) - return class; - - return NULL; -} - -/* - * Register a lock's class in the hash-table, if the class is not present - * yet. Otherwise we look it up. We cache the result in the lock object - * itself, so actual lookup of the hash should be once per lock object. - */ -static inline struct lock_class * -register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) -{ - struct lockdep_subclass_key *key; - struct list_head *hash_head; - struct lock_class *class; - unsigned long flags; - - class = look_up_lock_class(lock, subclass); - if (likely(class)) - return class; - - /* - * Debug-check: all keys must be persistent! - */ - if (!static_obj(lock->key)) { - debug_locks_off(); - printk("INFO: trying to register non-static key.\n"); - printk("the code is fine but needs lockdep annotation.\n"); - printk("turning off the locking correctness validator.\n"); - dump_stack(); - - return NULL; - } - - key = lock->key->subkeys + subclass; - hash_head = classhashentry(key); - - raw_local_irq_save(flags); - if (!graph_lock()) { - raw_local_irq_restore(flags); - return NULL; - } - /* - * We have to do the hash-walk again, to avoid races - * with another CPU: - */ - list_for_each_entry(class, hash_head, hash_entry) - if (class->key == key) - goto out_unlock_set; - /* - * Allocate a new key from the static array, and add it to - * the hash: - */ - if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { - if (!debug_locks_off_graph_unlock()) { - raw_local_irq_restore(flags); - return NULL; - } - raw_local_irq_restore(flags); - - printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); - printk("turning off the locking correctness validator.\n"); - return NULL; - } - class = lock_classes + nr_lock_classes++; - debug_atomic_inc(&nr_unused_locks); - class->key = key; - class->name = lock->name; - class->subclass = subclass; - INIT_LIST_HEAD(&class->lock_entry); - INIT_LIST_HEAD(&class->locks_before); - INIT_LIST_HEAD(&class->locks_after); - class->name_version = count_matching_names(class); - /* - * We use RCU's safe list-add method to make - * parallel walking of the hash-list safe: - */ - list_add_tail_rcu(&class->hash_entry, hash_head); - - if (verbose(class)) { - graph_unlock(); - raw_local_irq_restore(flags); - - printk("\nnew class %p: %s", class->key, class->name); - if (class->name_version > 1) - printk("#%d", class->name_version); - printk("\n"); - dump_stack(); - - raw_local_irq_save(flags); - if (!graph_lock()) { - raw_local_irq_restore(flags); - return NULL; - } - } -out_unlock_set: - graph_unlock(); - raw_local_irq_restore(flags); - - if (!subclass || force) - lock->class_cache = class; - - if 
(DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) - return NULL; - - return class; -} +unsigned long nr_lock_chains; +static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; /* * Look up a dependency chain. If the key is not present yet then @@ -1366,21 +1515,72 @@ cache_hit: chain->chain_key = chain_key; list_add_tail_rcu(&chain->entry, hash_head); debug_atomic_inc(&chain_lookup_misses); -#ifdef CONFIG_TRACE_IRQFLAGS - if (current->hardirq_context) - nr_hardirq_chains++; - else { - if (current->softirq_context) - nr_softirq_chains++; - else - nr_process_chains++; - } -#else - nr_process_chains++; -#endif + inc_chains(); + + return 1; +} + +static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, + struct held_lock *hlock, int chain_head) +{ + /* + * Trylock needs to maintain the stack of held locks, but it + * does not add new dependencies, because trylock can be done + * in any order. + * + * We look up the chain_key and do the O(N^2) check and update of + * the dependencies only if this is a new dependency chain. + * (If lookup_chain_cache() returns with 1 it acquires + * graph_lock for us) + */ + if (!hlock->trylock && (hlock->check == 2) && + lookup_chain_cache(curr->curr_chain_key, hlock->class)) { + /* + * Check whether last held lock: + * + * - is irq-safe, if this lock is irq-unsafe + * - is softirq-safe, if this lock is hardirq-unsafe + * + * And check whether the new lock's dependency graph + * could lead back to the previous lock. + * + * any of these scenarios could lead to a deadlock. If + * All validations + */ + int ret = check_deadlock(curr, hlock, lock, hlock->read); + + if (!ret) + return 0; + /* + * Mark recursive read, as we jump over it when + * building dependencies (just like we jump over + * trylock entries): + */ + if (ret == 2) + hlock->read = 2; + /* + * Add dependency only if this lock is not the head + * of the chain, and if it's not a secondary read-lock: + */ + if (!chain_head && ret != 2) + if (!check_prevs_add(curr, hlock)) + return 0; + graph_unlock(); + } else + /* after lookup_chain_cache(): */ + if (unlikely(!debug_locks)) + return 0; return 1; } +#else +static inline int validate_chain(struct task_struct *curr, + struct lockdep_map *lock, struct held_lock *hlock, + int chain_head) +{ + return 1; +} +#endif /* * We are building curr_chain_key incrementally, so double-check @@ -1425,6 +1625,57 @@ static void check_chain_key(struct task_struct *curr) #endif } +static int +print_usage_bug(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) +{ + if (!debug_locks_off_graph_unlock() || debug_locks_silent) + return 0; + + printk("\n=================================\n"); + printk( "[ INFO: inconsistent lock state ]\n"); + print_kernel_version(); + printk( "---------------------------------\n"); + + printk("inconsistent {%s} -> {%s} usage.\n", + usage_str[prev_bit], usage_str[new_bit]); + + printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", + curr->comm, curr->pid, + trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, + trace_hardirqs_enabled(curr), + trace_softirqs_enabled(curr)); + print_lock(this); + + printk("{%s} state was registered at:\n", usage_str[prev_bit]); + print_stack_trace(this->class->usage_traces + prev_bit, 1); + + print_irqtrace_events(curr); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + 
dump_stack(); + + return 0; +} + +/* + * Print out an error if an invalid bit is set: + */ +static inline int +valid_state(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) +{ + if (unlikely(this->class->usage_mask & (1 << bad_bit))) + return print_usage_bug(curr, this, bad_bit, new_bit); + return 1; +} + +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit); + #ifdef CONFIG_TRACE_IRQFLAGS /* @@ -1518,90 +1769,30 @@ void print_irqtrace_events(struct task_struct *curr) print_ip_sym(curr->softirq_disable_ip); } -#endif - -static int -print_usage_bug(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) +static int hardirq_verbose(struct lock_class *class) { - if (!debug_locks_off_graph_unlock() || debug_locks_silent) - return 0; - - printk("\n=================================\n"); - printk( "[ INFO: inconsistent lock state ]\n"); - print_kernel_version(); - printk( "---------------------------------\n"); - - printk("inconsistent {%s} -> {%s} usage.\n", - usage_str[prev_bit], usage_str[new_bit]); - - printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", - curr->comm, curr->pid, - trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, - trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, - trace_hardirqs_enabled(curr), - trace_softirqs_enabled(curr)); - print_lock(this); - - printk("{%s} state was registered at:\n", usage_str[prev_bit]); - print_stack_trace(this->class->usage_traces + prev_bit, 1); - - print_irqtrace_events(curr); - printk("\nother info that might help us debug this:\n"); - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - +#if HARDIRQ_VERBOSE + return class_filter(class); +#endif return 0; } -/* - * Print out an error if an invalid bit is set: - */ -static inline int -valid_state(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) +static int softirq_verbose(struct lock_class *class) { - if (unlikely(this->class->usage_mask & (1 << bad_bit))) - return print_usage_bug(curr, this, bad_bit, new_bit); - return 1; +#if SOFTIRQ_VERBOSE + return class_filter(class); +#endif + return 0; } #define STRICT_READ_CHECKS 1 -/* - * Mark a lock with a usage bit, and validate the state transition: - */ -static int mark_lock(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) +static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) { - unsigned int new_mask = 1 << new_bit, ret = 1; - - /* - * If already set then do not dirty the cacheline, - * nor do any checks: - */ - if (likely(this->class->usage_mask & new_mask)) - return 1; - - if (!graph_lock()) - return 0; - /* - * Make sure we didnt race: - */ - if (unlikely(this->class->usage_mask & new_mask)) { - graph_unlock(); - return 1; - } - - this->class->usage_mask |= new_mask; + int ret = 1; - if (!save_trace(this->class->usage_traces + new_bit)) - return 0; - - switch (new_bit) { -#ifdef CONFIG_TRACE_IRQFLAGS + switch(new_bit) { case LOCK_USED_IN_HARDIRQ: if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) return 0; @@ -1760,37 +1951,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, if (softirq_verbose(this->class)) ret = 2; break; -#endif - case LOCK_USED: - /* - * Add it to the global list of classes: - */ - 
list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); - debug_atomic_dec(&nr_unused_locks); - break; default: - if (!debug_locks_off_graph_unlock()) - return 0; WARN_ON(1); - return 0; - } - - graph_unlock(); - - /* - * We must printk outside of the graph_lock: - */ - if (ret == 2) { - printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); - print_lock(this); - print_irqtrace_events(curr); - dump_stack(); + break; } return ret; } -#ifdef CONFIG_TRACE_IRQFLAGS /* * Mark all held locks with a usage bit: */ @@ -1973,9 +2141,176 @@ void trace_softirqs_off(unsigned long ip) debug_atomic_inc(&redundant_softirqs_off); } +static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) +{ + /* + * If non-trylock use in a hardirq or softirq context, then + * mark the lock as used in these contexts: + */ + if (!hlock->trylock) { + if (hlock->read) { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_HARDIRQ_READ)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_SOFTIRQ_READ)) + return 0; + } else { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) + return 0; + } + } + if (!hlock->hardirqs_off) { + if (hlock->read) { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS_READ)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS_READ)) + return 0; + } else { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS)) + return 0; + } + } + + return 1; +} + +static int separate_irq_context(struct task_struct *curr, + struct held_lock *hlock) +{ + unsigned int depth = curr->lockdep_depth; + + /* + * Keep track of points where we cross into an interrupt context: + */ + hlock->irq_context = 2*(curr->hardirq_context ? 
1 : 0) + + curr->softirq_context; + if (depth) { + struct held_lock *prev_hlock; + + prev_hlock = curr->held_locks + depth-1; + /* + * If we cross into another context, reset the + * hash key (this also prevents the checking and the + * adding of the dependency to 'prev'): + */ + if (prev_hlock->irq_context != hlock->irq_context) + return 1; + } + return 0; +} + +#else + +static inline +int mark_lock_irq(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) +{ + WARN_ON(1); + return 1; +} + +static inline int mark_irqflags(struct task_struct *curr, + struct held_lock *hlock) +{ + return 1; +} + +static inline int separate_irq_context(struct task_struct *curr, + struct held_lock *hlock) +{ + return 0; +} + #endif /* + * Mark a lock with a usage bit, and validate the state transition: + */ +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) +{ + unsigned int new_mask = 1 << new_bit, ret = 1; + + /* + * If already set then do not dirty the cacheline, + * nor do any checks: + */ + if (likely(this->class->usage_mask & new_mask)) + return 1; + + if (!graph_lock()) + return 0; + /* + * Make sure we didnt race: + */ + if (unlikely(this->class->usage_mask & new_mask)) { + graph_unlock(); + return 1; + } + + this->class->usage_mask |= new_mask; + + if (!save_trace(this->class->usage_traces + new_bit)) + return 0; + + switch (new_bit) { + case LOCK_USED_IN_HARDIRQ: + case LOCK_USED_IN_SOFTIRQ: + case LOCK_USED_IN_HARDIRQ_READ: + case LOCK_USED_IN_SOFTIRQ_READ: + case LOCK_ENABLED_HARDIRQS: + case LOCK_ENABLED_SOFTIRQS: + case LOCK_ENABLED_HARDIRQS_READ: + case LOCK_ENABLED_SOFTIRQS_READ: + ret = mark_lock_irq(curr, this, new_bit); + if (!ret) + return 0; + break; + case LOCK_USED: + /* + * Add it to the global list of classes: + */ + list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); + debug_atomic_dec(&nr_unused_locks); + break; + default: + if (!debug_locks_off_graph_unlock()) + return 0; + WARN_ON(1); + return 0; + } + + graph_unlock(); + + /* + * We must printk outside of the graph_lock: + */ + if (ret == 2) { + printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); + print_lock(this); + print_irqtrace_events(curr); + dump_stack(); + } + + return ret; +} + +/* * Initialize a lock instance's lock-class mapping info: */ void lockdep_init_map(struct lockdep_map *lock, const char *name, @@ -1999,6 +2334,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, lock->name = name; lock->key = key; lock->class_cache = NULL; +#ifdef CONFIG_LOCK_STAT + lock->cpu = raw_smp_processor_id(); +#endif if (subclass) register_lock_class(lock, subclass, 1); } @@ -2020,6 +2358,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int chain_head = 0; u64 chain_key; + if (!prove_locking) + check = 1; + if (unlikely(!debug_locks)) return 0; @@ -2070,57 +2411,18 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->read = read; hlock->check = check; hlock->hardirqs_off = hardirqs_off; - - if (check != 2) - goto out_calc_hash; -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If non-trylock use in a hardirq or softirq context, then - * mark the lock as used in these contexts: - */ - if (!trylock) { - if (read) { - if (curr->hardirq_context) - if (!mark_lock(curr, hlock, - LOCK_USED_IN_HARDIRQ_READ)) - return 0; - if (curr->softirq_context) - if (!mark_lock(curr, hlock, - LOCK_USED_IN_SOFTIRQ_READ)) - return 0; - } else { - if (curr->hardirq_context) - if (!mark_lock(curr, 
hlock, LOCK_USED_IN_HARDIRQ)) - return 0; - if (curr->softirq_context) - if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) - return 0; - } - } - if (!hardirqs_off) { - if (read) { - if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQS_READ)) - return 0; - if (curr->softirqs_enabled) - if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQS_READ)) - return 0; - } else { - if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQS)) - return 0; - if (curr->softirqs_enabled) - if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQS)) - return 0; - } - } +#ifdef CONFIG_LOCK_STAT + hlock->waittime_stamp = 0; + hlock->holdtime_stamp = sched_clock(); #endif + + if (check == 2 && !mark_irqflags(curr, hlock)) + return 0; + /* mark it as used: */ if (!mark_lock(curr, hlock, LOCK_USED)) return 0; -out_calc_hash: + /* * Calculate the chain hash: it's the combined has of all the * lock keys along the dependency chain. We save the hash value @@ -2143,77 +2445,15 @@ out_calc_hash: } hlock->prev_chain_key = chain_key; - -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * Keep track of points where we cross into an interrupt context: - */ - hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + - curr->softirq_context; - if (depth) { - struct held_lock *prev_hlock; - - prev_hlock = curr->held_locks + depth-1; - /* - * If we cross into another context, reset the - * hash key (this also prevents the checking and the - * adding of the dependency to 'prev'): - */ - if (prev_hlock->irq_context != hlock->irq_context) { - chain_key = 0; - chain_head = 1; - } + if (separate_irq_context(curr, hlock)) { + chain_key = 0; + chain_head = 1; } -#endif chain_key = iterate_chain_key(chain_key, id); curr->curr_chain_key = chain_key; - /* - * Trylock needs to maintain the stack of held locks, but it - * does not add new dependencies, because trylock can be done - * in any order. - * - * We look up the chain_key and do the O(N^2) check and update of - * the dependencies only if this is a new dependency chain. - * (If lookup_chain_cache() returns with 1 it acquires - * graph_lock for us) - */ - if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) { - /* - * Check whether last held lock: - * - * - is irq-safe, if this lock is irq-unsafe - * - is softirq-safe, if this lock is hardirq-unsafe - * - * And check whether the new lock's dependency graph - * could lead back to the previous lock. - * - * any of these scenarios could lead to a deadlock. If - * All validations - */ - int ret = check_deadlock(curr, hlock, lock, read); - - if (!ret) - return 0; - /* - * Mark recursive read, as we jump over it when - * building dependencies (just like we jump over - * trylock entries): - */ - if (ret == 2) - hlock->read = 2; - /* - * Add dependency only if this lock is not the head - * of the chain, and if it's not a secondary read-lock: - */ - if (!chain_head && ret != 2) - if (!check_prevs_add(curr, hlock)) - return 0; - graph_unlock(); - } else - /* after lookup_chain_cache(): */ - if (unlikely(!debug_locks)) - return 0; + if (!validate_chain(curr, lock, hlock, chain_head)) + return 0; curr->lockdep_depth++; check_chain_key(curr); @@ -2315,6 +2555,8 @@ lock_release_non_nested(struct task_struct *curr, return print_unlock_inbalance_bug(curr, lock, ip); found_it: + lock_release_holdtime(hlock); + /* * We have the right lock to unlock, 'hlock' points to it. 
* Now we remove it from the stack, and add back the other @@ -2367,6 +2609,8 @@ static int lock_release_nested(struct task_struct *curr, curr->curr_chain_key = hlock->prev_chain_key; + lock_release_holdtime(hlock); + #ifdef CONFIG_DEBUG_LOCKDEP hlock->prev_chain_key = 0; hlock->class = NULL; @@ -2441,6 +2685,9 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, { unsigned long flags; + if (unlikely(!lock_stat && !prove_locking)) + return; + if (unlikely(current->lockdep_recursion)) return; @@ -2460,6 +2707,9 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { unsigned long flags; + if (unlikely(!lock_stat && !prove_locking)) + return; + if (unlikely(current->lockdep_recursion)) return; @@ -2473,6 +2723,166 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) EXPORT_SYMBOL_GPL(lock_release); +#ifdef CONFIG_LOCK_STAT +static int +print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n=================================\n"); + printk( "[ BUG: bad contention detected! ]\n"); + printk( "---------------------------------\n"); + printk("%s/%d is trying to contend lock (", + curr->comm, curr->pid); + print_lockdep_cache(lock); + printk(") at:\n"); + print_ip_sym(ip); + printk("but there are no locks held!\n"); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static void +__lock_contended(struct lockdep_map *lock, unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock, *prev_hlock; + struct lock_class_stats *stats; + unsigned int depth; + int i, point; + + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(!depth)) + return; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (hlock->instance == lock) + goto found_it; + prev_hlock = hlock; + } + print_lock_contention_bug(curr, lock, ip); + return; + +found_it: + hlock->waittime_stamp = sched_clock(); + + point = lock_contention_point(hlock->class, ip); + + stats = get_lock_stats(hlock->class); + if (point < ARRAY_SIZE(stats->contention_point)) + stats->contention_point[i]++; + if (lock->cpu != smp_processor_id()) + stats->bounces[bounce_contended + !!hlock->read]++; + put_lock_stats(stats); +} + +static void +__lock_acquired(struct lockdep_map *lock) +{ + struct task_struct *curr = current; + struct held_lock *hlock, *prev_hlock; + struct lock_class_stats *stats; + unsigned int depth; + u64 now; + s64 waittime = 0; + int i, cpu; + + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(!depth)) + return; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (hlock->instance == lock) + goto found_it; + prev_hlock = hlock; + } + print_lock_contention_bug(curr, lock, _RET_IP_); + return; + +found_it: + cpu = smp_processor_id(); + if (hlock->waittime_stamp) { + now = sched_clock(); + waittime = now - hlock->waittime_stamp; + hlock->holdtime_stamp = now; + } + + stats = get_lock_stats(hlock->class); + if (waittime) { + if (hlock->read) + 
lock_time_inc(&stats->read_waittime, waittime); + else + lock_time_inc(&stats->write_waittime, waittime); + } + if (lock->cpu != cpu) + stats->bounces[bounce_acquired + !!hlock->read]++; + put_lock_stats(stats); + + lock->cpu = cpu; +} + +void lock_contended(struct lockdep_map *lock, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(!lock_stat)) + return; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + __lock_contended(lock, ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_contended); + +void lock_acquired(struct lockdep_map *lock) +{ + unsigned long flags; + + if (unlikely(!lock_stat)) + return; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + __lock_acquired(lock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_acquired); +#endif + /* * Used by the testsuite, sanitize the validator state * after a simulated failure: @@ -2636,8 +3046,11 @@ void __init lockdep_info(void) sizeof(struct held_lock) * MAX_LOCK_DEPTH); #ifdef CONFIG_DEBUG_LOCKDEP - if (lockdep_init_error) - printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); + if (lockdep_init_error) { + printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n"); + printk("Call stack leading to lockdep invocation was:\n"); + print_stack_trace(&lockdep_init_trace, 0); + } #endif } diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 58f35e586ee3..9f17af4a2490 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -5,7 +5,8 @@ * * Started by Ingo Molnar: * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * * Code for /proc/lockdep and /proc/lockdep_stats: * @@ -15,6 +16,10 @@ #include <linux/seq_file.h> #include <linux/kallsyms.h> #include <linux/debug_locks.h> +#include <linux/vmalloc.h> +#include <linux/sort.h> +#include <asm/uaccess.h> +#include <asm/div64.h> #include "lockdep_internals.h" @@ -271,8 +276,10 @@ static int lockdep_stats_show(struct seq_file *m, void *v) if (nr_list_entries) factor = sum_forward_deps / nr_list_entries; +#ifdef CONFIG_PROVE_LOCKING seq_printf(m, " dependency chains: %11lu [max: %lu]\n", nr_lock_chains, MAX_LOCKDEP_CHAINS); +#endif #ifdef CONFIG_TRACE_IRQFLAGS seq_printf(m, " in-hardirq chains: %11u\n", @@ -342,6 +349,292 @@ static const struct file_operations proc_lockdep_stats_operations = { .release = seq_release, }; +#ifdef CONFIG_LOCK_STAT + +struct lock_stat_data { + struct lock_class *class; + struct lock_class_stats stats; +}; + +struct lock_stat_seq { + struct lock_stat_data *iter; + struct lock_stat_data *iter_end; + struct lock_stat_data stats[MAX_LOCKDEP_KEYS]; +}; + +/* + * sort on absolute number of contentions + */ +static int lock_stat_cmp(const void *l, const void *r) +{ + const struct lock_stat_data *dl = l, *dr = r; + unsigned long nl, nr; + + nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr; + nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr; + + return nr - nl; +} + +static void seq_line(struct seq_file *m, char c, int offset, int length) +{ + int i; + + for (i = 0; i < offset; i++) + seq_puts(m, " "); + for (i = 0; 
i < length; i++) + seq_printf(m, "%c", c); + seq_puts(m, "\n"); +} + +static void snprint_time(char *buf, size_t bufsiz, s64 nr) +{ + unsigned long rem; + + rem = do_div(nr, 1000); /* XXX: do_div_signed */ + snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10); +} + +static void seq_time(struct seq_file *m, s64 time) +{ + char num[15]; + + snprint_time(num, sizeof(num), time); + seq_printf(m, " %14s", num); +} + +static void seq_lock_time(struct seq_file *m, struct lock_time *lt) +{ + seq_printf(m, "%14lu", lt->nr); + seq_time(m, lt->min); + seq_time(m, lt->max); + seq_time(m, lt->total); +} + +static void seq_stats(struct seq_file *m, struct lock_stat_data *data) +{ + char name[39]; + struct lock_class *class; + struct lock_class_stats *stats; + int i, namelen; + + class = data->class; + stats = &data->stats; + + namelen = 38; + if (class->name_version > 1) + namelen -= 2; /* XXX truncates versions > 9 */ + if (class->subclass) + namelen -= 2; + + if (!class->name) { + char str[KSYM_NAME_LEN]; + const char *key_name; + + key_name = __get_key_name(class->key, str); + snprintf(name, namelen, "%s", key_name); + } else { + snprintf(name, namelen, "%s", class->name); + } + namelen = strlen(name); + if (class->name_version > 1) { + snprintf(name+namelen, 3, "#%d", class->name_version); + namelen += 2; + } + if (class->subclass) { + snprintf(name+namelen, 3, "/%d", class->subclass); + namelen += 2; + } + + if (stats->write_holdtime.nr) { + if (stats->read_holdtime.nr) + seq_printf(m, "%38s-W:", name); + else + seq_printf(m, "%40s:", name); + + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]); + seq_lock_time(m, &stats->write_waittime); + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]); + seq_lock_time(m, &stats->write_holdtime); + seq_puts(m, "\n"); + } + + if (stats->read_holdtime.nr) { + seq_printf(m, "%38s-R:", name); + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]); + seq_lock_time(m, &stats->read_waittime); + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]); + seq_lock_time(m, &stats->read_holdtime); + seq_puts(m, "\n"); + } + + if (stats->read_waittime.nr + stats->write_waittime.nr == 0) + return; + + if (stats->read_holdtime.nr) + namelen += 2; + + for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + char sym[KSYM_SYMBOL_LEN]; + char ip[32]; + + if (class->contention_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + sprint_symbol(sym, class->contention_point[i]); + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contention_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contention_point[i], + ip, sym); + } + if (i) { + seq_puts(m, "\n"); + seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); + seq_puts(m, "\n"); + } +} + +static void seq_header(struct seq_file *m) +{ + seq_printf(m, "lock_stat version 0.2\n"); + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " + "%14s %14s\n", + "class name", + "con-bounces", + "contentions", + "waittime-min", + "waittime-max", + "waittime-total", + "acq-bounces", + "acquisitions", + "holdtime-min", + "holdtime-max", + "holdtime-total"); + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + seq_printf(m, "\n"); +} + +static void *ls_start(struct seq_file *m, loff_t *pos) +{ + struct lock_stat_seq *data = m->private; + + if (data->iter == data->stats) + seq_header(m); + + if (data->iter == data->iter_end) + data->iter = NULL; + + return data->iter; +} + +static void 
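A note on the do_div() used by snprint_time() above: the macro divides its 64-bit dividend in place and yields the remainder, which is why nr can be printed directly afterwards (and why the signed case is flagged with the XXX). A tiny, hypothetical illustration:

	#include <linux/kernel.h>
	#include <asm/div64.h>

	static void example_print_usecs(u64 ns)
	{
		u32 rem;

		rem = do_div(ns, 1000);	/* ns now holds microseconds, rem the leftover ns */
		printk(KERN_DEBUG "%llu.%03u us\n", (unsigned long long)ns, rem);
	}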
*ls_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct lock_stat_seq *data = m->private; + + (*pos)++; + + data->iter = v; + data->iter++; + if (data->iter == data->iter_end) + data->iter = NULL; + + return data->iter; +} + +static void ls_stop(struct seq_file *m, void *v) +{ +} + +static int ls_show(struct seq_file *m, void *v) +{ + struct lock_stat_seq *data = m->private; + + seq_stats(m, data->iter); + return 0; +} + +static struct seq_operations lockstat_ops = { + .start = ls_start, + .next = ls_next, + .stop = ls_stop, + .show = ls_show, +}; + +static int lock_stat_open(struct inode *inode, struct file *file) +{ + int res; + struct lock_class *class; + struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq)); + + if (!data) + return -ENOMEM; + + res = seq_open(file, &lockstat_ops); + if (!res) { + struct lock_stat_data *iter = data->stats; + struct seq_file *m = file->private_data; + + data->iter = iter; + list_for_each_entry(class, &all_lock_classes, lock_entry) { + iter->class = class; + iter->stats = lock_stats(class); + iter++; + } + data->iter_end = iter; + + sort(data->stats, data->iter_end - data->iter, + sizeof(struct lock_stat_data), + lock_stat_cmp, NULL); + + m->private = data; + } else + vfree(data); + + return res; +} + +static ssize_t lock_stat_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct lock_class *class; + char c; + + if (count) { + if (get_user(c, buf)) + return -EFAULT; + + if (c != '0') + return count; + + list_for_each_entry(class, &all_lock_classes, lock_entry) + clear_lock_stats(class); + } + return count; +} + +static int lock_stat_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + vfree(seq->private); + seq->private = NULL; + return seq_release(inode, file); +} + +static const struct file_operations proc_lock_stat_operations = { + .open = lock_stat_open, + .write = lock_stat_write, + .read = seq_read, + .llseek = seq_lseek, + .release = lock_stat_release, +}; +#endif /* CONFIG_LOCK_STAT */ + static int __init lockdep_proc_init(void) { struct proc_dir_entry *entry; @@ -354,6 +647,12 @@ static int __init lockdep_proc_init(void) if (entry) entry->proc_fops = &proc_lockdep_stats_operations; +#ifdef CONFIG_LOCK_STAT + entry = create_proc_entry("lock_stat", S_IRUSR, NULL); + if (entry) + entry->proc_fops = &proc_lock_stat_operations; +#endif + return 0; } diff --git a/kernel/mutex.c b/kernel/mutex.c index 303eab18484b..691b86564dd9 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -139,6 +139,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; + old_val = atomic_xchg(&lock->count, -1); + if (old_val == 1) + goto done; + + lock_contended(&lock->dep_map, _RET_IP_); + for (;;) { /* * Lets try to take the lock again - this is needed even if @@ -174,6 +180,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) spin_lock_mutex(&lock->wait_lock, flags); } +done: + lock_acquired(&lock->dep_map); /* got the lock - rejoice! 
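lock_stat_write() above clears every class's statistics when the first byte written is '0'; together with the lock_stat sysctl added to kernel/sysctl.c further down, a measurement cycle from userspace is just open/write/read. A sketch (run as root; the proc entry is created with mode S_IRUSR here):

	#include <fcntl.h>
	#include <unistd.h>

	/* Reset the counters in /proc/lock_stat before a measurement run. */
	static int reset_lock_stat(void)
	{
		int fd = open("/proc/lock_stat", O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, "0", 1);
		close(fd);
		return n == 1 ? 0 : -1;
	}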
*/ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 495b7d4dd330..7358609e4735 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -33,13 +33,20 @@ config PM_DEBUG bool "Power Management Debug Support" depends on PM ---help--- - This option enables verbose debugging support in the Power Management - code. This is helpful when debugging and reporting various PM bugs, - like suspend support. + This option enables various debugging support in the Power Management + code. This is helpful when debugging and reporting PM bugs, like + suspend support. + +config PM_VERBOSE + bool "Verbose Power Management debugging" + depends on PM_DEBUG + default n + ---help--- + This option enables verbose messages from the Power Management code. config DISABLE_CONSOLE_SUSPEND bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" - depends on PM && PM_DEBUG + depends on PM_DEBUG default n ---help--- This option turns off the console suspend mechanism that prevents @@ -50,7 +57,7 @@ config DISABLE_CONSOLE_SUSPEND config PM_TRACE bool "Suspend/resume event tracing" - depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL + depends on PM_DEBUG && X86_32 && EXPERIMENTAL default n ---help--- This enables some cheesy code to save the last PM event point in the @@ -65,18 +72,6 @@ config PM_TRACE CAUTION: this option will cause your machine's real-time clock to be set to an invalid time after a resume. -config PM_SYSFS_DEPRECATED - bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" - depends on PM && SYSFS - default n - help - The driver model started out with a sysfs file intended to provide - a userspace hook for device power management. This feature has never - worked very well, except for limited testing purposes, and so it will - be removed. It's not clear that a generic mechanism could really - handle the wide variability of device power states; any replacements - are likely to be bus or driver specific. - config SOFTWARE_SUSPEND bool "Software Suspend (Hibernation)" depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f445b9cd60fb..324ac0188ce1 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -45,7 +45,7 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; -struct hibernation_ops *hibernation_ops; +static struct hibernation_ops *hibernation_ops; /** * hibernation_set_ops - set the global hibernate operations @@ -54,7 +54,8 @@ struct hibernation_ops *hibernation_ops; void hibernation_set_ops(struct hibernation_ops *ops) { - if (ops && !(ops->prepare && ops->enter && ops->finish)) { + if (ops && !(ops->prepare && ops->enter && ops->finish + && ops->pre_restore && ops->restore_cleanup)) { WARN_ON(1); return; } @@ -74,9 +75,9 @@ void hibernation_set_ops(struct hibernation_ops *ops) * platform driver if so configured and return an error code if it fails */ -static int platform_prepare(void) +static int platform_prepare(int platform_mode) { - return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ? + return (platform_mode && hibernation_ops) ? 
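With the hibernation_set_ops() change above, an ops structure that lacks any of the five callbacks is rejected with a WARN_ON, so platform drivers now have to supply pre_restore() and restore_cleanup() as well. A hypothetical registration (names invented, signatures inferred from how disk.c invokes the hooks):

	#include <linux/suspend.h>

	static int foo_hib_prepare(void)		{ return 0; }
	static int foo_hib_enter(void)			{ return 0; /* enter S4 via firmware */ }
	static void foo_hib_finish(void)		{ }
	static int foo_hib_pre_restore(void)		{ return 0; }
	static void foo_hib_restore_cleanup(void)	{ }

	static struct hibernation_ops foo_hibernation_ops = {
		.prepare		= foo_hib_prepare,
		.enter			= foo_hib_enter,
		.finish			= foo_hib_finish,
		.pre_restore		= foo_hib_pre_restore,
		.restore_cleanup	= foo_hib_restore_cleanup,
	};

	/* from the platform driver's init path: hibernation_set_ops(&foo_hibernation_ops); */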
hibernation_ops->prepare() : 0; } @@ -85,13 +86,145 @@ static int platform_prepare(void) * using the platform driver (must be called after platform_prepare()) */ -static void platform_finish(void) +static void platform_finish(int platform_mode) { - if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) + if (platform_mode && hibernation_ops) hibernation_ops->finish(); } /** + * platform_pre_restore - prepare the platform for the restoration from a + * hibernation image. If the restore fails after this function has been + * called, platform_restore_cleanup() must be called. + */ + +static int platform_pre_restore(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_restore() : 0; +} + +/** + * platform_restore_cleanup - switch the platform to the normal mode of + * operation after a failing restore. If platform_pre_restore() has been + * called before the failing restore, this function must be called too, + * regardless of the result of platform_pre_restore(). + */ + +static void platform_restore_cleanup(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->restore_cleanup(); +} + +/** + * hibernation_snapshot - quiesce devices and create the hibernation + * snapshot image. + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform frimware for the power transition. + * + * Must be called with pm_mutex held + */ + +int hibernation_snapshot(int platform_mode) +{ + int error; + + /* Free memory before shutting down devices. */ + error = swsusp_shrink_memory(); + if (error) + return error; + + suspend_console(); + error = device_suspend(PMSG_FREEZE); + if (error) + goto Resume_console; + + error = platform_prepare(platform_mode); + if (error) + goto Resume_devices; + + error = disable_nonboot_cpus(); + if (!error) { + if (hibernation_mode != HIBERNATION_TEST) { + in_suspend = 1; + error = swsusp_suspend(); + /* Control returns here after successful restore */ + } else { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + } + } + enable_nonboot_cpus(); + Resume_devices: + platform_finish(platform_mode); + device_resume(); + Resume_console: + resume_console(); + return error; +} + +/** + * hibernation_restore - quiesce devices and restore the hibernation + * snapshot image. If successful, control returns in hibernation_snaphot() + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform frimware for the transition. 
+ * + * Must be called with pm_mutex held + */ + +int hibernation_restore(int platform_mode) +{ + int error; + + pm_prepare_console(); + suspend_console(); + error = device_suspend(PMSG_PRETHAW); + if (error) + goto Finish; + + error = platform_pre_restore(platform_mode); + if (!error) { + error = disable_nonboot_cpus(); + if (!error) + error = swsusp_resume(); + enable_nonboot_cpus(); + } + platform_restore_cleanup(platform_mode); + device_resume(); + Finish: + resume_console(); + pm_restore_console(); + return error; +} + +/** + * hibernation_platform_enter - enter the hibernation state using the + * platform driver (if available) + */ + +int hibernation_platform_enter(void) +{ + int error; + + if (hibernation_ops) { + kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); + /* + * We have cancelled the power transition by running + * hibernation_ops->finish() before saving the image, so we + * should let the firmware know that we're going to enter the + * sleep state after all + */ + error = hibernation_ops->prepare(); + if (!error) + error = hibernation_ops->enter(); + } else { + error = -ENOSYS; + } + return error; +} + +/** * power_down - Shut the machine down for hibernation. * * Use the platform driver, if configured so; otherwise try @@ -111,11 +244,7 @@ static void power_down(void) kernel_restart(NULL); break; case HIBERNATION_PLATFORM: - if (hibernation_ops) { - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - hibernation_ops->enter(); - break; - } + hibernation_platform_enter(); } kernel_halt(); /* @@ -152,9 +281,16 @@ int hibernate(void) { int error; + mutex_lock(&pm_mutex); /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) - return -EBUSY; + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + goto Exit; /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); @@ -165,75 +301,35 @@ int hibernate(void) if (error) goto Finish; - mutex_lock(&pm_mutex); if (hibernation_mode == HIBERNATION_TESTPROC) { printk("swsusp debug: Waiting for 5 seconds.\n"); mdelay(5000); goto Thaw; } + error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + if (in_suspend && !error) { + unsigned int flags = 0; - /* Free memory before shutting down devices. 
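hibernation_snapshot() above keeps the old in_suspend convention: in the kernel that has just taken the snapshot it returns with in_suspend set, while in the restored kernel the variable reads zero again, which is how the callers below tell "write the image now" apart from "we have just been resumed". A minimal caller, paraphrasing what hibernate() does next (write_image_and_power_down() is a made-up placeholder):

	static int foo_hibernate_core(void)
	{
		int error;

		error = hibernation_snapshot(1 /* use the platform driver */);
		if (error)
			return error;

		if (in_suspend)
			return write_image_and_power_down();	/* hypothetical */

		/* Only reached in the freshly restored kernel. */
		return 0;
	}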
*/ - error = swsusp_shrink_memory(); - if (error) - goto Thaw; - - error = platform_prepare(); - if (error) - goto Thaw; - - suspend_console(); - error = device_suspend(PMSG_FREEZE); - if (error) { - printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Resume_devices; - } - error = disable_nonboot_cpus(); - if (error) - goto Enable_cpus; - - if (hibernation_mode == HIBERNATION_TEST) { - printk("swsusp debug: Waiting for 5 seconds.\n"); - mdelay(5000); - goto Enable_cpus; - } - - pr_debug("PM: snapshotting memory.\n"); - in_suspend = 1; - error = swsusp_suspend(); - if (error) - goto Enable_cpus; - - if (in_suspend) { - enable_nonboot_cpus(); - platform_finish(); - device_resume(); - resume_console(); + if (hibernation_mode == HIBERNATION_PLATFORM) + flags |= SF_PLATFORM_MODE; pr_debug("PM: writing image.\n"); - error = swsusp_write(); + error = swsusp_write(flags); + swsusp_free(); if (!error) power_down(); - else { - swsusp_free(); - goto Thaw; - } } else { pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); } - - swsusp_free(); - Enable_cpus: - enable_nonboot_cpus(); - Resume_devices: - platform_finish(); - device_resume(); - resume_console(); Thaw: - mutex_unlock(&pm_mutex); unprepare_processes(); Finish: free_basic_memory_bitmaps(); Exit: + pm_notifier_call_chain(PM_POST_HIBERNATION); atomic_inc(&snapshot_device_available); + Unlock: + mutex_unlock(&pm_mutex); return error; } @@ -253,6 +349,7 @@ int hibernate(void) static int software_resume(void) { int error; + unsigned int flags; mutex_lock(&pm_mutex); if (!swsusp_resume_device) { @@ -300,30 +397,12 @@ static int software_resume(void) pr_debug("PM: Reading swsusp image.\n"); - error = swsusp_read(); - if (error) { - swsusp_free(); - goto Thaw; - } - - pr_debug("PM: Preparing devices for restore.\n"); - - suspend_console(); - error = device_suspend(PMSG_PRETHAW); - if (error) - goto Free; - - error = disable_nonboot_cpus(); + error = swsusp_read(&flags); if (!error) - swsusp_resume(); + hibernation_restore(flags & SF_PLATFORM_MODE); - enable_nonboot_cpus(); - Free: - swsusp_free(); - device_resume(); - resume_console(); - Thaw: printk(KERN_ERR "PM: Restore failed, recovering.\n"); + swsusp_free(); unprepare_processes(); Done: free_basic_memory_bitmaps(); @@ -333,7 +412,7 @@ static int software_resume(void) Unlock: mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk failed.\n"); - return 0; + return error; } late_initcall(software_resume); diff --git a/kernel/power/main.c b/kernel/power/main.c index fc45ed22620f..32147b57c3bf 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -23,6 +23,8 @@ #include "power.h" +BLOCKING_NOTIFIER_HEAD(pm_chain_head); + /*This is just an arbitrary number */ #define FREE_PAGE_NUMBER (100) @@ -63,14 +65,11 @@ static inline void pm_finish(suspend_state_t state) /** * suspend_prepare - Do prep work before entering low-power state. - * @state: State we're entering. * - * This is common code that is called for each state that we're - * entering. Allocate a console, stop all processes, then make sure - * the platform can enter the requested state. + * This is common code that is called for each state that we're entering. + * Run suspend notifiers, allocate a console and stop all processes. 
*/ - -static int suspend_prepare(suspend_state_t state) +static int suspend_prepare(void) { int error; unsigned int free_pages; @@ -78,6 +77,10 @@ static int suspend_prepare(suspend_state_t state) if (!pm_ops || !pm_ops->enter) return -EPERM; + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); + if (error) + goto Finish; + pm_prepare_console(); if (freeze_processes()) { @@ -85,46 +88,23 @@ static int suspend_prepare(suspend_state_t state) goto Thaw; } - if ((free_pages = global_page_state(NR_FREE_PAGES)) - < FREE_PAGE_NUMBER) { + free_pages = global_page_state(NR_FREE_PAGES); + if (free_pages < FREE_PAGE_NUMBER) { pr_debug("PM: free some memory\n"); shrink_all_memory(FREE_PAGE_NUMBER - free_pages); if (nr_free_pages() < FREE_PAGE_NUMBER) { error = -ENOMEM; printk(KERN_ERR "PM: No enough memory\n"); - goto Thaw; } } - - if (pm_ops->set_target) { - error = pm_ops->set_target(state); - if (error) - goto Thaw; - } - suspend_console(); - error = device_suspend(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "Some devices failed to suspend\n"); - goto Resume_console; - } - if (pm_ops->prepare) { - if ((error = pm_ops->prepare(state))) - goto Resume_devices; - } - - error = disable_nonboot_cpus(); if (!error) return 0; - enable_nonboot_cpus(); - pm_finish(state); - Resume_devices: - device_resume(); - Resume_console: - resume_console(); Thaw: thaw_processes(); pm_restore_console(); + Finish: + pm_notifier_call_chain(PM_POST_SUSPEND); return error; } @@ -140,6 +120,12 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) local_irq_enable(); } +/** + * suspend_enter - enter the desired system sleep state. + * @state: state to enter + * + * This function should be called after devices have been suspended. + */ int suspend_enter(suspend_state_t state) { int error = 0; @@ -159,23 +145,58 @@ int suspend_enter(suspend_state_t state) return error; } +/** + * suspend_devices_and_enter - suspend devices and enter the desired system sleep + * state. + * @state: state to enter + */ +int suspend_devices_and_enter(suspend_state_t state) +{ + int error; + + if (!pm_ops) + return -ENOSYS; + + if (pm_ops->set_target) { + error = pm_ops->set_target(state); + if (error) + return error; + } + suspend_console(); + error = device_suspend(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "Some devices failed to suspend\n"); + goto Resume_console; + } + if (pm_ops->prepare) { + error = pm_ops->prepare(state); + if (error) + goto Resume_devices; + } + error = disable_nonboot_cpus(); + if (!error) + suspend_enter(state); + + enable_nonboot_cpus(); + pm_finish(state); + Resume_devices: + device_resume(); + Resume_console: + resume_console(); + return error; +} /** * suspend_finish - Do final work before exiting suspend sequence. - * @state: State we're coming out of. * * Call platform code to clean up, restart processes, and free the * console that we've allocated. This is not called for suspend-to-disk. */ - -static void suspend_finish(suspend_state_t state) +static void suspend_finish(void) { - enable_nonboot_cpus(); - pm_finish(state); - device_resume(); - resume_console(); thaw_processes(); pm_restore_console(); + pm_notifier_call_chain(PM_POST_SUSPEND); } @@ -207,7 +228,6 @@ static inline int valid_state(suspend_state_t state) * Then, do the setup for suspend, enter the state, and cleaup (after * we've woken up). 
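suspend_devices_and_enter() above is now the single place that walks the platform hooks. For reference, a hypothetical pm_ops provider exercising the callbacks it uses (set_target, prepare, enter, plus the finish invoked via pm_finish()); the names are invented, the finish() signature is inferred rather than shown in this hunk, and other members such as .valid are omitted:

	static int foo_set_target(suspend_state_t state)	{ return 0; }
	static int foo_suspend_prepare(suspend_state_t state)	{ return 0; }
	static int foo_suspend_enter(suspend_state_t state)	{ return 0; /* program the sleep state */ }
	static int foo_suspend_finish(suspend_state_t state)	{ return 0; }

	static struct pm_ops foo_pm_ops = {
		.set_target	= foo_set_target,
		.prepare	= foo_suspend_prepare,
		.enter		= foo_suspend_enter,
		.finish		= foo_suspend_finish,
	};

	/* registered from platform code, e.g. pm_set_ops(&foo_pm_ops); */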
*/ - static int enter_state(suspend_state_t state) { int error; @@ -218,14 +238,14 @@ static int enter_state(suspend_state_t state) return -EBUSY; pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); - if ((error = suspend_prepare(state))) + if ((error = suspend_prepare())) goto Unlock; pr_debug("PM: Entering %s sleep\n", pm_states[state]); - error = suspend_enter(state); + error = suspend_devices_and_enter(state); pr_debug("PM: Finishing wakeup.\n"); - suspend_finish(state); + suspend_finish(); Unlock: mutex_unlock(&pm_mutex); return error; diff --git a/kernel/power/power.h b/kernel/power/power.h index 51381487103f..5f24c786f8ec 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -25,7 +25,10 @@ struct swsusp_info { */ #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) -extern struct hibernation_ops *hibernation_ops; +/* kernel/power/disk.c */ +extern int hibernation_snapshot(int platform_mode); +extern int hibernation_restore(int platform_mode); +extern int hibernation_platform_enter(void); #endif extern int pfn_is_nosave(unsigned long); @@ -152,16 +155,34 @@ extern sector_t alloc_swapdev_block(int swap); extern void free_all_swap_pages(int swap); extern int swsusp_swap_in_use(void); +/* + * Flags that can be passed from the hibernatig hernel to the "boot" kernel in + * the image header. + */ +#define SF_PLATFORM_MODE 1 + +/* kernel/power/disk.c */ extern int swsusp_check(void); extern int swsusp_shrink_memory(void); extern void swsusp_free(void); extern int swsusp_suspend(void); extern int swsusp_resume(void); -extern int swsusp_read(void); -extern int swsusp_write(void); +extern int swsusp_read(unsigned int *flags_p); +extern int swsusp_write(unsigned int flags); extern void swsusp_close(void); -extern int suspend_enter(suspend_state_t state); struct timeval; +/* kernel/power/swsusp.c */ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); + +/* kernel/power/main.c */ +extern int suspend_enter(suspend_state_t state); +extern int suspend_devices_and_enter(suspend_state_t state); +extern struct blocking_notifier_head pm_chain_head; + +static inline int pm_notifier_call_chain(unsigned long val) +{ + return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) + == NOTIFY_BAD) ? -EINVAL : 0; +} diff --git a/kernel/power/process.c b/kernel/power/process.c index e0233d8422b9..3434940a3df1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -40,7 +40,7 @@ static inline void frozen_process(void) current->flags |= PF_FROZEN; wmb(); } - clear_tsk_thread_flag(current, TIF_FREEZE); + clear_freeze_flag(current); } /* Refrigerator is place where frozen processes are stored :-). 
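pm_notifier_call_chain() above turns a NOTIFY_BAD from any listener into -EINVAL, which is what lets a subscriber veto a suspend or hibernation before processes are frozen. A sketch of such a subscriber (callback and notifier block are invented; real users would go through a public registration helper rather than touching pm_chain_head, which lives in this private header):

	#include <linux/notifier.h>
	#include <linux/suspend.h>

	static int foo_pm_notify(struct notifier_block *nb, unsigned long event, void *unused)
	{
		switch (event) {
		case PM_HIBERNATION_PREPARE:
		case PM_SUSPEND_PREPARE:
			/* quiesce the device; return NOTIFY_BAD to veto the transition */
			return NOTIFY_OK;
		case PM_POST_HIBERNATION:
		case PM_POST_SUSPEND:
			/* undo whatever the PREPARE branch did */
			return NOTIFY_OK;
		default:
			return NOTIFY_DONE;
		}
	}

	static struct notifier_block foo_pm_nb = {
		.notifier_call = foo_pm_notify,
	};

	/* e.g.: blocking_notifier_chain_register(&pm_chain_head, &foo_pm_nb); */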
*/ @@ -72,20 +72,19 @@ void refrigerator(void) schedule(); } pr_debug("%s left refrigerator\n", current->comm); - current->state = save; + __set_current_state(save); } -static inline void freeze_process(struct task_struct *p) +static void freeze_task(struct task_struct *p) { unsigned long flags; if (!freezing(p)) { rmb(); if (!frozen(p)) { + set_freeze_flag(p); if (p->state == TASK_STOPPED) force_sig_specific(SIGSTOP, p); - - freeze(p); spin_lock_irqsave(&p->sighand->siglock, flags); signal_wake_up(p, p->state == TASK_STOPPED); spin_unlock_irqrestore(&p->sighand->siglock, flags); @@ -99,19 +98,14 @@ static void cancel_freezing(struct task_struct *p) if (freezing(p)) { pr_debug(" clean up: %s\n", p->comm); - do_not_freeze(p); + clear_freeze_flag(p); spin_lock_irqsave(&p->sighand->siglock, flags); recalc_sigpending_and_wake(p); spin_unlock_irqrestore(&p->sighand->siglock, flags); } } -static inline int is_user_space(struct task_struct *p) -{ - return p->mm && !(p->flags & PF_BORROWED_MM); -} - -static unsigned int try_to_freeze_tasks(int freeze_user_space) +static int try_to_freeze_tasks(int freeze_user_space) { struct task_struct *g, *p; unsigned long end_time; @@ -122,26 +116,40 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) + if (frozen(p) || !freezeable(p)) continue; - if (frozen(p)) - continue; - - if (p->state == TASK_TRACED && frozen(p->parent)) { - cancel_freezing(p); - continue; + if (freeze_user_space) { + if (p->state == TASK_TRACED && + frozen(p->parent)) { + cancel_freezing(p); + continue; + } + /* + * Kernel threads should not have TIF_FREEZE set + * at this point, so we must ensure that either + * p->mm is not NULL *and* PF_BORROWED_MM is + * unset, or TIF_FRREZE is left unset. + * The task_lock() is necessary to prevent races + * with exit_mm() or use_mm()/unuse_mm() from + * occuring. + */ + task_lock(p); + if (!p->mm || (p->flags & PF_BORROWED_MM)) { + task_unlock(p); + continue; + } + freeze_task(p); + task_unlock(p); + } else { + freeze_task(p); } - if (freeze_user_space && !is_user_space(p)) - continue; - - freeze_process(p); if (!freezer_should_skip(p)) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); /* Yield is okay here */ - if (todo && time_after(jiffies, end_time)) + if (time_after(jiffies, end_time)) break; } while (todo); @@ -152,49 +160,41 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) * but it cleans up leftover PF_FREEZE requests. */ printk("\n"); - printk(KERN_ERR "Stopping %s timed out after %d seconds " + printk(KERN_ERR "Freezing of %s timed out after %d seconds " "(%d tasks refusing to freeze):\n", - freeze_user_space ? "user space processes" : - "kernel threads", + freeze_user_space ? "user space " : "tasks ", TIMEOUT / HZ, todo); + show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { - if (freeze_user_space && !is_user_space(p)) - continue; - task_lock(p); - if (freezeable(p) && !frozen(p) && - !freezer_should_skip(p)) + if (freezing(p) && !freezer_should_skip(p)) printk(KERN_ERR " %s\n", p->comm); - cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } - return todo; + return todo ? -EBUSY : 0; } /** * freeze_processes - tell processes to enter the refrigerator - * - * Returns 0 on success, or the number of processes that didn't freeze, - * although they were told to. 
*/ int freeze_processes(void) { - unsigned int nr_unfrozen; + int error; printk("Stopping tasks ... "); - nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); - if (nr_unfrozen) - return nr_unfrozen; + error = try_to_freeze_tasks(FREEZER_USER_SPACE); + if (error) + return error; sys_sync(); - nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); - if (nr_unfrozen) - return nr_unfrozen; + error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + if (error) + return error; printk("done.\n"); BUG_ON(in_atomic()); @@ -210,7 +210,7 @@ static void thaw_tasks(int thaw_user_space) if (!freezeable(p)) continue; - if (is_user_space(p) == !thaw_user_space) + if (!p->mm == thaw_user_space) continue; thaw_process(p); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8b1a1b837145..917aba100575 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -33,8 +33,9 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; + char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; sector_t image; + unsigned int flags; /* Flags to pass to the "boot" kernel */ char orig_sig[10]; char sig[10]; } __attribute__((packed)); @@ -138,7 +139,7 @@ static int wait_on_bio_chain(struct bio **bio_chain) * Saving part */ -static int mark_swapfiles(sector_t start) +static int mark_swapfiles(sector_t start, unsigned int flags) { int error; @@ -148,6 +149,7 @@ static int mark_swapfiles(sector_t start) memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); memcpy(swsusp_header->sig,SWSUSP_SIG, 10); swsusp_header->image = start; + swsusp_header->flags = flags; error = bio_write_page(swsusp_resume_block, swsusp_header, NULL); } else { @@ -369,6 +371,7 @@ static int enough_swap(unsigned int nr_pages) /** * swsusp_write - Write entire image and metadata. + * @flags: flags to pass to the "boot" kernel in the image header * * It is important _NOT_ to umount filesystems at this point. We want * them synced (in case something goes wrong) but we DO not want to mark @@ -376,7 +379,7 @@ static int enough_swap(unsigned int nr_pages) * correctly, we'll mark system clean, anyway.) */ -int swsusp_write(void) +int swsusp_write(unsigned int flags) { struct swap_map_handle handle; struct snapshot_handle snapshot; @@ -415,7 +418,7 @@ int swsusp_write(void) if (!error) { flush_swap_writer(&handle); printk("S"); - error = mark_swapfiles(start); + error = mark_swapfiles(start, flags); printk("|\n"); } } @@ -540,13 +543,20 @@ static int load_image(struct swap_map_handle *handle, return error; } -int swsusp_read(void) +/** + * swsusp_read - read the hibernation image. 
+ * @flags_p: flags passed by the "frozen" kernel in the image header should + * be written into this memeory location + */ + +int swsusp_read(unsigned int *flags_p) { int error; struct swap_map_handle handle; struct snapshot_handle snapshot; struct swsusp_info *header; + *flags_p = swsusp_header->flags; if (IS_ERR(resume_bdev)) { pr_debug("swsusp: block device not initialised\n"); return PTR_ERR(resume_bdev); diff --git a/kernel/power/user.c b/kernel/power/user.c index d65305b515b1..bd0723a7df3f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -128,92 +128,6 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } -static inline int platform_prepare(void) -{ - int error = 0; - - if (hibernation_ops) - error = hibernation_ops->prepare(); - - return error; -} - -static inline void platform_finish(void) -{ - if (hibernation_ops) - hibernation_ops->finish(); -} - -static inline int snapshot_suspend(int platform_suspend) -{ - int error; - - mutex_lock(&pm_mutex); - /* Free memory before shutting down devices. */ - error = swsusp_shrink_memory(); - if (error) - goto Finish; - - if (platform_suspend) { - error = platform_prepare(); - if (error) - goto Finish; - } - suspend_console(); - error = device_suspend(PMSG_FREEZE); - if (error) - goto Resume_devices; - - error = disable_nonboot_cpus(); - if (!error) { - in_suspend = 1; - error = swsusp_suspend(); - } - enable_nonboot_cpus(); - Resume_devices: - if (platform_suspend) - platform_finish(); - - device_resume(); - resume_console(); - Finish: - mutex_unlock(&pm_mutex); - return error; -} - -static inline int snapshot_restore(int platform_suspend) -{ - int error; - - mutex_lock(&pm_mutex); - pm_prepare_console(); - if (platform_suspend) { - error = platform_prepare(); - if (error) - goto Finish; - } - suspend_console(); - error = device_suspend(PMSG_PRETHAW); - if (error) - goto Resume_devices; - - error = disable_nonboot_cpus(); - if (!error) - error = swsusp_resume(); - - enable_nonboot_cpus(); - Resume_devices: - if (platform_suspend) - platform_finish(); - - device_resume(); - resume_console(); - Finish: - pm_restore_console(); - mutex_unlock(&pm_mutex); - return error; -} - static int snapshot_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -237,10 +151,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (data->frozen) break; mutex_lock(&pm_mutex); - if (freeze_processes()) { - thaw_processes(); - error = -EBUSY; + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (!error) { + error = freeze_processes(); + if (error) + thaw_processes(); } + if (error) + pm_notifier_call_chain(PM_POST_HIBERNATION); mutex_unlock(&pm_mutex); if (!error) data->frozen = 1; @@ -251,6 +169,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; mutex_lock(&pm_mutex); thaw_processes(); + pm_notifier_call_chain(PM_POST_HIBERNATION); mutex_unlock(&pm_mutex); data->frozen = 0; break; @@ -260,7 +179,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = snapshot_suspend(data->platform_suspend); + error = hibernation_snapshot(data->platform_suspend); if (!error) error = put_user(in_suspend, (unsigned int __user *)arg); if (!error) @@ -274,7 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = snapshot_restore(data->platform_suspend); + error = hibernation_restore(data->platform_suspend); break; case SNAPSHOT_FREE: @@ 
-336,47 +255,19 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_S2RAM: - if (!pm_ops) { - error = -ENOSYS; - break; - } - if (!data->frozen) { error = -EPERM; break; } - if (!mutex_trylock(&pm_mutex)) { error = -EBUSY; break; } - - if (pm_ops->prepare) { - error = pm_ops->prepare(PM_SUSPEND_MEM); - if (error) - goto OutS3; - } - - /* Put devices to sleep */ - suspend_console(); - error = device_suspend(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "Failed to suspend some devices.\n"); - } else { - error = disable_nonboot_cpus(); - if (!error) { - /* Enter S3, system is already frozen */ - suspend_enter(PM_SUSPEND_MEM); - enable_nonboot_cpus(); - } - /* Wake up devices */ - device_resume(); - } - resume_console(); - if (pm_ops->finish) - pm_ops->finish(PM_SUSPEND_MEM); - - OutS3: + /* + * Tasks are frozen and the notifiers have been called with + * PM_HIBERNATION_PREPARE + */ + error = suspend_devices_and_enter(PM_SUSPEND_MEM); mutex_unlock(&pm_mutex); break; @@ -386,19 +277,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, switch (arg) { case PMOPS_PREPARE: - if (hibernation_ops) { - data->platform_suspend = 1; - error = 0; - } else { - error = -ENOSYS; - } + data->platform_suspend = 1; + error = 0; break; case PMOPS_ENTER: - if (data->platform_suspend) { - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - error = hibernation_ops->enter(); - } + if (data->platform_suspend) + error = hibernation_platform_enter(); + break; case PMOPS_FINISH: diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4a1745f1dadf..82a558b655da 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -142,7 +142,7 @@ static int may_attach(struct task_struct *task) return -EPERM; smp_rmb(); if (task->mm) - dumpable = task->mm->dumpable; + dumpable = get_dumpable(task->mm); if (!dumpable && !capable(CAP_SYS_PTRACE)) return -EPERM; diff --git a/kernel/relay.c b/kernel/relay.c index a615a8f513fc..510fbbd7b500 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -80,7 +80,7 @@ static struct vm_operations_struct relay_file_mmap_ops = { * * Caller should already have grabbed mmap_sem. */ -int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) +static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) { unsigned long length = vma->vm_end - vma->vm_start; struct file *filp = vma->vm_file; @@ -145,7 +145,7 @@ depopulate: * * Returns channel buffer if successful, %NULL otherwise. */ -struct rchan_buf *relay_create_buf(struct rchan *chan) +static struct rchan_buf *relay_create_buf(struct rchan *chan) { struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); if (!buf) @@ -175,7 +175,7 @@ free_buf: * * Should only be called from kref_put(). */ -void relay_destroy_channel(struct kref *kref) +static void relay_destroy_channel(struct kref *kref) { struct rchan *chan = container_of(kref, struct rchan, kref); kfree(chan); @@ -185,7 +185,7 @@ void relay_destroy_channel(struct kref *kref) * relay_destroy_buf - destroy an rchan_buf struct and associated buffer * @buf: the buffer struct */ -void relay_destroy_buf(struct rchan_buf *buf) +static void relay_destroy_buf(struct rchan_buf *buf) { struct rchan *chan = buf->chan; unsigned int i; @@ -210,7 +210,7 @@ void relay_destroy_buf(struct rchan_buf *buf) * rchan_buf_struct and the channel buffer. Should only be called from * kref_put(). 
*/ -void relay_remove_buf(struct kref *kref) +static void relay_remove_buf(struct kref *kref) { struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); buf->chan->cb->remove_buf_file(buf->dentry); @@ -223,11 +223,10 @@ void relay_remove_buf(struct kref *kref) * * Returns 1 if the buffer is empty, 0 otherwise. */ -int relay_buf_empty(struct rchan_buf *buf) +static int relay_buf_empty(struct rchan_buf *buf) { return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; } -EXPORT_SYMBOL_GPL(relay_buf_empty); /** * relay_buf_full - boolean, is the channel buffer full? diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 9a87886b022e..1ec620c03064 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -20,7 +20,7 @@ void down_read(struct rw_semaphore *sem) might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); - __down_read(sem); + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); } EXPORT_SYMBOL(down_read); @@ -47,7 +47,7 @@ void down_write(struct rw_semaphore *sem) might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); - __down_write(sem); + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); } EXPORT_SYMBOL(down_write); @@ -111,7 +111,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) might_sleep(); rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); - __down_read(sem); + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); } EXPORT_SYMBOL(down_read_nested); @@ -130,7 +130,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) might_sleep(); rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); - __down_write_nested(sem, subclass); + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); } EXPORT_SYMBOL(down_write_nested); diff --git a/kernel/sched.c b/kernel/sched.c index cb31fb4a1379..645256b228c3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -301,7 +301,7 @@ struct rq { struct lock_class_key rq_lock_key; }; -static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); static DEFINE_MUTEX(sched_hotcpu_mutex); static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 2c6c2bf85514..cd72424c2662 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -72,7 +72,7 @@ void __lockfunc _read_lock(rwlock_t *lock) { preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); } EXPORT_SYMBOL(_read_lock); @@ -88,8 +88,8 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_PROVE_LOCKING - _raw_spin_lock(lock); +#ifdef CONFIG_LOCKDEP + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); #else _raw_spin_lock_flags(lock, &flags); #endif @@ -102,7 +102,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock) local_irq_disable(); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock_irq); @@ -111,7 +111,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock) local_bh_disable(); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock_bh); @@ -122,7 +122,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t 
*lock) local_irq_save(flags); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); return flags; } EXPORT_SYMBOL(_read_lock_irqsave); @@ -132,7 +132,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) local_irq_disable(); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); } EXPORT_SYMBOL(_read_lock_irq); @@ -141,7 +141,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock) local_bh_disable(); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); } EXPORT_SYMBOL(_read_lock_bh); @@ -152,7 +152,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); return flags; } EXPORT_SYMBOL(_write_lock_irqsave); @@ -162,7 +162,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock) local_irq_disable(); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); } EXPORT_SYMBOL(_write_lock_irq); @@ -171,7 +171,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock) local_bh_disable(); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); } EXPORT_SYMBOL(_write_lock_bh); @@ -179,7 +179,7 @@ void __lockfunc _spin_lock(spinlock_t *lock) { preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock); @@ -188,7 +188,7 @@ void __lockfunc _write_lock(rwlock_t *lock) { preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); } EXPORT_SYMBOL(_write_lock); @@ -289,7 +289,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) { preempt_disable(); spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock_nested); @@ -305,8 +305,8 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_PROVE_SPIN_LOCKING - _raw_spin_lock(lock); +#ifdef CONFIG_LOCKDEP + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); #else _raw_spin_lock_flags(lock, &flags); #endif diff --git a/kernel/sys.c b/kernel/sys.c index 18987c7f6add..08562f419768 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -100,6 +100,13 @@ struct pid *cad_pid; EXPORT_SYMBOL(cad_pid); /* + * If set, this is used for preparing the system to power off. + */ + +void (*pm_power_off_prepare)(void); +EXPORT_SYMBOL(pm_power_off_prepare); + +/* * Notifier list for kernel code which wants to be called * at shutdown. This is used to stop any idling DMA operations * and the like. 
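kernel_power_off() in the next hunk calls pm_power_off_prepare just before machine_power_off(), giving platform code a last chance to talk to the firmware. A hypothetical user of the new hook (function names invented):

	#include <linux/init.h>
	#include <linux/pm.h>

	static void foo_power_off_prepare(void)
	{
		/* e.g. tell the firmware which off/sleep state to enter */
	}

	static int __init foo_poweroff_init(void)
	{
		pm_power_off_prepare = foo_power_off_prepare;
		return 0;
	}
	late_initcall(foo_poweroff_init);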
@@ -867,6 +874,8 @@ EXPORT_SYMBOL_GPL(kernel_halt); void kernel_power_off(void) { kernel_shutdown_prepare(SYSTEM_POWER_OFF); + if (pm_power_off_prepare) + pm_power_off_prepare(); printk(KERN_EMERG "Power down.\n"); machine_power_off(); } @@ -1027,7 +1036,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) return -EPERM; } if (new_egid != old_egid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } if (rgid != (gid_t) -1 || @@ -1057,13 +1066,13 @@ asmlinkage long sys_setgid(gid_t gid) if (capable(CAP_SETGID)) { if (old_egid != gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->gid = current->egid = current->sgid = current->fsgid = gid; } else if ((gid == current->gid) || (gid == current->sgid)) { if (old_egid != gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->egid = current->fsgid = gid; @@ -1094,7 +1103,7 @@ static int set_user(uid_t new_ruid, int dumpclear) switch_uid(new_user); if (dumpclear) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->uid = new_ruid; @@ -1150,7 +1159,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) return -EAGAIN; if (new_euid != old_euid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = current->euid = new_euid; @@ -1200,7 +1209,7 @@ asmlinkage long sys_setuid(uid_t uid) return -EPERM; if (old_euid != uid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = current->euid = uid; @@ -1245,7 +1254,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) } if (euid != (uid_t) -1) { if (euid != current->euid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->euid = euid; @@ -1295,7 +1304,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) } if (egid != (gid_t) -1) { if (egid != current->egid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->egid = egid; @@ -1341,7 +1350,7 @@ asmlinkage long sys_setfsuid(uid_t uid) uid == current->suid || uid == current->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = uid; @@ -1370,7 +1379,7 @@ asmlinkage long sys_setfsgid(gid_t gid) gid == current->sgid || gid == current->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsgid = gid; @@ -2167,14 +2176,14 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = put_user(current->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - error = current->mm->dumpable; + error = get_dumpable(current->mm); break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 1) { error = -EINVAL; break; } - current->mm->dumpable = arg2; + set_dumpable(current->mm, arg2); break; case PR_SET_UNALIGN: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 44a1d699aad7..222299844ad1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -78,6 +78,7 @@ extern int percpu_pagelist_fraction; extern int compat_log; extern int maps_protect; extern int sysctl_stat_interval; +extern int audit_argv_kb; /* this is needed for 
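All the dumpable accesses in sys.c now go through get_dumpable()/set_dumpable(); from userspace the same flag is still driven through prctl(), e.g.:

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) != 0)
			perror("PR_SET_DUMPABLE");

		printf("dumpable = %d\n", prctl(PR_GET_DUMPABLE, 0, 0, 0, 0));
		return 0;
	}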
the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -160,6 +161,8 @@ extern ctl_table inotify_table[]; int sysctl_legacy_va_layout; #endif +extern int prove_locking; +extern int lock_stat; /* The default sysctl tables: */ @@ -281,6 +284,26 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_PROVE_LOCKING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "prove_locking", + .data = &prove_locking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_LOCK_STAT + { + .ctl_name = CTL_UNNUMBERED, + .procname = "lock_stat", + .data = &lock_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = CTL_UNNUMBERED, .procname = "sched_features", @@ -306,6 +329,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_AUDITSYSCALL + { + .ctl_name = CTL_UNNUMBERED, + .procname = "audit_argv_kb", + .data = &audit_argv_kb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = KERN_CORE_PATTERN, .procname = "core_pattern", @@ -660,7 +693,7 @@ static ctl_table kern_table[] = { { .ctl_name = KERN_ACPI_VIDEO_FLAGS, .procname = "acpi_video_flags", - .data = &acpi_video_flags, + .data = &acpi_realmode_flags, .maxlen = sizeof (unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, @@ -715,13 +748,17 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, - +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; /* Constants for minimum and maximum testing in vm_table. We use these as one-element integer vectors. */ static int zero; +static int two = 2; static int one_hundred = 100; @@ -1112,7 +1149,10 @@ static ctl_table fs_table[] = { .data = &lease_break_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &two, }, { .ctl_name = FS_AIO_NR, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 027d46c906e0..88c81026e003 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -401,7 +401,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. 
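The CTL_UNNUMBERED entries above surface as /proc/sys/kernel/prove_locking, /proc/sys/kernel/lock_stat and /proc/sys/kernel/audit_argv_kb (the paths follow the procname fields). A small userspace helper to flip them, as a sketch:

	#include <stdio.h>

	static int write_sysctl(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fputs(val, f);
		return fclose(f);
	}

	/* e.g.:
	 *   write_sysctl("/proc/sys/kernel/lock_stat", "1");
	 *   write_sysctl("/proc/sys/kernel/prove_locking", "0");
	 */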
*/ -static void clocksource_adjust(struct clocksource *clock, s64 offset) +static void clocksource_adjust(s64 offset) { s64 error, interval = clock->cycle_interval; int adj; @@ -472,7 +472,7 @@ void update_wall_time(void) } /* correct the clock when NTP error is too big */ - clocksource_adjust(clock, offset); + clocksource_adjust(offset); /* store full nanoseconds into xtime */ xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; diff --git a/kernel/timer.c b/kernel/timer.c index dbc03ab14eed..6ce1952eea7d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -103,14 +103,14 @@ static inline tvec_base_t *tbase_get_base(tvec_base_t *base) static inline void timer_set_deferrable(struct timer_list *timer) { timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | - TBASE_DEFERRABLE_FLAG)); + TBASE_DEFERRABLE_FLAG)); } static inline void timer_set_base(struct timer_list *timer, tvec_base_t *new_base) { timer->base = (tvec_base_t *)((unsigned long)(new_base) | - tbase_get_deferrable(timer->base)); + tbase_get_deferrable(timer->base)); } /** @@ -445,10 +445,10 @@ EXPORT_SYMBOL(__mod_timer); void add_timer_on(struct timer_list *timer, int cpu) { tvec_base_t *base = per_cpu(tvec_bases, cpu); - unsigned long flags; + unsigned long flags; timer_stats_timer_set_start_info(timer); - BUG_ON(timer_pending(timer) || !timer->function); + BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); internal_add_timer(base, timer); @@ -627,7 +627,7 @@ static inline void __run_timers(tvec_base_t *base) while (time_after_eq(jiffies, base->timer_jiffies)) { struct list_head work_list; struct list_head *head = &work_list; - int index = base->timer_jiffies & TVR_MASK; + int index = base->timer_jiffies & TVR_MASK; /* * Cascade timers: @@ -644,8 +644,8 @@ static inline void __run_timers(tvec_base_t *base) unsigned long data; timer = list_first_entry(head, struct timer_list,entry); - fn = timer->function; - data = timer->data; + fn = timer->function; + data = timer->data; timer_stats_account_timer(timer); @@ -689,8 +689,8 @@ static unsigned long __next_timer_interrupt(tvec_base_t *base) index = slot = timer_jiffies & TVR_MASK; do { list_for_each_entry(nte, base->tv1.vec + slot, entry) { - if (tbase_get_deferrable(nte->base)) - continue; + if (tbase_get_deferrable(nte->base)) + continue; found = 1; expires = nte->expires; @@ -834,7 +834,7 @@ void update_process_times(int user_tick) if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); scheduler_tick(); - run_posix_cpu_timers(p); + run_posix_cpu_timers(p); } /* @@ -909,7 +909,7 @@ static inline void update_times(unsigned long ticks) update_wall_time(); calc_load(ticks); } - + /* * The 64-bit jiffies value is not atomic - you MUST NOT read it * without sampling the sequence number in xtime_lock. @@ -1105,7 +1105,7 @@ asmlinkage long sys_gettid(void) /** * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill - */ + */ int do_sysinfo(struct sysinfo *info) { unsigned long mem_total, sav_total; |