diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit_tree.c | 19 | ||||
-rw-r--r-- | kernel/fork.c | 4 | ||||
-rw-r--r-- | kernel/sched/core.c | 35 | ||||
-rw-r--r-- | kernel/sched/fair.c | 11 | ||||
-rw-r--r-- | kernel/sched/rt.c | 13 | ||||
-rw-r--r-- | kernel/sched/sched.h | 8 | ||||
-rw-r--r-- | kernel/sched/stop_task.c | 22 | ||||
-rw-r--r-- | kernel/task_work.c | 1 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 39 | ||||
-rw-r--r-- | kernel/timer.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 4 |
11 files changed, 122 insertions, 43 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 3a5ca582ba1e..ed206fd88cca 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -250,7 +250,6 @@ static void untag_chunk(struct node *p) spin_unlock(&hash_lock); spin_unlock(&entry->lock); fsnotify_destroy_mark(entry); - fsnotify_put_mark(entry); goto out; } @@ -259,7 +258,7 @@ static void untag_chunk(struct node *p) fsnotify_duplicate_mark(&new->mark, entry); if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { - free_chunk(new); + fsnotify_put_mark(&new->mark); goto Fallback; } @@ -293,7 +292,7 @@ static void untag_chunk(struct node *p) spin_unlock(&hash_lock); spin_unlock(&entry->lock); fsnotify_destroy_mark(entry); - fsnotify_put_mark(entry); + fsnotify_put_mark(&new->mark); /* drop initial reference */ goto out; Fallback: @@ -322,7 +321,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) entry = &chunk->mark; if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { - free_chunk(chunk); + fsnotify_put_mark(entry); return -ENOSPC; } @@ -347,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) insert_hash(chunk); spin_unlock(&hash_lock); spin_unlock(&entry->lock); + fsnotify_put_mark(entry); /* drop initial reference */ return 0; } @@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) fsnotify_duplicate_mark(chunk_entry, old_entry); if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) { spin_unlock(&old_entry->lock); - free_chunk(chunk); + fsnotify_put_mark(chunk_entry); fsnotify_put_mark(old_entry); return -ENOSPC; } @@ -444,8 +444,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); fsnotify_destroy_mark(old_entry); + fsnotify_put_mark(chunk_entry); /* drop initial reference */ fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ - fsnotify_put_mark(old_entry); /* and kill it */ return 0; } @@ -916,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark); evict_chunk(chunk); - fsnotify_put_mark(entry); + + /* + * We are guaranteed to have at least one reference to the mark from + * either the inode or the caller of fsnotify_destroy_mark(). + */ + BUG_ON(atomic_read(&entry->refcnt) < 1); } static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode, diff --git a/kernel/fork.c b/kernel/fork.c index 3bd2280d79f6..2c8857e12855 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; - if (file && uprobe_mmap(tmp)) - goto out; + if (file) + uprobe_mmap(tmp); } /* a new mm has just been created */ arch_dup_mmap(oldmm, mm); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 82ad284f823b..fbf1fd098dc6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) #endif +static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) +{ + u64 temp = (__force u64) rtime; + + temp *= (__force u64) utime; + + if (sizeof(cputime_t) == 4) + temp = div_u64(temp, (__force u32) total); + else + temp = div64_u64(temp, (__force u64) total); + + return (__force cputime_t) temp; +} + void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { cputime_t rtime, utime = p->utime, total = utime + p->stime; @@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) */ rtime = nsecs_to_cputime(p->se.sum_exec_runtime); - if (total) { - u64 temp = (__force u64) rtime; - - temp *= (__force u64) utime; - do_div(temp, (__force u32) total); - utime = (__force cputime_t) temp; - } else + if (total) + utime = scale_utime(utime, rtime, total); + else utime = rtime; /* @@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) total = cputime.utime + cputime.stime; rtime = nsecs_to_cputime(cputime.sum_exec_runtime); - if (total) { - u64 temp = (__force u64) rtime; - - temp *= (__force u64) cputime.utime; - do_div(temp, (__force u32) total); - utime = (__force cputime_t) temp; - } else + if (total) + utime = scale_utime(cputime.utime, rtime, total); + else utime = rtime; sig->prev_utime = max(sig->prev_utime, utime); @@ -7246,6 +7252,7 @@ int in_sched_functions(unsigned long addr) #ifdef CONFIG_CGROUP_SCHED struct task_group root_task_group; +LIST_HEAD(task_groups); #endif DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d0cc03b3e70b..c219bf8d704c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3387,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data) static void update_h_load(long cpu) { + struct rq *rq = cpu_rq(cpu); + unsigned long now = jiffies; + + if (rq->h_load_throttle == now) + return; + + rq->h_load_throttle = now; + rcu_read_lock(); walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); rcu_read_unlock(); @@ -4293,11 +4301,10 @@ redo: env.src_rq = busiest; env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running); + update_h_load(env.src_cpu); more_balance: local_irq_save(flags); double_rq_lock(this_rq, busiest); - if (!env.loop) - update_h_load(env.src_cpu); /* * cur_ld_moved - load moved in current iteration diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 573e1ca01102..944cb68420e9 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) const struct cpumask *span; span = sched_rt_period_mask(); +#ifdef CONFIG_RT_GROUP_SCHED + /* + * FIXME: isolated CPUs should really leave the root task group, + * whether they are isolcpus or were isolated via cpusets, lest + * the timer run on a CPU which does not service all runqueues, + * potentially leaving other CPUs indefinitely throttled. If + * isolation is really required, the user will turn the throttle + * off to kill the perturbations it causes anyway. Meanwhile, + * this maintains functionality for boot and/or troubleshooting. + */ + if (rt_b == &root_task_group.rt_bandwidth) + span = cpu_online_mask; +#endif for_each_cpu(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c35a1a7dd4d6..f6714d009e77 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex; struct cfs_rq; struct rt_rq; -static LIST_HEAD(task_groups); +extern struct list_head task_groups; struct cfs_bandwidth { #ifdef CONFIG_CFS_BANDWIDTH @@ -374,7 +374,11 @@ struct rq { #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ struct list_head leaf_cfs_rq_list; -#endif +#ifdef CONFIG_SMP + unsigned long h_load_throttle; +#endif /* CONFIG_SMP */ +#endif /* CONFIG_FAIR_GROUP_SCHED */ + #ifdef CONFIG_RT_GROUP_SCHED struct list_head leaf_rt_rq_list; #endif diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 7b386e86fd23..da5eb5bed84a 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) { struct task_struct *stop = rq->stop; - if (stop && stop->on_rq) + if (stop && stop->on_rq) { + stop->se.exec_start = rq->clock_task; return stop; + } return NULL; } @@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq) static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) { + struct task_struct *curr = rq->curr; + u64 delta_exec; + + delta_exec = rq->clock_task - curr->se.exec_start; + if (unlikely((s64)delta_exec < 0)) + delta_exec = 0; + + schedstat_set(curr->se.statistics.exec_max, + max(curr->se.statistics.exec_max, delta_exec)); + + curr->se.sum_exec_runtime += delta_exec; + account_group_exec_runtime(curr, delta_exec); + + curr->se.exec_start = rq->clock_task; + cpuacct_charge(curr, delta_exec); } static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) @@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) static void set_curr_task_stop(struct rq *rq) { + struct task_struct *stop = rq->stop; + + stop->se.exec_start = rq->clock_task; } static void switched_to_stop(struct rq *rq, struct task_struct *p) diff --git a/kernel/task_work.c b/kernel/task_work.c index 91d4e1742a0c..d320d44903bd 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -75,6 +75,7 @@ void task_work_run(void) p = q->next; q->func(q); q = p; + cond_resched(); } } } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e16af197a2bc..34e5eac81424 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -115,6 +115,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) { tk->xtime_sec += ts->tv_sec; tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; + tk_normalize_xtime(tk); } static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) @@ -276,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk->xtime_nsec += cycle_delta * tk->mult; /* If arch requires, add in gettimeoffset() */ - tk->xtime_nsec += arch_gettimeoffset() << tk->shift; + tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; tk_normalize_xtime(tk); @@ -427,7 +428,7 @@ int do_settimeofday(const struct timespec *tv) struct timespec ts_delta, xt; unsigned long flags; - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) + if (!timespec_valid_strict(tv)) return -EINVAL; write_seqlock_irqsave(&tk->lock, flags); @@ -463,6 +464,8 @@ int timekeeping_inject_offset(struct timespec *ts) { struct timekeeper *tk = &timekeeper; unsigned long flags; + struct timespec tmp; + int ret = 0; if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; @@ -471,10 +474,17 @@ int timekeeping_inject_offset(struct timespec *ts) timekeeping_forward_now(tk); + /* Make sure the proposed value is valid */ + tmp = timespec_add(tk_xtime(tk), *ts); + if (!timespec_valid_strict(&tmp)) { + ret = -EINVAL; + goto error; + } tk_xtime_add(tk, ts); tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); +error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, true); write_sequnlock_irqrestore(&tk->lock, flags); @@ -482,7 +492,7 @@ int timekeeping_inject_offset(struct timespec *ts) /* signal hrtimers about time change */ clock_was_set(); - return 0; + return ret; } EXPORT_SYMBOL(timekeeping_inject_offset); @@ -649,7 +659,20 @@ void __init timekeeping_init(void) struct timespec now, boot, tmp; read_persistent_clock(&now); + if (!timespec_valid_strict(&now)) { + pr_warn("WARNING: Persistent clock returned invalid value!\n" + " Check your CMOS/BIOS settings.\n"); + now.tv_sec = 0; + now.tv_nsec = 0; + } + read_boot_clock(&boot); + if (!timespec_valid_strict(&boot)) { + pr_warn("WARNING: Boot clock returned invalid value!\n" + " Check your CMOS/BIOS settings.\n"); + boot.tv_sec = 0; + boot.tv_nsec = 0; + } seqlock_init(&tk->lock); @@ -690,7 +713,7 @@ static struct timespec timekeeping_suspend_time; static void __timekeeping_inject_sleeptime(struct timekeeper *tk, struct timespec *delta) { - if (!timespec_valid(delta)) { + if (!timespec_valid_strict(delta)) { printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " "sleep delta value!\n"); return; @@ -1129,6 +1152,10 @@ static void update_wall_time(void) offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #endif + /* Check if there's really nothing to do */ + if (offset < tk->cycle_interval) + goto out; + /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, @@ -1161,9 +1188,9 @@ static void update_wall_time(void) * the vsyscall implementations are converted to use xtime_nsec * (shifted nanoseconds), this can be killed. */ - remainder = tk->xtime_nsec & ((1 << tk->shift) - 1); + remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); tk->xtime_nsec -= remainder; - tk->xtime_nsec += 1 << tk->shift; + tk->xtime_nsec += 1ULL << tk->shift; tk->ntp_error += remainder << tk->ntp_error_shift; /* diff --git a/kernel/timer.c b/kernel/timer.c index a61c09374eba..8c5e7b908c68 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif -#ifndef __alpha__ - -/* - * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this - * should be moved into arch/i386 instead? - */ - /** * sys_getpid - return the thread group id of the current process * @@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid) return from_kgid_munged(current_user_ns(), current_egid()); } -#endif - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 60e4d7875672..6b245f64c8dd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) int size; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) return; @@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) int size; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) return; |