From f629299b544b6cc12b4e3e85fec96f4ce5809482 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 24 Jul 2011 23:15:42 +0200 Subject: trace events: Update version number reference to new 3.x scheme for EVENT_POWER_TRACING_DEPRECATED What was scheduled to be 2.6.41 is now going to be 3.1 . Signed-off-by: Jesper Juhl Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1107250929370.8080@swampdragon.chaosbits.net Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2ad39e556cb4..cd3134510f3d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED power:power_frequency This is for userspace compatibility and will vanish after 5 kernel iterations, - namely 2.6.41. + namely 3.1. config CONTEXT_SWITCH_TRACER bool -- cgit v1.2.1 From 1dd75f91ae713049eb6baaa640078f3a6549e522 Mon Sep 17 00:00:00 2001 From: "jhbird.choi@samsung.com" Date: Thu, 21 Jul 2011 15:29:14 +0900 Subject: genirq: Fix wrong bit operation (!msk & 0x01) should be !(msk & 0x01) Signed-off-by: Jonghwan Choi Link: http://lkml.kernel.org/r/1311229754-6003-1-git-send-email-jhbird.choi@samsung.com Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- kernel/irq/generic-chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 3a2cab407b93..e38544dddb18 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); for (i = gc->irq_base; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) @@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, raw_spin_unlock(&gc_lock); for (; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; /* Remove handler first. That will mask the irq line */ -- cgit v1.2.1 From f3637a5f2e2eb391ff5757bc83fb5de8f9726464 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 7 Jul 2011 22:32:17 +0200 Subject: irq: Always set IRQF_ONESHOT if no primary handler is specified If no primary handler is specified then a default one is assigned which always returns IRQ_WAKE_THREAD. This handler requires the IRQF_ONESHOT flag on LEVEL / EIO typed irqs because the source of interrupt is not disabled. Since it is required for those users and there is no difference for others it makes sense to add this flag unconditionally. Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/1310070737-18514-1-git-send-email-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a7840aeb0fb..3f9cd4799da7 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1322,6 +1322,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (!thread_fn) return -EINVAL; handler = irq_default_primary_handler; + irqflags |= IRQF_ONESHOT; } action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); -- cgit v1.2.1 From b6873807a7143b7d6d8b06809295e559d07d7deb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Jul 2011 12:17:31 +0200 Subject: irq: Track the owner of irq descriptor Interrupt descriptors can be allocated from modules. The interrupts are used by other modules, but we have no refcount on the module which provides the interrupts and there is no way to establish one on the device level as the interrupt using module is agnostic to the fact that the interrupt is provided by a module rather than by some builtin interrupt controller. To prevent removal of the interrupt providing module, we can track the owner of the interrupt descriptor, which also provides the relevant irq chip functions in the irq descriptor. request/setup_irq() can now acquire a refcount on the owner module to prevent unloading. free_irq() drops the refcount. Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/20110711101731.GA13804@Chamillionaire.breakpoint.cc Signed-off-by: Thomas Gleixner --- kernel/irq/irqdesc.c | 36 ++++++++++++++++++++++++------------ kernel/irq/manage.c | 17 +++++++++++++---- 2 files changed, 37 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4c60a50e66b2..cb65d0360e31 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { } static inline int desc_node(struct irq_desc *desc) { return 0; } #endif -static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, + struct module *owner) { int cpu; @@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) desc->irq_count = 0; desc->irqs_unhandled = 0; desc->name = NULL; + desc->owner = owner; for_each_possible_cpu(cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node); @@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif -static struct irq_desc *alloc_desc(int irq, int node) +static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; gfp_t gfp = GFP_KERNEL; @@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node) raw_spin_lock_init(&desc->lock); lockdep_set_class(&desc->lock, &irq_desc_lock_class); - desc_set_defaults(irq, desc, node); + desc_set_defaults(irq, desc, node, owner); return desc; @@ -173,13 +175,14 @@ static void free_desc(unsigned int irq) kfree(desc); } -static int alloc_descs(unsigned int start, unsigned int cnt, int node) +static int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { struct irq_desc *desc; int i; for (i = 0; i < cnt; i++) { - desc = alloc_desc(start + i, node); + desc = alloc_desc(start + i, node, owner); if (!desc) goto err; mutex_lock(&sparse_irq_lock); @@ -227,7 +230,7 @@ int __init early_irq_init(void) nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { - desc = alloc_desc(i, node); + desc = alloc_desc(i, node, NULL); set_bit(i, allocated_irqs); irq_insert_desc(i, desc); } @@ -261,7 +264,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], GFP_KERNEL, node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - desc_set_defaults(i, &desc[i], node); + desc_set_defaults(i, &desc[i], node, NULL); } return arch_early_irq_init(); } @@ -276,8 +279,16 @@ static void free_desc(unsigned int irq) dynamic_irq_cleanup(irq); } -static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { + u32 i; + + for (i = 0; i < cnt; i++) { + struct irq_desc *desc = irq_to_desc(start + i); + + desc->owner = owner; + } return start; } @@ -337,7 +348,8 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * Returns the first irq number or error code */ int __ref -irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) +__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner) { int start, ret; @@ -366,13 +378,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node); + return alloc_descs(start, cnt, node, owner); err: mutex_unlock(&sparse_irq_lock); return ret; } -EXPORT_SYMBOL_GPL(irq_alloc_descs); +EXPORT_SYMBOL_GPL(__irq_alloc_descs); /** * irq_reserve_irqs - mark irqs allocated @@ -440,7 +452,7 @@ void dynamic_irq_cleanup(unsigned int irq) unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); - desc_set_defaults(irq, desc, desc_node(desc)); + desc_set_defaults(irq, desc, desc_node(desc), NULL); raw_spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 3f9cd4799da7..2e9425889fa8 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->irq_data.chip == &no_irq_chip) return -ENOSYS; + if (!try_module_get(desc->owner)) + return -ENODEV; /* * Some drivers like serial.c use request_irq() heavily, * so we have to be careful not to interfere with a @@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ nested = irq_settings_is_nested_thread(desc); if (nested) { - if (!new->thread_fn) - return -EINVAL; + if (!new->thread_fn) { + ret = -EINVAL; + goto out_mput; + } /* * Replace the primary handler which was provided from * the driver for non nested interrupt handling by the @@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) t = kthread_create(irq_thread, new, "irq/%d-%s", irq, new->name); - if (IS_ERR(t)) - return PTR_ERR(t); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto out_mput; + } /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code @@ -1095,6 +1101,8 @@ out_thread: kthread_stop(t); put_task_struct(t); } +out_mput: + module_put(desc->owner); return ret; } @@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) put_task_struct(action->thread); } + module_put(desc->owner); return action; } -- cgit v1.2.1 From b77f0f3c1f587791aa5d9bd1b0012c9a89eb9258 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 5 Aug 2011 16:40:40 -0400 Subject: jump label: Reduce the cycle count by changing the link order In the course of testing jump labels for use with the CFS bandwidth controller, Paul Turner, discovered that using jump labels reduced the branch count and the instruction count, but did not reduce the cycle count or wall time. I noticed that having the jump_label.o included in the kernel but not used in any way still caused this increase in cycle count and wall time. Thus, I moved jump_label.o in the kernel/Makefile, thus changing the link order, and presumably moving it out of hot icache areas. This brought down the cycle count/time as expected. In addition to Paul's testing, I've tested the patch using a single 'static_branch()' in the getppid() path, and basically running tight loops of calls to getppid(). Here are my results for the branch disabled case: With jump labels turned on (CONFIG_JUMP_LABEL), branch disabled: Performance counter stats for 'bash -c /tmp/getppid;true' (50 runs): 3,969,510,217 instructions # 0.864 IPC ( +-0.000% ) 4,592,334,954 cycles ( +- 0.046% ) 751,634,470 branches ( +- 0.000% ) 1.722635797 seconds time elapsed ( +- 0.046% ) Jump labels turned off (CONFIG_JUMP_LABEL not set), branch disabled: Performance counter stats for 'bash -c /tmp/getppid;true' (50 runs): 4,009,611,846 instructions # 0.867 IPC ( +-0.000% ) 4,622,210,580 cycles ( +- 0.012% ) 771,662,904 branches ( +- 0.000% ) 1.734341454 seconds time elapsed ( +- 0.022% ) Signed-off-by: Jason Baron Cc: rth@redhat.com Cc: a.p.zijlstra@chello.nl Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/20110805204040.GG2522@redhat.com Signed-off-by: Ingo Molnar Tested-by: Paul Turner --- kernel/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index d06467fc8f7c..eca595e2fd52 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o jump_label.o + async.o range.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER @@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is -- cgit v1.2.1 From 80e0401e35410a69bfae05b454db8a7187edd6b8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Aug 2011 14:26:17 +0200 Subject: lockdep: Fix wrong assumption in match_held_lock match_held_lock() was assuming it was being called on a lock class that had already seen usage. This condition was true for bug-free code using lockdep_assert_held(), since you're in fact holding the lock when calling it. However the assumption fails the moment you assume the assertion can fail, which is the whole point of having the assertion in the first place. Anyway, now that there's more lockdep_is_held() users, notably __rcu_dereference_check(), its much easier to trigger this since we test for a number of locks and we only need to hold any one of them to be good. Reported-by: Sergey Senozhatsky Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1312547787.28695.2.camel@twins Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8c24294e477f..91d67ce3a8d5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) if (!class) class = look_up_lock_class(lock, 0); - if (DEBUG_LOCKS_WARN_ON(!class)) + /* + * If look_up_lock_class() failed to find a class, we're trying + * to test if we hold a lock that has never yet been acquired. + * Clearly if the lock hasn't been acquired _ever_, we're not + * holding it either, so report failure. + */ + if (!class) return 0; if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) -- cgit v1.2.1 From f2c0d0266cc5eb36a4aa44944b4096ec121490aa Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 8 Aug 2011 06:22:43 +0200 Subject: cap_syslog: don't use WARN_ONCE for CAP_SYS_ADMIN deprecation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syslog-ng versions before 3.3.0beta1 (2011-05-12) assume that CAP_SYS_ADMIN is sufficient to access syslog, so ever since CAP_SYSLOG was introduced (2010-11-25) they have triggered a warning. Commit ee24aebffb75 ("cap_syslog: accept CAP_SYS_ADMIN for now") improved matters a little by making syslog-ng work again, just keeping the WARN_ONCE(). But still, this is a warning that writes a stack trace we don't care about to syslog, sets a taint flag, and alarms sysadmins when nothing worse has happened than use of an old userspace with a recent kernel. Convert the WARN_ONCE to a printk_once to avoid that while continuing to give userspace developers a hint that this is an unwanted backward-compatibility feature and won't be around forever. Reported-by: Ralf Hildebrandt Reported-by: Niels Reported-by: Paweł Sikora Signed-off-by: Jonathan Nieder Liked-by: Gergely Nagy Acked-by: Serge Hallyn Acked-by: James Morris Signed-off-by: Linus Torvalds --- kernel/printk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 37dff3429adb..836a2ae0ac31 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file) return 0; /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ if (capable(CAP_SYS_ADMIN)) { - WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated).\n"); + printk_once(KERN_WARNING "%s (%d): " + "Attempt to access syslog with CAP_SYS_ADMIN " + "but no CAP_SYSLOG (deprecated).\n", + current->comm, task_pid_nr(current)); return 0; } return -EPERM; -- cgit v1.2.1 From c09c47caedc9854d59378d6e34c989e51cfdd2b4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 11 Aug 2011 10:36:05 +0200 Subject: blktrace: add FLUSH/FUA support Add FLUSH/FUA support to blktrace. As FLUSH precedes WRITE and/or FUA follows WRITE, use the same 'F' flag for both cases and distinguish them by their (relative) position. The end results look like (other flags might be shown also): - WRITE: W - WRITE_FLUSH: FW - WRITE_FUA: WF - WRITE_FLUSH_FUA: FWF Note that we reuse TC_BARRIER due to lack of bit space of act_mask so that the older versions of blktrace tools will report flush requests as barriers from now on. Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Namhyung Kim Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa298dfa..7c910a5593a6 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); + what |= MASK_TC_BIT(rw, FLUSH); + what |= MASK_TC_BIT(rw, FUA); pid = tsk->pid; if (act_log_check(bt, what, sector, pid)) @@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) goto out; } + if (tc & BLK_TC_FLUSH) + rwbs[i++] = 'F'; + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE) @@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) else rwbs[i++] = 'N'; + if (tc & BLK_TC_FUA) + rwbs[i++] = 'F'; if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (tc & BLK_TC_BARRIER) - rwbs[i++] = 'B'; if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; if (tc & BLK_TC_META) @@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; @@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) static int blk_log_action(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); @@ -1561,7 +1566,7 @@ static const struct { } mask_maps[] = { { BLK_TC_READ, "read" }, { BLK_TC_WRITE, "write" }, - { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_FLUSH, "flush" }, { BLK_TC_SYNC, "sync" }, { BLK_TC_QUEUE, "queue" }, { BLK_TC_REQUEUE, "requeue" }, @@ -1573,6 +1578,7 @@ static const struct { { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, + { BLK_TC_FUA, "fua" }, }; static int blk_trace_str2mask(const char *str) @@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) { int i = 0; + if (rw & REQ_FLUSH) + rwbs[i++] = 'F'; + if (rw & WRITE) rwbs[i++] = 'W'; else if (rw & REQ_DISCARD) @@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) else rwbs[i++] = 'N'; + if (rw & REQ_FUA) + rwbs[i++] = 'F'; if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; if (rw & REQ_SYNC) -- cgit v1.2.1 From 72fa59970f8698023045ab0713d66f3f4f96945c Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Mon, 8 Aug 2011 19:02:04 +0400 Subject: move RLIMIT_NPROC check from set_user() to do_execve_common() The patch http://lkml.org/lkml/2003/7/13/226 introduced an RLIMIT_NPROC check in set_user() to check for NPROC exceeding via setuid() and similar functions. Before the check there was a possibility to greatly exceed the allowed number of processes by an unprivileged user if the program relied on rlimit only. But the check created new security threat: many poorly written programs simply don't check setuid() return code and believe it cannot fail if executed with root privileges. So, the check is removed in this patch because of too often privilege escalations related to buggy programs. The NPROC can still be enforced in the common code flow of daemons spawning user processes. Most of daemons do fork()+setuid()+execve(). The check introduced in execve() (1) enforces the same limit as in setuid() and (2) doesn't create similar security issues. Neil Brown suggested to track what specific process has exceeded the limit by setting PF_NPROC_EXCEEDED process flag. With the change only this process would fail on execve(), and other processes' execve() behaviour is not changed. Solar Designer suggested to re-check whether NPROC limit is still exceeded at the moment of execve(). If the process was sleeping for days between set*uid() and execve(), and the NPROC counter step down under the limit, the defered execve() failure because NPROC limit was exceeded days ago would be unexpected. If the limit is not exceeded anymore, we clear the flag on successful calls to execve() and fork(). The flag is also cleared on successful calls to set_user() as the limit was exceeded for the previous user, not the current one. Similar check was introduced in -ow patches (without the process flag). v3 - clear PF_NPROC_EXCEEDED on successful calls to set_user(). Reviewed-by: James Morris Signed-off-by: Vasiliy Kulikov Acked-by: NeilBrown Signed-off-by: Linus Torvalds --- kernel/cred.c | 6 ++---- kernel/fork.c | 1 + kernel/sys.c | 15 +++++++++++---- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/cred.c b/kernel/cred.c index 174fa84eca30..8ef31f53c44c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -508,10 +508,8 @@ int commit_creds(struct cred *new) key_fsgid_changed(task); /* do it - * - What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM + * RLIMIT_NPROC limits on user->processes have already been checked + * in set_user(). */ alter_cred_subscribers(new, 2); if (new->user != old->user) diff --git a/kernel/fork.c b/kernel/fork.c index e7ceaca89609..8e6b6f4fb272 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1111,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_cred->user != INIT_USER) goto bad_fork_free; } + current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) diff --git a/kernel/sys.c b/kernel/sys.c index a101ba36c444..dd948a1fca4c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -621,11 +621,18 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + /* + * We don't fail in case of NPROC limit excess here because too many + * poorly written programs don't check set*uid() return code, assuming + * it never fails if called by root. We may still enforce NPROC limit + * for programs doing set*uid()+execve() by harmlessly deferring the + * failure to the execve() stage. + */ if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && - new_user != INIT_USER) { - free_uid(new_user); - return -EAGAIN; - } + new_user != INIT_USER) + current->flags |= PF_NPROC_EXCEEDED; + else + current->flags &= ~PF_NPROC_EXCEEDED; free_uid(new->user); new->user = new_user; -- cgit v1.2.1 From 17f2ae7f677f023997e02fd2ebabd90ea2a0390d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 Aug 2011 13:34:31 +0200 Subject: PM / Domains: Fix build for CONFIG_PM_RUNTIME unset Function genpd_queue_power_off_work() is not defined for CONFIG_PM_RUNTIME, so pm_genpd_poweroff_unused() causes a build error to happen in that case. Fix the problem by making pm_genpd_poweroff_unused() depend on CONFIG_PM_RUNTIME too. Signed-off-by: Rafael J. Wysocki --- kernel/power/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9095c..3744c594b19b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -231,3 +231,7 @@ config PM_CLK config PM_GENERIC_DOMAINS bool depends on PM + +config PM_GENERIC_DOMAINS_RUNTIME + def_bool y + depends on PM_RUNTIME && PM_GENERIC_DOMAINS -- cgit v1.2.1 From d522a0d17963e9c2e556db2cbd60c96d40505b6c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 18 Aug 2011 12:19:27 -0700 Subject: irqdesc: fix new kernel-doc warning Fix kernel-doc warning in irqdesc.c: Warning(kernel/irq/irqdesc.c:353): No description found for parameter 'owner' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- kernel/irq/irqdesc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index cb65d0360e31..039b889ea053 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -344,6 +344,7 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) * * Returns the first irq number or error code */ -- cgit v1.2.1