summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.preempt3
-rw-r--r--kernel/Makefile8
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/auditsc.c2
-rw-r--r--kernel/cgroup.c18
-rw-r--r--kernel/compat.c7
-rw-r--r--kernel/configs.c4
-rw-r--r--kernel/cpuset.c10
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/debug/debug_core.c2
-rw-r--r--kernel/debug/gdbstub.c22
-rw-r--r--kernel/debug/kdb/kdb_bt.c5
-rw-r--r--kernel/debug/kdb/kdb_cmds4
-rw-r--r--kernel/debug/kdb/kdb_debugger.c21
-rw-r--r--kernel/debug/kdb/kdb_io.c36
-rw-r--r--kernel/debug/kdb/kdb_main.c4
-rw-r--r--kernel/debug/kdb/kdb_private.h3
-rw-r--r--kernel/delayacct.c2
-rw-r--r--kernel/events/core.c168
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c90
-rw-r--r--kernel/futex.c58
-rw-r--r--kernel/gcov/Kconfig2
-rw-r--r--kernel/irq/Kconfig4
-rw-r--r--kernel/irq/Makefile1
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/devres.c2
-rw-r--r--kernel/irq/generic-chip.c4
-rw-r--r--kernel/irq/irqdesc.c37
-rw-r--r--kernel/irq/irqdomain.c180
-rw-r--r--kernel/irq/manage.c17
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/kmod.c2
-rw-r--r--kernel/lockdep.c45
-rw-r--r--kernel/module.c80
-rw-r--r--kernel/notifier.c31
-rw-r--r--kernel/nsproxy.c4
-rw-r--r--kernel/panic.c2
-rw-r--r--kernel/params.c18
-rw-r--r--kernel/pid.c1
-rw-r--r--kernel/power/Kconfig8
-rw-r--r--kernel/printk.c8
-rw-r--r--kernel/rcupdate.c2
-rw-r--r--kernel/rcutorture.c4
-rw-r--r--kernel/rcutree_trace.c2
-rw-r--r--kernel/resource.c21
-rw-r--r--kernel/rwsem.c18
-rw-r--r--kernel/sched.c251
-rw-r--r--kernel/sched_autogroup.h1
-rw-r--r--kernel/sched_fair.c72
-rw-r--r--kernel/sched_features.h4
-rw-r--r--kernel/sched_rt.c26
-rw-r--r--kernel/signal.c17
-rw-r--r--kernel/stop_machine.c80
-rw-r--r--kernel/sys.c85
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/sysctl_check.c2
-rw-r--r--kernel/taskstats.c21
-rw-r--r--kernel/time/alarmtimer.c18
-rw-r--r--kernel/time/timekeeping.c28
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/blktrace.c21
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_mmiotrace.c2
-rw-r--r--kernel/tsacct.c15
-rw-r--r--kernel/watchdog.c7
-rw-r--r--kernel/workqueue.c7
68 files changed, 1176 insertions, 463 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf987b95b356..24e7cb0ba26a 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,6 +35,7 @@ config PREEMPT_VOLUNTARY
config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
+ select PREEMPT_COUNT
help
This option reduces the latency of the kernel by making
all kernel code (that is not executing in a critical section)
@@ -52,3 +53,5 @@ config PREEMPT
endchoice
+config PREEMPT_COUNT
+ bool \ No newline at end of file
diff --git a/kernel/Makefile b/kernel/Makefile
index 2d64cfcc8b42..eca595e2fd52 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
- async.o range.o jump_label.o
+ async.o range.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
@@ -125,11 +126,10 @@ targets += config_data.gz
$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
-quiet_cmd_ikconfiggz = IKCFG $@
- cmd_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
+ filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;")
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
- $(call if_changed,ikconfiggz)
+ $(call filechk,ikconfiggz)
$(obj)/time.o: $(obj)/timeconst.h
diff --git a/kernel/audit.c b/kernel/audit.c
index 52501b5d4902..0a1355ca3d79 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -43,7 +43,7 @@
#include <linux/init.h>
#include <asm/types.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 00d79df03e76..ce4b054acee5 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -44,7 +44,7 @@
#include <linux/init.h>
#include <asm/types.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/mm.h>
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2731d115d725..1d2b6ceea95d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -27,9 +27,11 @@
*/
#include <linux/cgroup.h>
+#include <linux/cred.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
@@ -59,7 +61,7 @@
#include <linux/poll.h>
#include <linux/flex_array.h> /* used in cgroup_attach_proc */
-#include <asm/atomic.h>
+#include <linux/atomic.h>
static DEFINE_MUTEX(cgroup_mutex);
@@ -1514,6 +1516,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
struct cgroupfs_root *existing_root;
+ const struct cred *cred;
int i;
BUG_ON(sb->s_root != NULL);
@@ -1593,7 +1596,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
+ cred = override_creds(&init_cred);
cgroup_populate_dir(root_cgrp);
+ revert_creds(cred);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
} else {
@@ -1697,7 +1702,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
char *start;
struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
- rcu_read_lock_held() ||
cgroup_lock_is_held());
if (!dentry || cgrp == dummytop) {
@@ -1723,7 +1727,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
break;
dentry = rcu_dereference_check(cgrp->dentry,
- rcu_read_lock_held() ||
cgroup_lock_is_held());
if (!cgrp->parent)
continue;
@@ -3542,7 +3545,8 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
}
/* the process need read permission on control file */
- ret = file_permission(cfile, MAY_READ);
+ /* AV: shouldn't we check that it's been opened for read instead? */
+ ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
if (ret < 0)
goto fail;
@@ -4813,8 +4817,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
* on this or this is under rcu_read_lock(). Once css->id is allocated,
* it's unchanged until freed.
*/
- cssid = rcu_dereference_check(css->id,
- rcu_read_lock_held() || atomic_read(&css->refcnt));
+ cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
if (cssid)
return cssid->id;
@@ -4826,8 +4829,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
{
struct css_id *cssid;
- cssid = rcu_dereference_check(css->id,
- rcu_read_lock_held() || atomic_read(&css->refcnt));
+ cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
if (cssid)
return cssid->depth;
diff --git a/kernel/compat.c b/kernel/compat.c
index fc9eb093acd5..e2435ee9993a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -158,6 +158,7 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user
__put_user(ts->tv_sec, &cts->tv_sec) ||
__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}
+EXPORT_SYMBOL_GPL(put_compat_timespec);
static long compat_nanosleep_restart(struct restart_block *restart)
{
@@ -890,6 +891,7 @@ sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
}
}
+EXPORT_SYMBOL_GPL(sigset_from_compat);
asmlinkage long
compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
@@ -991,11 +993,8 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
sigset_from_compat(&newset, &newset32);
sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
- spin_lock_irq(&current->sighand->siglock);
current->saved_sigmask = current->blocked;
- current->blocked = newset;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ set_current_blocked(&newset);
current->state = TASK_INTERRUPTIBLE;
schedule();
diff --git a/kernel/configs.c b/kernel/configs.c
index b4066b44a99d..42e8fa075eed 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -92,8 +92,8 @@ static void __exit ikconfig_cleanup(void)
module_init(ikconfig_init);
module_exit(ikconfig_cleanup);
+#endif /* CONFIG_IKCONFIG_PROC */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Randy Dunlap");
MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel");
-
-#endif /* CONFIG_IKCONFIG_PROC */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9c9b7545c810..10131fdaff70 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -55,7 +55,7 @@
#include <linux/sort.h>
#include <asm/uaccess.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
@@ -2460,11 +2460,19 @@ static int cpuset_spread_node(int *rotor)
int cpuset_mem_spread_node(void)
{
+ if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
+ current->cpuset_mem_spread_rotor =
+ node_random(&current->mems_allowed);
+
return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
}
int cpuset_slab_spread_node(void)
{
+ if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
+ current->cpuset_slab_spread_rotor =
+ node_random(&current->mems_allowed);
+
return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
}
diff --git a/kernel/cred.c b/kernel/cred.c
index 174fa84eca30..8ef31f53c44c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -508,10 +508,8 @@ int commit_creds(struct cred *new)
key_fsgid_changed(task);
/* do it
- * - What if a process setreuid()'s and this brings the
- * new uid over his NPROC rlimit? We can check this now
- * cheaply with the new uid cache, so if it matters
- * we should be checking for it. -DaveM
+ * RLIMIT_NPROC limits on user->processes have already been checked
+ * in set_user().
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user)
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index bad6786dee88..0d7c08784efb 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -51,7 +51,7 @@
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/system.h>
#include "debug_core.h"
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index a11db956dd62..34872482315e 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -42,6 +42,8 @@
/* Our I/O buffers. */
static char remcom_in_buffer[BUFMAX];
static char remcom_out_buffer[BUFMAX];
+static int gdbstub_use_prev_in_buf;
+static int gdbstub_prev_in_buf_pos;
/* Storage for the registers, in GDB format. */
static unsigned long gdb_regs[(NUMREGBYTES +
@@ -58,6 +60,13 @@ static int gdbstub_read_wait(void)
int ret = -1;
int i;
+ if (unlikely(gdbstub_use_prev_in_buf)) {
+ if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf)
+ return remcom_in_buffer[gdbstub_prev_in_buf_pos++];
+ else
+ gdbstub_use_prev_in_buf = 0;
+ }
+
/* poll any additional I/O interfaces that are defined */
while (ret < 0)
for (i = 0; kdb_poll_funcs[i] != NULL; i++) {
@@ -109,7 +118,6 @@ static void get_packet(char *buffer)
buffer[count] = ch;
count = count + 1;
}
- buffer[count] = 0;
if (ch == '#') {
xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4;
@@ -124,6 +132,7 @@ static void get_packet(char *buffer)
if (dbg_io_ops->flush)
dbg_io_ops->flush();
}
+ buffer[count] = 0;
} while (checksum != xmitcsum);
}
@@ -1082,12 +1091,11 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
case 'c':
strcpy(remcom_in_buffer, cmd);
return 0;
- case '?':
- gdb_cmd_status(ks);
- break;
- case '\0':
- strcpy(remcom_out_buffer, "");
- break;
+ case '$':
+ strcpy(remcom_in_buffer, cmd);
+ gdbstub_use_prev_in_buf = strlen(remcom_in_buffer);
+ gdbstub_prev_in_buf_pos = 0;
+ return 0;
}
dbg_io_ops->write_char('+');
put_packet(remcom_out_buffer);
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 2f62fe85f16a..7179eac7b41c 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -112,9 +112,8 @@ kdb_bt(int argc, const char **argv)
unsigned long addr;
long offset;
- kdbgetintenv("BTARGS", &argcount); /* Arguments to print */
- kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each
- * proc in bta */
+ /* Prompt after each proc in bta */
+ kdbgetintenv("BTAPROMPT", &btaprompt);
if (strcmp(argv[0], "bta") == 0) {
struct task_struct *g, *p;
diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds
index 56c88e4db309..9834ad303ab6 100644
--- a/kernel/debug/kdb/kdb_cmds
+++ b/kernel/debug/kdb/kdb_cmds
@@ -18,16 +18,12 @@ defcmd dumpcommon "" "Common kdb debugging"
endefcmd
defcmd dumpall "" "First line debugging"
- set BTSYMARG 1
- set BTARGS 9
pid R
-dumpcommon
-bta
endefcmd
defcmd dumpcpu "" "Same as dumpall but only tasks on cpus"
- set BTSYMARG 1
- set BTARGS 9
pid R
-dumpcommon
-btc
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index dd0b1b7dd02c..d9ca9aa481ec 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs);
int kdb_poll_idx = 1;
EXPORT_SYMBOL_GPL(kdb_poll_idx);
+static struct kgdb_state *kdb_ks;
+
int kdb_stub(struct kgdb_state *ks)
{
int error = 0;
@@ -39,6 +41,7 @@ int kdb_stub(struct kgdb_state *ks)
kdb_dbtrap_t db_result = KDB_DB_NOBPT;
int i;
+ kdb_ks = ks;
if (KDB_STATE(REENTRY)) {
reason = KDB_REASON_SWITCH;
KDB_STATE_CLEAR(REENTRY);
@@ -123,20 +126,8 @@ int kdb_stub(struct kgdb_state *ks)
KDB_STATE_CLEAR(PAGER);
kdbnearsym_cleanup();
if (error == KDB_CMD_KGDB) {
- if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) {
- /*
- * This inteface glue which allows kdb to transition in into
- * the gdb stub. In order to do this the '?' or '' gdb serial
- * packet response is processed here. And then control is
- * passed to the gdbstub.
- */
- if (KDB_STATE(DOING_KGDB))
- gdbstub_state(ks, "?");
- else
- gdbstub_state(ks, "");
+ if (KDB_STATE(DOING_KGDB))
KDB_STATE_CLEAR(DOING_KGDB);
- KDB_STATE_CLEAR(DOING_KGDB2);
- }
return DBG_PASS_EVENT;
}
kdb_bp_install(ks->linux_regs);
@@ -166,3 +157,7 @@ int kdb_stub(struct kgdb_state *ks)
return kgdb_info[ks->cpu].ret_state;
}
+void kdb_gdb_state_pass(char *buf)
+{
+ gdbstub_state(kdb_ks, buf);
+}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 96fdaac46a80..4802eb5840e1 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -31,15 +31,21 @@ char kdb_prompt_str[CMD_BUFLEN];
int kdb_trap_printk;
-static void kgdb_transition_check(char *buffer)
+static int kgdb_transition_check(char *buffer)
{
- int slen = strlen(buffer);
- if (strncmp(buffer, "$?#3f", slen) != 0 &&
- strncmp(buffer, "$qSupported#37", slen) != 0 &&
- strncmp(buffer, "+$qSupported#37", slen) != 0) {
+ if (buffer[0] != '+' && buffer[0] != '$') {
KDB_STATE_SET(KGDB_TRANS);
kdb_printf("%s", buffer);
+ } else {
+ int slen = strlen(buffer);
+ if (slen > 3 && buffer[slen - 3] == '#') {
+ kdb_gdb_state_pass(buffer);
+ strcpy(buffer, "kgdb");
+ KDB_STATE_SET(DOING_KGDB);
+ return 1;
+ }
}
+ return 0;
}
static int kdb_read_get_key(char *buffer, size_t bufsize)
@@ -251,6 +257,10 @@ poll_again:
case 13: /* enter */
*lastchar++ = '\n';
*lastchar++ = '\0';
+ if (!KDB_STATE(KGDB_TRANS)) {
+ KDB_STATE_SET(KGDB_TRANS);
+ kdb_printf("%s", buffer);
+ }
kdb_printf("\n");
return buffer;
case 4: /* Del */
@@ -382,22 +392,26 @@ poll_again:
* printed characters if we think that
* kgdb is connecting, until the check
* fails */
- if (!KDB_STATE(KGDB_TRANS))
- kgdb_transition_check(buffer);
- else
+ if (!KDB_STATE(KGDB_TRANS)) {
+ if (kgdb_transition_check(buffer))
+ return buffer;
+ } else {
kdb_printf("%c", key);
+ }
}
/* Special escape to kgdb */
if (lastchar - buffer >= 5 &&
strcmp(lastchar - 5, "$?#3f") == 0) {
+ kdb_gdb_state_pass(lastchar - 5);
strcpy(buffer, "kgdb");
KDB_STATE_SET(DOING_KGDB);
return buffer;
}
- if (lastchar - buffer >= 14 &&
- strcmp(lastchar - 14, "$qSupported#37") == 0) {
+ if (lastchar - buffer >= 11 &&
+ strcmp(lastchar - 11, "$qSupported") == 0) {
+ kdb_gdb_state_pass(lastchar - 11);
strcpy(buffer, "kgdb");
- KDB_STATE_SET(DOING_KGDB2);
+ KDB_STATE_SET(DOING_KGDB);
return buffer;
}
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index be14779bcef6..63786e71a3cd 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -145,7 +145,6 @@ static char *__env[] = {
#endif
"RADIX=16",
"MDCOUNT=8", /* lines of md output */
- "BTARGS=9", /* 9 possible args in bt */
KDB_PLATFORM_ENV,
"DTABCOUNT=30",
"NOSECT=1",
@@ -172,6 +171,7 @@ static char *__env[] = {
(char *)0,
(char *)0,
(char *)0,
+ (char *)0,
};
static const int __nenv = (sizeof(__env) / sizeof(char *));
@@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
}
if (result == KDB_CMD_KGDB) {
- if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)))
+ if (!KDB_STATE(DOING_KGDB))
kdb_printf("Entering please attach debugger "
"or use $D#44+ or $3#33\n");
break;
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 35d69ed1dfb5..e381d105b40b 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -21,7 +21,6 @@
#define KDB_CMD_SS (-1003)
#define KDB_CMD_SSB (-1004)
#define KDB_CMD_KGDB (-1005)
-#define KDB_CMD_KGDB2 (-1006)
/* Internal debug flags */
#define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */
@@ -146,7 +145,6 @@ extern int kdb_state;
* keyboard on this cpu */
#define KDB_STATE_KEXEC 0x00040000 /* kexec issued */
#define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */
-#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */
#define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */
#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch
* specific use */
@@ -218,6 +216,7 @@ extern void kdb_print_nameval(const char *name, unsigned long val);
extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
extern void kdb_meminfo_proc_show(void);
extern char *kdb_getstr(char *, size_t, char *);
+extern void kdb_gdb_state_pass(char *buf);
/* Defines for kdb_symbol_print */
#define KDB_SP_SPACEB 0x0001 /* Space before string */
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index ead9b610aa71..418b3f7053aa 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -19,8 +19,10 @@
#include <linux/time.h>
#include <linux/sysctl.h>
#include <linux/delayacct.h>
+#include <linux/module.h>
int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */
+EXPORT_SYMBOL_GPL(delayacct_on);
struct kmem_cache *delayacct_cache;
static int __init delayacct_setup_disable(char *str)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8785e26ee1c..d1a1bee35228 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -29,6 +29,7 @@
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
+#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
@@ -399,14 +400,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
local_irq_restore(flags);
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /*
+ * next is NULL when called from perf_event_enable_on_exec()
+ * that will systematically cause a cgroup_switch()
+ */
+ if (next)
+ cgrp2 = perf_cgroup_from_task(next);
+
+ /*
+ * only schedule out current cgroup events if we know
+ * that we are switching to a different cgroup. Otherwise,
+ * do no touch the cgroup events.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /* prev can never be NULL */
+ cgrp2 = perf_cgroup_from_task(prev);
+
+ /*
+ * only need to schedule in cgroup events if we are changing
+ * cgroup during ctxsw. Cgroup events were not scheduled
+ * out of ctxsw out if that was not the case.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWIN);
}
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -518,11 +559,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
}
@@ -1988,7 +2031,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_out(task);
+ perf_cgroup_sched_out(task, next);
}
static void task_ctx_sched_out(struct perf_event_context *ctx)
@@ -2153,7 +2196,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void __perf_event_task_sched_in(struct task_struct *task)
+void __perf_event_task_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
struct perf_event_context *ctx;
int ctxn;
@@ -2171,7 +2215,7 @@ void __perf_event_task_sched_in(struct task_struct *task)
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_in(task);
+ perf_cgroup_sched_in(prev, task);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2427,7 +2471,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
* ctxswin cgroup events which are already scheduled
* in.
*/
- perf_cgroup_sched_out(current);
+ perf_cgroup_sched_out(current, NULL);
raw_spin_lock(&ctx->lock);
task_ctx_sched_out(ctx);
@@ -3353,8 +3397,8 @@ static int perf_event_index(struct perf_event *event)
}
static void calc_timer_values(struct perf_event *event,
- u64 *running,
- u64 *enabled)
+ u64 *enabled,
+ u64 *running)
{
u64 now, ctx_time;
@@ -5715,6 +5759,7 @@ struct pmu *perf_init_event(struct perf_event *event)
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
if (pmu) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (ret)
pmu = ERR_PTR(ret);
@@ -5722,6 +5767,7 @@ struct pmu *perf_init_event(struct perf_event *event)
}
list_for_each_entry_rcu(pmu, &pmus, entry) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (!ret)
goto unlock;
@@ -5848,8 +5894,6 @@ done:
return ERR_PTR(err);
}
- event->pmu = pmu;
-
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
jump_label_inc(&perf_sched_events);
@@ -6809,7 +6853,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
- if (swhash->hlist_refcount > 0) {
+ if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
struct swevent_hlist *hlist;
hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6898,7 +6942,14 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
- switch (action & ~CPU_TASKS_FROZEN) {
+ /*
+ * Ignore suspend/resume action, the perf_pm_notifier will
+ * take care of that.
+ */
+ if (action & CPU_TASKS_FROZEN)
+ return NOTIFY_OK;
+
+ switch (action) {
case CPU_UP_PREPARE:
case CPU_DOWN_FAILED:
@@ -6917,6 +6968,90 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
+static void perf_pm_resume_cpu(void *unused)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+ int idx;
+
+ idx = srcu_read_lock(&pmus_srcu);
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ ctx = cpuctx->task_ctx;
+
+ perf_ctx_lock(cpuctx, ctx);
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+ if (ctx)
+ ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, ctx);
+ }
+ srcu_read_unlock(&pmus_srcu, idx);
+}
+
+static void perf_pm_suspend_cpu(void *unused)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+ int idx;
+
+ idx = srcu_read_lock(&pmus_srcu);
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ ctx = cpuctx->task_ctx;
+
+ perf_ctx_lock(cpuctx, ctx);
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ perf_event_sched_in(cpuctx, ctx, current);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, ctx);
+ }
+ srcu_read_unlock(&pmus_srcu, idx);
+}
+
+static int perf_resume(void)
+{
+ get_online_cpus();
+ smp_call_function(perf_pm_resume_cpu, NULL, 1);
+ put_online_cpus();
+
+ return NOTIFY_OK;
+}
+
+static int perf_suspend(void)
+{
+ get_online_cpus();
+ smp_call_function(perf_pm_suspend_cpu, NULL, 1);
+ put_online_cpus();
+
+ return NOTIFY_OK;
+}
+
+static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
+{
+ switch (action) {
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ return perf_resume();
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ return perf_suspend();
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block perf_pm_notifier = {
+ .notifier_call = perf_pm,
+};
+
void __init perf_event_init(void)
{
int ret;
@@ -6931,6 +7066,7 @@ void __init perf_event_init(void)
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
register_reboot_notifier(&perf_reboot_notifier);
+ register_pm_notifier(&perf_pm_notifier);
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
diff --git a/kernel/exit.c b/kernel/exit.c
index 73bb192a3d32..2913b3509d42 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
struct tty_struct *uninitialized_var(tty);
sighand = rcu_dereference_check(tsk->sighand,
- rcu_read_lock_held() ||
lockdep_tasklist_lock_is_held());
spin_lock(&sighand->siglock);
@@ -898,7 +897,6 @@ NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
- WARN_ON(atomic_read(&tsk->fs_excl));
WARN_ON(blk_needs_flush_plug(tsk));
if (unlikely(in_interrupt()))
@@ -982,6 +980,7 @@ NORET_TYPE void do_exit(long code)
trace_sched_process_exit(tsk);
exit_sem(tsk);
+ exit_shm(tsk);
exit_files(tsk);
exit_fs(tsk);
check_stack_usage();
diff --git a/kernel/fork.c b/kernel/fork.c
index ca339c5c5819..8e6b6f4fb272 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -80,7 +80,7 @@
* Protected counters by write_lock_irq(&tasklist_lock)
*/
unsigned long total_forks; /* Handle normal Linux uptimes. */
-int nr_threads; /* The idle threads do not count.. */
+int nr_threads; /* The idle threads do not count.. */
int max_threads; /* tunable limit on nr_threads */
@@ -232,7 +232,7 @@ void __init fork_init(unsigned long mempages)
/*
* we need to allow at least 20 threads to boot a system
*/
- if(max_threads < 20)
+ if (max_threads < 20)
max_threads = 20;
init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
@@ -268,7 +268,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
return NULL;
}
- err = arch_dup_task_struct(tsk, orig);
+ err = arch_dup_task_struct(tsk, orig);
if (err)
goto out;
@@ -288,9 +288,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
tsk->stack_canary = get_random_int();
#endif
- /* One for us, one for whoever does the "release_task()" (usually parent) */
- atomic_set(&tsk->usage,2);
- atomic_set(&tsk->fs_excl, 0);
+ /*
+ * One for us, one for whoever does the "release_task()" (usually
+ * parent)
+ */
+ atomic_set(&tsk->usage, 2);
#ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
#endif
@@ -438,7 +440,7 @@ fail_nomem:
goto out;
}
-static inline int mm_alloc_pgd(struct mm_struct * mm)
+static inline int mm_alloc_pgd(struct mm_struct *mm)
{
mm->pgd = pgd_alloc(mm);
if (unlikely(!mm->pgd))
@@ -446,7 +448,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm)
return 0;
}
-static inline void mm_free_pgd(struct mm_struct * mm)
+static inline void mm_free_pgd(struct mm_struct *mm)
{
pgd_free(mm, mm->pgd);
}
@@ -483,7 +485,7 @@ static void mm_init_aio(struct mm_struct *mm)
#endif
}
-static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
{
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
@@ -514,9 +516,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
/*
* Allocate and initialize an mm_struct.
*/
-struct mm_struct * mm_alloc(void)
+struct mm_struct *mm_alloc(void)
{
- struct mm_struct * mm;
+ struct mm_struct *mm;
mm = allocate_mm();
if (!mm)
@@ -584,7 +586,7 @@ void added_exe_file_vma(struct mm_struct *mm)
void removed_exe_file_vma(struct mm_struct *mm)
{
mm->num_exe_file_vmas--;
- if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
+ if ((mm->num_exe_file_vmas == 0) && mm->exe_file) {
fput(mm->exe_file);
mm->exe_file = NULL;
}
@@ -776,9 +778,9 @@ fail_nocontext:
return NULL;
}
-static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
- struct mm_struct * mm, *oldmm;
+ struct mm_struct *mm, *oldmm;
int retval;
tsk->min_flt = tsk->maj_flt = 0;
@@ -845,7 +847,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
return 0;
}
-static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
{
struct files_struct *oldf, *newf;
int error = 0;
@@ -1109,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->real_cred->user != INIT_USER)
goto bad_fork_free;
}
+ current->flags &= ~PF_NPROC_EXCEEDED;
retval = copy_creds(p, clone_flags);
if (retval < 0)
@@ -1167,13 +1170,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
cgroup_fork(p);
#ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy);
- if (IS_ERR(p->mempolicy)) {
- retval = PTR_ERR(p->mempolicy);
- p->mempolicy = NULL;
- goto bad_fork_cleanup_cgroup;
- }
+ if (IS_ERR(p->mempolicy)) {
+ retval = PTR_ERR(p->mempolicy);
+ p->mempolicy = NULL;
+ goto bad_fork_cleanup_cgroup;
+ }
mpol_fix_fork_child_flag(p);
#endif
+#ifdef CONFIG_CPUSETS
+ p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
+ p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
p->irq_events = 0;
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
@@ -1213,25 +1220,33 @@ static struct task_struct *copy_process(unsigned long clone_flags,
retval = perf_event_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
-
- if ((retval = audit_alloc(p)))
+ retval = audit_alloc(p);
+ if (retval)
goto bad_fork_cleanup_policy;
/* copy all the process information */
- if ((retval = copy_semundo(clone_flags, p)))
+ retval = copy_semundo(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_audit;
- if ((retval = copy_files(clone_flags, p)))
+ retval = copy_files(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_semundo;
- if ((retval = copy_fs(clone_flags, p)))
+ retval = copy_fs(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_files;
- if ((retval = copy_sighand(clone_flags, p)))
+ retval = copy_sighand(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_fs;
- if ((retval = copy_signal(clone_flags, p)))
+ retval = copy_signal(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_sighand;
- if ((retval = copy_mm(clone_flags, p)))
+ retval = copy_mm(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_signal;
- if ((retval = copy_namespaces(clone_flags, p)))
+ retval = copy_namespaces(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_mm;
- if ((retval = copy_io(clone_flags, p)))
+ retval = copy_io(clone_flags, p);
+ if (retval)
goto bad_fork_cleanup_namespaces;
retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
if (retval)
@@ -1253,7 +1268,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/*
* Clear TID on mm_release()?
*/
- p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
@@ -1321,7 +1336,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* it's process group.
* A fatal signal pending means that current will exit, so the new
* thread can't slip out of an OOM kill (or normal SIGKILL).
- */
+ */
recalc_sigpending();
if (signal_pending(current)) {
spin_unlock(&current->sighand->siglock);
@@ -1585,6 +1600,7 @@ void __init proc_caches_init(void)
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
mmap_init();
+ nsproxy_cache_init();
}
/*
@@ -1681,12 +1697,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
*/
if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
do_sysvsem = 1;
- if ((err = unshare_fs(unshare_flags, &new_fs)))
+ err = unshare_fs(unshare_flags, &new_fs);
+ if (err)
goto bad_unshare_out;
- if ((err = unshare_fd(unshare_flags, &new_fd)))
+ err = unshare_fd(unshare_flags, &new_fd);
+ if (err)
goto bad_unshare_cleanup_fs;
- if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
- new_fs)))
+ err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+ if (err)
goto bad_unshare_cleanup_fd;
if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
diff --git a/kernel/futex.c b/kernel/futex.c
index 3fbc76cbb9aa..11cbe052b2e8 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -218,6 +218,8 @@ static void drop_futex_key_refs(union futex_key *key)
* @uaddr: virtual address of the futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
+ * @rw: mapping needs to be read/write (values: VERIFY_READ,
+ * VERIFY_WRITE)
*
* Returns a negative error code or 0
* The key words are stored in *key on success.
@@ -229,12 +231,12 @@ static void drop_futex_key_refs(union futex_key *key)
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
struct page *page, *page_head;
- int err;
+ int err, ro = 0;
/*
* The futex address must be "naturally" aligned.
@@ -262,8 +264,18 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
again:
err = get_user_pages_fast(address, 1, 1, &page);
+ /*
+ * If write access is not required (eg. FUTEX_WAIT), try
+ * and get read-only access.
+ */
+ if (err == -EFAULT && rw == VERIFY_READ) {
+ err = get_user_pages_fast(address, 1, 0, &page);
+ ro = 1;
+ }
if (err < 0)
return err;
+ else
+ err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
page_head = page;
@@ -305,6 +317,13 @@ again:
if (!page_head->mapping) {
unlock_page(page_head);
put_page(page_head);
+ /*
+ * ZERO_PAGE pages don't have a mapping. Avoid a busy loop
+ * trying to find one. RW mapping would have COW'd (and thus
+ * have a mapping) so this page is RO and won't ever change.
+ */
+ if ((page_head == ZERO_PAGE(address)))
+ return -EFAULT;
goto again;
}
@@ -316,6 +335,15 @@ again:
* the object not the particular process.
*/
if (PageAnon(page_head)) {
+ /*
+ * A RO anonymous page will never change and thus doesn't make
+ * sense for futex operations.
+ */
+ if (ro) {
+ err = -EFAULT;
+ goto out;
+ }
+
key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm;
key->private.address = address;
@@ -327,9 +355,10 @@ again:
get_futex_key_refs(key);
+out:
unlock_page(page_head);
put_page(page_head);
- return 0;
+ return err;
}
static inline void put_futex_key(union futex_key *key)
@@ -355,8 +384,8 @@ static int fault_in_user_writeable(u32 __user *uaddr)
int ret;
down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, (unsigned long)uaddr,
- 1, 1, 0, NULL, NULL);
+ ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
+ FAULT_FLAG_WRITE);
up_read(&mm->mmap_sem);
return ret < 0 ? ret : 0;
@@ -940,7 +969,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if (!bitset)
return -EINVAL;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
@@ -986,10 +1015,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
int ret, op_ret;
retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -1243,10 +1272,11 @@ retry:
pi_state = NULL;
}
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
+ requeue_pi ? VERIFY_WRITE : VERIFY_READ);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -1790,7 +1820,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
* while the syscall executes.
*/
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
if (unlikely(ret != 0))
return ret;
@@ -1941,7 +1971,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
}
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
@@ -2060,7 +2090,7 @@ retry:
if ((uval & FUTEX_TID_MASK) != vpid)
return -EPERM;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
@@ -2249,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
debug_rt_mutex_init_waiter(&rt_waiter);
rt_waiter.task = NULL;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 5bf924d80b5c..a92028196cc1 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -3,7 +3,7 @@ menu "GCOV-based kernel profiling"
config GCOV_KERNEL
bool "Enable gcov-based kernel profiling"
depends on DEBUG_FS
- select CONSTRUCTORS
+ select CONSTRUCTORS if !UML
default n
---help---
This option enables gcov-based code profiling (e.g. for code coverage
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index d1d051b38e0b..5a38bf4de641 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -52,6 +52,10 @@ config IRQ_EDGE_EOI_HANDLER
config GENERIC_IRQ_CHIP
bool
+# Generic irq_domain hw <--> linux irq number translation
+config IRQ_DOMAIN
+ bool
+
# Support forced irq threading
config IRQ_FORCED_THREADING
bool
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 73290056cfb6..fff17381f0af 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -2,6 +2,7 @@
obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
+obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_PM_SLEEP) += pm.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d5a3009da71a..dc5114b4c16c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc)
desc->depth = 1;
if (desc->irq_data.chip->irq_shutdown)
desc->irq_data.chip->irq_shutdown(&desc->irq_data);
- if (desc->irq_data.chip->irq_disable)
+ else if (desc->irq_data.chip->irq_disable)
desc->irq_data.chip->irq_disable(&desc->irq_data);
else
desc->irq_data.chip->irq_mask(&desc->irq_data);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 1ef4ffcdfa55..bd8e788d71e0 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -87,8 +87,8 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
{
struct irq_devres match_data = { irq, dev_id };
- free_irq(irq, dev_id);
WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
&match_data));
+ free_irq(irq, dev_id);
}
EXPORT_SYMBOL(devm_free_irq);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 3a2cab407b93..e38544dddb18 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
for (i = gc->irq_base; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
if (flags & IRQ_GC_INIT_NESTED_LOCK)
@@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
raw_spin_unlock(&gc_lock);
for (; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
/* Remove handler first. That will mask the irq line */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4c60a50e66b2..039b889ea053 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { }
static inline int desc_node(struct irq_desc *desc) { return 0; }
#endif
-static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
+ struct module *owner)
{
int cpu;
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
desc->irq_count = 0;
desc->irqs_unhandled = 0;
desc->name = NULL;
+ desc->owner = owner;
for_each_possible_cpu(cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
desc_smp_init(desc, node);
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc)
static inline void free_masks(struct irq_desc *desc) { }
#endif
-static struct irq_desc *alloc_desc(int irq, int node)
+static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
{
struct irq_desc *desc;
gfp_t gfp = GFP_KERNEL;
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- desc_set_defaults(irq, desc, node);
+ desc_set_defaults(irq, desc, node, owner);
return desc;
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq)
kfree(desc);
}
-static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
struct irq_desc *desc;
int i;
for (i = 0; i < cnt; i++) {
- desc = alloc_desc(start + i, node);
+ desc = alloc_desc(start + i, node, owner);
if (!desc)
goto err;
mutex_lock(&sparse_irq_lock);
@@ -227,7 +230,7 @@ int __init early_irq_init(void)
nr_irqs = initcnt;
for (i = 0; i < initcnt; i++) {
- desc = alloc_desc(i, node);
+ desc = alloc_desc(i, node, NULL);
set_bit(i, allocated_irqs);
irq_insert_desc(i, desc);
}
@@ -261,7 +264,7 @@ int __init early_irq_init(void)
alloc_masks(&desc[i], GFP_KERNEL, node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- desc_set_defaults(i, &desc[i], node);
+ desc_set_defaults(i, &desc[i], node, NULL);
}
return arch_early_irq_init();
}
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq)
dynamic_irq_cleanup(irq);
}
-static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ struct irq_desc *desc = irq_to_desc(start + i);
+
+ desc->owner = owner;
+ }
return start;
}
@@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
* @from: Start the search from this irq number
* @cnt: Number of consecutive irqs to allocate.
* @node: Preferred node on which the irq descriptor should be allocated
+ * @owner: Owning module (can be NULL)
*
* Returns the first irq number or error code
*/
int __ref
-irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner)
{
int start, ret;
@@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
- return alloc_descs(start, cnt, node);
+ return alloc_descs(start, cnt, node, owner);
err:
mutex_unlock(&sparse_irq_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(irq_alloc_descs);
+EXPORT_SYMBOL_GPL(__irq_alloc_descs);
/**
* irq_reserve_irqs - mark irqs allocated
@@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq)
unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags);
- desc_set_defaults(irq, desc, desc_node(desc));
+ desc_set_defaults(irq, desc, desc_node(desc), NULL);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
new file mode 100644
index 000000000000..d5828da3fd38
--- /dev/null
+++ b/kernel/irq/irqdomain.c
@@ -0,0 +1,180 @@
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+
+static LIST_HEAD(irq_domain_list);
+static DEFINE_MUTEX(irq_domain_mutex);
+
+/**
+ * irq_domain_add() - Register an irq_domain
+ * @domain: ptr to initialized irq_domain structure
+ *
+ * Registers an irq_domain structure. The irq_domain must at a minimum be
+ * initialized with an ops structure pointer, and either a ->to_irq hook or
+ * a valid irq_base value. Everything else is optional.
+ */
+void irq_domain_add(struct irq_domain *domain)
+{
+ struct irq_data *d;
+ int hwirq;
+
+ /*
+ * This assumes that the irq_domain owner has already allocated
+ * the irq_descs. This block will be removed when support for dynamic
+ * allocation of irq_descs is added to irq_domain.
+ */
+ for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) {
+ d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq));
+ if (d || d->domain) {
+ /* things are broken; just report, don't clean up */
+ WARN(1, "error: irq_desc already assigned to a domain");
+ return;
+ }
+ d->domain = domain;
+ d->hwirq = hwirq;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ list_add(&domain->list, &irq_domain_list);
+ mutex_unlock(&irq_domain_mutex);
+}
+
+/**
+ * irq_domain_del() - Unregister an irq_domain
+ * @domain: ptr to registered irq_domain.
+ */
+void irq_domain_del(struct irq_domain *domain)
+{
+ struct irq_data *d;
+ int hwirq;
+
+ mutex_lock(&irq_domain_mutex);
+ list_del(&domain->list);
+ mutex_unlock(&irq_domain_mutex);
+
+ /* Clear the irq_domain assignments */
+ for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) {
+ d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq));
+ d->domain = NULL;
+ }
+}
+
+#if defined(CONFIG_OF_IRQ)
+/**
+ * irq_create_of_mapping() - Map a linux irq number from a DT interrupt spec
+ *
+ * Used by the device tree interrupt mapping code to translate a device tree
+ * interrupt specifier to a valid linux irq number. Returns either a valid
+ * linux IRQ number or 0.
+ *
+ * When the caller no longer need the irq number returned by this function it
+ * should arrange to call irq_dispose_mapping().
+ */
+unsigned int irq_create_of_mapping(struct device_node *controller,
+ const u32 *intspec, unsigned int intsize)
+{
+ struct irq_domain *domain;
+ unsigned long hwirq;
+ unsigned int irq, type;
+ int rc = -EINVAL;
+
+ /* Find a domain which can translate the irq spec */
+ mutex_lock(&irq_domain_mutex);
+ list_for_each_entry(domain, &irq_domain_list, list) {
+ if (!domain->ops->dt_translate)
+ continue;
+ rc = domain->ops->dt_translate(domain, controller,
+ intspec, intsize, &hwirq, &type);
+ if (rc == 0)
+ break;
+ }
+ mutex_unlock(&irq_domain_mutex);
+
+ if (rc != 0)
+ return 0;
+
+ irq = irq_domain_to_irq(domain, hwirq);
+ if (type != IRQ_TYPE_NONE)
+ irq_set_irq_type(irq, type);
+ pr_debug("%s: mapped hwirq=%i to irq=%i, flags=%x\n",
+ controller->full_name, (int)hwirq, irq, type);
+ return irq;
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+/**
+ * irq_dispose_mapping() - Discard a mapping created by irq_create_of_mapping()
+ * @irq: linux irq number to be discarded
+ *
+ * Calling this function indicates the caller no longer needs a reference to
+ * the linux irq number returned by a prior call to irq_create_of_mapping().
+ */
+void irq_dispose_mapping(unsigned int irq)
+{
+ /*
+ * nothing yet; will be filled when support for dynamic allocation of
+ * irq_descs is added to irq_domain
+ */
+}
+EXPORT_SYMBOL_GPL(irq_dispose_mapping);
+
+int irq_domain_simple_dt_translate(struct irq_domain *d,
+ struct device_node *controller,
+ const u32 *intspec, unsigned int intsize,
+ unsigned long *out_hwirq, unsigned int *out_type)
+{
+ if (d->of_node != controller)
+ return -EINVAL;
+ if (intsize < 1)
+ return -EINVAL;
+
+ *out_hwirq = intspec[0];
+ *out_type = IRQ_TYPE_NONE;
+ if (intsize > 1)
+ *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+ return 0;
+}
+
+struct irq_domain_ops irq_domain_simple_ops = {
+ .dt_translate = irq_domain_simple_dt_translate,
+};
+EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
+
+/**
+ * irq_domain_create_simple() - Set up a 'simple' translation range
+ */
+void irq_domain_add_simple(struct device_node *controller, int irq_base)
+{
+ struct irq_domain *domain;
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain) {
+ WARN_ON(1);
+ return;
+ }
+
+ domain->irq_base = irq_base;
+ domain->of_node = of_node_get(controller);
+ domain->ops = &irq_domain_simple_ops;
+ irq_domain_add(domain);
+}
+EXPORT_SYMBOL_GPL(irq_domain_add_simple);
+
+void irq_domain_generate_simple(const struct of_device_id *match,
+ u64 phys_base, unsigned int irq_start)
+{
+ struct device_node *node;
+ pr_info("looking for phys_base=%llx, irq_start=%i\n",
+ (unsigned long long) phys_base, (int) irq_start);
+ node = of_find_matching_node_by_address(NULL, match, phys_base);
+ if (node)
+ irq_domain_add_simple(node, irq_start);
+ else
+ pr_info("no node found\n");
+}
+EXPORT_SYMBOL_GPL(irq_domain_generate_simple);
+#endif /* CONFIG_OF_IRQ */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a7840aeb0fb..9b956fa20308 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (desc->irq_data.chip == &no_irq_chip)
return -ENOSYS;
+ if (!try_module_get(desc->owner))
+ return -ENODEV;
/*
* Some drivers like serial.c use request_irq() heavily,
* so we have to be careful not to interfere with a
@@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
nested = irq_settings_is_nested_thread(desc);
if (nested) {
- if (!new->thread_fn)
- return -EINVAL;
+ if (!new->thread_fn) {
+ ret = -EINVAL;
+ goto out_mput;
+ }
/*
* Replace the primary handler which was provided from
* the driver for non nested interrupt handling by the
@@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
new->name);
- if (IS_ERR(t))
- return PTR_ERR(t);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto out_mput;
+ }
/*
* We keep the reference to the task struct even if
* the thread dies to avoid that the interrupt code
@@ -1095,6 +1101,8 @@ out_thread:
kthread_stop(t);
put_task_struct(t);
}
+out_mput:
+ module_put(desc->owner);
return ret;
}
@@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
put_task_struct(action->thread);
}
+ module_put(desc->owner);
return action;
}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 8d814cbc8109..296fbc84d659 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1095,7 +1095,7 @@ size_t crash_get_memory_size(void)
size_t size = 0;
mutex_lock(&kexec_mutex);
if (crashk_res.end != crashk_res.start)
- size = crashk_res.end - crashk_res.start + 1;
+ size = resource_size(&crashk_res);
mutex_unlock(&kexec_mutex);
return size;
}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 47613dfb7b28..ddc7644c1305 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -274,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work)
* (used for preventing user land processes from being created after the user
* land has been frozen during a system-wide hibernation or suspend operation).
*/
-static int usermodehelper_disabled;
+static int usermodehelper_disabled = 1;
/* Number of helpers running */
static atomic_t running_helpers = ATOMIC_INIT(0);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3956f5149e25..91d67ce3a8d5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2468,7 +2468,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
BUG_ON(usage_bit >= LOCK_USAGE_STATES);
- if (hlock_class(hlock)->key == &__lockdep_no_validate__)
+ if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys)
continue;
if (!mark_lock(curr, hlock, usage_bit))
@@ -2485,23 +2485,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
{
struct task_struct *curr = current;
- if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
- return;
-
- if (unlikely(curr->hardirqs_enabled)) {
- /*
- * Neither irq nor preemption are disabled here
- * so this is racy by nature but losing one hit
- * in a stat is not a big deal.
- */
- __debug_atomic_inc(redundant_hardirqs_on);
- return;
- }
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
- if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
- return;
/*
* We are going to turn hardirqs on, so set the
* usage bit for all held locks:
@@ -2529,9 +2515,25 @@ void trace_hardirqs_on_caller(unsigned long ip)
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
+ if (unlikely(current->hardirqs_enabled)) {
+ /*
+ * Neither irq nor preemption are disabled here
+ * so this is racy by nature but losing one hit
+ * in a stat is not a big deal.
+ */
+ __debug_atomic_inc(redundant_hardirqs_on);
+ return;
+ }
+
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
+ if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
+ return;
+
+ if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+ return;
+
current->lockdep_recursion = 1;
__trace_hardirqs_on_caller(ip);
current->lockdep_recursion = 0;
@@ -2872,10 +2874,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
void lockdep_init_map(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass)
{
- int i;
-
- for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
- lock->class_cache[i] = NULL;
+ memset(lock, 0, sizeof(*lock));
#ifdef CONFIG_LOCK_STAT
lock->cpu = raw_smp_processor_id();
@@ -3112,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
if (!class)
class = look_up_lock_class(lock, 0);
- if (DEBUG_LOCKS_WARN_ON(!class))
+ /*
+ * If look_up_lock_class() failed to find a class, we're trying
+ * to test if we hold a lock that has never yet been acquired.
+ * Clearly if the lock hasn't been acquired _ever_, we're not
+ * holding it either, so report failure.
+ */
+ if (!class)
return 0;
if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
diff --git a/kernel/module.c b/kernel/module.c
index 795bdc7f5c3f..04379f92f843 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -545,9 +545,9 @@ static void setup_modinfo_##field(struct module *mod, const char *s) \
mod->field = kstrdup(s, GFP_KERNEL); \
} \
static ssize_t show_modinfo_##field(struct module_attribute *mattr, \
- struct module *mod, char *buffer) \
+ struct module_kobject *mk, char *buffer) \
{ \
- return sprintf(buffer, "%s\n", mod->field); \
+ return sprintf(buffer, "%s\n", mk->mod->field); \
} \
static int modinfo_##field##_exists(struct module *mod) \
{ \
@@ -902,9 +902,9 @@ void symbol_put_addr(void *addr)
EXPORT_SYMBOL_GPL(symbol_put_addr);
static ssize_t show_refcnt(struct module_attribute *mattr,
- struct module *mod, char *buffer)
+ struct module_kobject *mk, char *buffer)
{
- return sprintf(buffer, "%u\n", module_refcount(mod));
+ return sprintf(buffer, "%u\n", module_refcount(mk->mod));
}
static struct module_attribute refcnt = {
@@ -952,11 +952,11 @@ static inline int module_unload_init(struct module *mod)
#endif /* CONFIG_MODULE_UNLOAD */
static ssize_t show_initstate(struct module_attribute *mattr,
- struct module *mod, char *buffer)
+ struct module_kobject *mk, char *buffer)
{
const char *state = "unknown";
- switch (mod->state) {
+ switch (mk->mod->state) {
case MODULE_STATE_LIVE:
state = "live";
break;
@@ -975,10 +975,27 @@ static struct module_attribute initstate = {
.show = show_initstate,
};
+static ssize_t store_uevent(struct module_attribute *mattr,
+ struct module_kobject *mk,
+ const char *buffer, size_t count)
+{
+ enum kobject_action action;
+
+ if (kobject_action_type(buffer, count, &action) == 0)
+ kobject_uevent(&mk->kobj, action);
+ return count;
+}
+
+struct module_attribute module_uevent = {
+ .attr = { .name = "uevent", .mode = 0200 },
+ .store = store_uevent,
+};
+
static struct module_attribute *modinfo_attrs[] = {
&modinfo_version,
&modinfo_srcversion,
&initstate,
+ &module_uevent,
#ifdef CONFIG_MODULE_UNLOAD
&refcnt,
#endif
@@ -1187,7 +1204,7 @@ struct module_sect_attrs
};
static ssize_t module_sect_show(struct module_attribute *mattr,
- struct module *mod, char *buf)
+ struct module_kobject *mk, char *buf)
{
struct module_sect_attr *sattr =
container_of(mattr, struct module_sect_attr, mattr);
@@ -1697,6 +1714,15 @@ static void unset_module_core_ro_nx(struct module *mod) { }
static void unset_module_init_ro_nx(struct module *mod) { }
#endif
+void __weak module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+}
+
+void __weak module_arch_cleanup(struct module *mod)
+{
+}
+
/* Free a module, remove from lists, etc. */
static void free_module(struct module *mod)
{
@@ -1851,6 +1877,26 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
return ret;
}
+int __weak apply_relocate(Elf_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ pr_err("module %s: REL relocation unsupported\n", me->name);
+ return -ENOEXEC;
+}
+
+int __weak apply_relocate_add(Elf_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ pr_err("module %s: RELA relocation unsupported\n", me->name);
+ return -ENOEXEC;
+}
+
static int apply_relocations(struct module *mod, const struct load_info *info)
{
unsigned int i;
@@ -2235,6 +2281,11 @@ static void dynamic_debug_remove(struct _ddebug *debug)
ddebug_remove_module(debug->modname);
}
+void * __weak module_alloc(unsigned long size)
+{
+ return size == 0 ? NULL : vmalloc_exec(size);
+}
+
static void *module_alloc_update_bounds(unsigned long size)
{
void *ret = module_alloc(size);
@@ -2645,6 +2696,14 @@ static void flush_module_icache(const struct module *mod)
set_fs(old_fs);
}
+int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
+ Elf_Shdr *sechdrs,
+ char *secstrings,
+ struct module *mod)
+{
+ return 0;
+}
+
static struct module *layout_and_allocate(struct load_info *info)
{
/* Module within temporary copy. */
@@ -2716,6 +2775,13 @@ static void module_deallocate(struct module *mod, struct load_info *info)
module_free(mod, mod->module_core);
}
+int __weak module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ return 0;
+}
+
static int post_relocation(struct module *mod, const struct load_info *info)
{
/* Sort exception table now relocations are done. */
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 2488ba7eb568..8d7b435806c9 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -525,37 +525,6 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh)
}
EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
-/**
- * register_reboot_notifier - Register function to be called at reboot time
- * @nb: Info about notifier function to be called
- *
- * Registers a function with the list of functions
- * to be called at reboot time.
- *
- * Currently always returns zero, as blocking_notifier_chain_register()
- * always returns zero.
- */
-int register_reboot_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&reboot_notifier_list, nb);
-}
-EXPORT_SYMBOL(register_reboot_notifier);
-
-/**
- * unregister_reboot_notifier - Unregister previously registered reboot notifier
- * @nb: Hook to be unregistered
- *
- * Unregisters a previously registered reboot
- * notifier function.
- *
- * Returns zero on success, or %-ENOENT on failure.
- */
-int unregister_reboot_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
-}
-EXPORT_SYMBOL(unregister_reboot_notifier);
-
static ATOMIC_NOTIFIER_HEAD(die_chain);
int notrace __kprobes notify_die(enum die_val val, const char *str,
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index d6a00f3de15d..9aeab4b98c64 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -271,10 +271,8 @@ out:
return err;
}
-static int __init nsproxy_cache_init(void)
+int __init nsproxy_cache_init(void)
{
nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
return 0;
}
-
-module_init(nsproxy_cache_init);
diff --git a/kernel/panic.c b/kernel/panic.c
index 69231670eb95..d7bb6974efb5 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -119,6 +119,8 @@ NORET_TYPE void panic(const char * fmt, ...)
}
mdelay(PANIC_TIMER_STEP);
}
+ }
+ if (panic_timeout != 0) {
/*
* This will not be a clean reboot, with everything
* shutting down. But if there is a chance of
diff --git a/kernel/params.c b/kernel/params.c
index ed72e1330862..22df3e0d142a 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -225,8 +225,8 @@ int parse_args(const char *name,
int ret; \
\
ret = strtolfn(val, 0, &l); \
- if (ret == -EINVAL || ((type)l != l)) \
- return -EINVAL; \
+ if (ret < 0 || ((type)l != l)) \
+ return ret < 0 ? ret : -EINVAL; \
*((type *)kp->arg) = l; \
return 0; \
} \
@@ -511,7 +511,7 @@ struct module_param_attrs
#define to_param_attr(n) container_of(n, struct param_attribute, mattr)
static ssize_t param_attr_show(struct module_attribute *mattr,
- struct module *mod, char *buf)
+ struct module_kobject *mk, char *buf)
{
int count;
struct param_attribute *attribute = to_param_attr(mattr);
@@ -531,7 +531,7 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
static ssize_t param_attr_store(struct module_attribute *mattr,
- struct module *owner,
+ struct module_kobject *km,
const char *buf, size_t len)
{
int err;
@@ -730,6 +730,10 @@ static struct module_kobject * __init locate_module_kobject(const char *name)
mk->kobj.kset = module_kset;
err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL,
"%s", name);
+#ifdef CONFIG_MODULES
+ if (!err)
+ err = sysfs_create_file(&mk->kobj, &module_uevent.attr);
+#endif
if (err) {
kobject_put(&mk->kobj);
printk(KERN_ERR
@@ -807,7 +811,7 @@ static void __init param_sysfs_builtin(void)
}
ssize_t __modver_version_show(struct module_attribute *mattr,
- struct module *mod, char *buf)
+ struct module_kobject *mk, char *buf)
{
struct module_version_attribute *vattr =
container_of(mattr, struct module_version_attribute, mattr);
@@ -852,7 +856,7 @@ static ssize_t module_attr_show(struct kobject *kobj,
if (!attribute->show)
return -EIO;
- ret = attribute->show(attribute, mk->mod, buf);
+ ret = attribute->show(attribute, mk, buf);
return ret;
}
@@ -871,7 +875,7 @@ static ssize_t module_attr_store(struct kobject *kobj,
if (!attribute->store)
return -EIO;
- ret = attribute->store(attribute, mk->mod, buf, len);
+ ret = attribute->store(attribute, mk, buf, len);
return ret;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 57a8346a270e..e432057f3b21 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -405,7 +405,6 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
if (pid) {
struct hlist_node *first;
first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
- rcu_read_lock_held() ||
lockdep_tasklist_lock_is_held());
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 7b856b3458d2..3744c594b19b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -193,8 +193,8 @@ config APM_EMULATION
notification of APM "events" (e.g. battery status change).
In order to use APM, you will need supporting software. For location
- and more information, read <file:Documentation/power/pm.txt> and the
- Battery Powered Linux mini-HOWTO, available from
+ and more information, read <file:Documentation/power/apm-acpi.txt>
+ and the Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
This driver does not spin down disk drives (see the hdparm(8)
@@ -231,3 +231,7 @@ config PM_CLK
config PM_GENERIC_DOMAINS
bool
depends on PM
+
+config PM_GENERIC_DOMAINS_RUNTIME
+ def_bool y
+ depends on PM_RUNTIME && PM_GENERIC_DOMAINS
diff --git a/kernel/printk.c b/kernel/printk.c
index 37dff3429adb..28a40d8171b8 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file)
return 0;
/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
if (capable(CAP_SYS_ADMIN)) {
- WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
- "but no CAP_SYSLOG (deprecated).\n");
+ printk_once(KERN_WARNING "%s (%d): "
+ "Attempt to access syslog with CAP_SYS_ADMIN "
+ "but no CAP_SYSLOG (deprecated).\n",
+ current->comm, task_pid_nr(current));
return 0;
}
return -EPERM;
@@ -1602,7 +1604,7 @@ static int __init printk_late_init(void)
struct console *con;
for_each_console(con) {
- if (con->flags & CON_BOOT) {
+ if (!keep_bootcon && con->flags & CON_BOOT) {
printk(KERN_INFO "turn off boot console %s%d\n",
con->name, con->index);
unregister_console(con);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 7784bd216b6a..ddddb320be61 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -37,7 +37,7 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 2e138db03382..98f51b13bb7e 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -33,7 +33,7 @@
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
@@ -941,7 +941,6 @@ static void rcu_torture_timer(unsigned long unused)
idx = cur_ops->readlock();
completed = cur_ops->completed();
p = rcu_dereference_check(rcu_torture_current,
- rcu_read_lock_held() ||
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
@@ -1002,7 +1001,6 @@ rcu_torture_reader(void *arg)
idx = cur_ops->readlock();
completed = cur_ops->completed();
p = rcu_dereference_check(rcu_torture_current,
- rcu_read_lock_held() ||
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4e144876dc68..3b0c0986afc0 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -31,7 +31,7 @@
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
diff --git a/kernel/resource.c b/kernel/resource.c
index 3ff40178dce7..3b3cedc52592 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -553,6 +553,27 @@ int allocate_resource(struct resource *root, struct resource *new,
EXPORT_SYMBOL(allocate_resource);
+/**
+ * lookup_resource - find an existing resource by a resource start address
+ * @root: root resource descriptor
+ * @start: resource start address
+ *
+ * Returns a pointer to the resource if found, NULL otherwise
+ */
+struct resource *lookup_resource(struct resource *root, resource_size_t start)
+{
+ struct resource *res;
+
+ read_lock(&resource_lock);
+ for (res = root->child; res; res = res->sibling) {
+ if (res->start == start)
+ break;
+ }
+ read_unlock(&resource_lock);
+
+ return res;
+}
+
/*
* Insert a resource into the resource tree. If successful, return NULL,
* otherwise return the conflicting resource (compare to __request_resource())
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index cae050b05f5e..9f48f3d82e9b 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -11,7 +11,7 @@
#include <linux/rwsem.h>
#include <asm/system.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
/*
* lock for reading
@@ -117,15 +117,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_read_nested);
-void down_read_non_owner(struct rw_semaphore *sem)
-{
- might_sleep();
-
- __down_read(sem);
-}
-
-EXPORT_SYMBOL(down_read_non_owner);
-
void down_write_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
@@ -136,13 +127,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_write_nested);
-void up_read_non_owner(struct rw_semaphore *sem)
-{
- __up_read(sem);
-}
-
-EXPORT_SYMBOL(up_read_non_owner);
-
#endif
diff --git a/kernel/sched.c b/kernel/sched.c
index 84b9e076812e..ec5f472bc5b9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,6 +75,9 @@
#include <asm/tlb.h>
#include <asm/irq_regs.h>
#include <asm/mutex.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#endif
#include "sched_cpupri.h"
#include "workqueue_sched.h"
@@ -124,7 +127,7 @@
static inline int rt_policy(int policy)
{
- if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
+ if (policy == SCHED_FIFO || policy == SCHED_RR)
return 1;
return 0;
}
@@ -422,6 +425,7 @@ struct rt_rq {
*/
struct root_domain {
atomic_t refcount;
+ atomic_t rto_count;
struct rcu_head rcu;
cpumask_var_t span;
cpumask_var_t online;
@@ -431,7 +435,6 @@ struct root_domain {
* one runnable RT task.
*/
cpumask_var_t rto_mask;
- atomic_t rto_count;
struct cpupri cpupri;
};
@@ -528,6 +531,12 @@ struct rq {
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
#endif
+#ifdef CONFIG_PARAVIRT
+ u64 prev_steal_time;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ u64 prev_steal_time_rq;
+#endif
/* calc_load related fields */
unsigned long calc_load_update;
@@ -581,7 +590,6 @@ static inline int cpu_of(struct rq *rq)
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
- rcu_read_lock_held() || \
lockdep_is_held(&sched_domains_mutex))
/*
@@ -1568,38 +1576,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
return rq->avg_load_per_task;
}
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Compute the cpu's hierarchical load factor for each task group.
- * This needs to be done in a top-down fashion because the load of a child
- * group is a fraction of its parents load.
- */
-static int tg_load_down(struct task_group *tg, void *data)
-{
- unsigned long load;
- long cpu = (long)data;
-
- if (!tg->parent) {
- load = cpu_rq(cpu)->load.weight;
- } else {
- load = tg->parent->cfs_rq[cpu]->h_load;
- load *= tg->se[cpu]->load.weight;
- load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
- }
-
- tg->cfs_rq[cpu]->h_load = load;
-
- return 0;
-}
-
-static void update_h_load(long cpu)
-{
- walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-}
-
-#endif
-
#ifdef CONFIG_PREEMPT
static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -1953,10 +1929,28 @@ void account_system_vtime(struct task_struct *curr)
}
EXPORT_SYMBOL_GPL(account_system_vtime);
-static void update_rq_clock_task(struct rq *rq, s64 delta)
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+#ifdef CONFIG_PARAVIRT
+static inline u64 steal_ticks(u64 steal)
{
- s64 irq_delta;
+ if (unlikely(steal > NSEC_PER_SEC))
+ return div_u64(steal, TICK_NSEC);
+
+ return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
+}
+#endif
+static void update_rq_clock_task(struct rq *rq, s64 delta)
+{
+/*
+ * In theory, the compile should just see 0 here, and optimize out the call
+ * to sched_rt_avg_update. But I don't trust it...
+ */
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ s64 steal = 0, irq_delta = 0;
+#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
/*
@@ -1979,12 +1973,35 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->prev_irq_time += irq_delta;
delta -= irq_delta;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ if (static_branch((&paravirt_steal_rq_enabled))) {
+ u64 st;
+
+ steal = paravirt_steal_clock(cpu_of(rq));
+ steal -= rq->prev_steal_time_rq;
+
+ if (unlikely(steal > delta))
+ steal = delta;
+
+ st = steal_ticks(steal);
+ steal = st * TICK_NSEC;
+
+ rq->prev_steal_time_rq += steal;
+
+ delta -= steal;
+ }
+#endif
+
rq->clock_task += delta;
- if (irq_delta && sched_feat(NONIRQ_POWER))
- sched_rt_avg_update(rq, irq_delta);
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+ sched_rt_avg_update(rq, irq_delta + steal);
+#endif
}
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
static int irqtime_account_hi_update(void)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
@@ -2019,12 +2036,7 @@ static int irqtime_account_si_update(void)
#define sched_clock_irqtime (0)
-static void update_rq_clock_task(struct rq *rq, s64 delta)
-{
- rq->clock_task += delta;
-}
-
-#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#endif
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -2497,7 +2509,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
if (p->sched_class->task_woken)
p->sched_class->task_woken(rq, p);
- if (unlikely(rq->idle_stamp)) {
+ if (rq->idle_stamp) {
u64 delta = rq->clock - rq->idle_stamp;
u64 max = 2*sysctl_sched_migration_cost;
@@ -2886,7 +2898,7 @@ void sched_fork(struct task_struct *p)
#if defined(CONFIG_SMP)
p->on_cpu = 0;
#endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_COUNT
/* Want to start with kernel preemption disabled. */
task_thread_info(p)->preempt_count = 1;
#endif
@@ -3053,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_disable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(current);
+ perf_event_task_sched_in(prev, current);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -3877,6 +3889,25 @@ void account_idle_time(cputime_t cputime)
cpustat->idle = cputime64_add(cpustat->idle, cputime64);
}
+static __always_inline bool steal_account_process_tick(void)
+{
+#ifdef CONFIG_PARAVIRT
+ if (static_branch(&paravirt_steal_enabled)) {
+ u64 steal, st = 0;
+
+ steal = paravirt_steal_clock(smp_processor_id());
+ steal -= this_rq()->prev_steal_time;
+
+ st = steal_ticks(steal);
+ this_rq()->prev_steal_time += st * TICK_NSEC;
+
+ account_steal_time(st);
+ return st;
+ }
+#endif
+ return false;
+}
+
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -3908,6 +3939,9 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ if (steal_account_process_tick())
+ return;
+
if (irqtime_account_hi_update()) {
cpustat->irq = cputime64_add(cpustat->irq, tmp);
} else if (irqtime_account_si_update()) {
@@ -3961,6 +3995,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
return;
}
+ if (steal_account_process_tick())
+ return;
+
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -4242,9 +4279,9 @@ pick_next_task(struct rq *rq)
}
/*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
*/
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
@@ -4285,16 +4322,6 @@ need_resched:
if (to_wakeup)
try_to_wake_up_local(to_wakeup);
}
-
- /*
- * If we are going to sleep and we have plugged IO
- * queued, make sure to submit it to avoid deadlocks.
- */
- if (blk_needs_flush_plug(prev)) {
- raw_spin_unlock(&rq->lock);
- blk_schedule_flush_plug(prev);
- raw_spin_lock(&rq->lock);
- }
}
switch_count = &prev->nvcsw;
}
@@ -4332,17 +4359,34 @@ need_resched:
if (need_resched())
goto need_resched;
}
+
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+ if (!tsk->state)
+ return;
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+ * make sure to submit it to avoid deadlocks.
+ */
+ if (blk_needs_flush_plug(tsk))
+ blk_schedule_flush_plug(tsk);
+}
+
+asmlinkage void schedule(void)
+{
+ struct task_struct *tsk = current;
+
+ sched_submit_work(tsk);
+ __schedule();
+}
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
{
- bool ret = false;
-
- rcu_read_lock();
if (lock->owner != owner)
- goto fail;
+ return false;
/*
* Ensure we emit the owner->on_cpu, dereference _after_ checking
@@ -4352,11 +4396,7 @@ static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
*/
barrier();
- ret = owner->on_cpu;
-fail:
- rcu_read_unlock();
-
- return ret;
+ return owner->on_cpu;
}
/*
@@ -4368,21 +4408,21 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
if (!sched_feat(OWNER_SPIN))
return 0;
+ rcu_read_lock();
while (owner_running(lock, owner)) {
if (need_resched())
- return 0;
+ break;
arch_mutex_cpu_relax();
}
+ rcu_read_unlock();
/*
- * If the owner changed to another task there is likely
- * heavy contention, stop spinning.
+ * We break out the loop above on need_resched() and when the
+ * owner changed, which is a sign for heavy contention. Return
+ * success only when lock->owner is NULL.
*/
- if (lock->owner)
- return 0;
-
- return 1;
+ return lock->owner == NULL;
}
#endif
@@ -4405,7 +4445,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
do {
add_preempt_count_notrace(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
@@ -4433,7 +4473,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
- schedule();
+ __schedule();
local_irq_disable();
sub_preempt_count(PREEMPT_ACTIVE);
@@ -5558,7 +5598,7 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
add_preempt_count(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count(PREEMPT_ACTIVE);
}
@@ -7413,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
if (sd && (sd->flags & SD_OVERLAP))
free_sched_groups(sd->groups, 0);
+ kfree(*per_cpu_ptr(sdd->sd, j));
kfree(*per_cpu_ptr(sdd->sg, j));
kfree(*per_cpu_ptr(sdd->sgp, j));
}
@@ -7898,17 +7939,10 @@ int in_sched_functions(unsigned long addr)
&& addr < (unsigned long)__sched_text_end);
}
-static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
+static void init_cfs_rq(struct cfs_rq *cfs_rq)
{
cfs_rq->tasks_timeline = RB_ROOT;
INIT_LIST_HEAD(&cfs_rq->tasks);
-#ifdef CONFIG_FAIR_GROUP_SCHED
- cfs_rq->rq = rq;
- /* allow initial update_cfs_load() to truncate */
-#ifdef CONFIG_SMP
- cfs_rq->load_stamp = 1;
-#endif
-#endif
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
#ifndef CONFIG_64BIT
cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
@@ -7928,13 +7962,9 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
/* delimiter for bitsearch: */
__set_bit(MAX_RT_PRIO, array->bitmap);
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+#if defined CONFIG_SMP
rt_rq->highest_prio.curr = MAX_RT_PRIO;
-#ifdef CONFIG_SMP
rt_rq->highest_prio.next = MAX_RT_PRIO;
-#endif
-#endif
-#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
plist_head_init(&rt_rq->pushable_tasks);
@@ -7944,11 +7974,6 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
rt_rq->rt_throttled = 0;
rt_rq->rt_runtime = 0;
raw_spin_lock_init(&rt_rq->rt_runtime_lock);
-
-#ifdef CONFIG_RT_GROUP_SCHED
- rt_rq->rt_nr_boosted = 0;
- rt_rq->rq = rq;
-#endif
}
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -7957,11 +7982,17 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
struct sched_entity *parent)
{
struct rq *rq = cpu_rq(cpu);
- tg->cfs_rq[cpu] = cfs_rq;
- init_cfs_rq(cfs_rq, rq);
+
cfs_rq->tg = tg;
+ cfs_rq->rq = rq;
+#ifdef CONFIG_SMP
+ /* allow initial update_cfs_load() to truncate */
+ cfs_rq->load_stamp = 1;
+#endif
+ tg->cfs_rq[cpu] = cfs_rq;
tg->se[cpu] = se;
+
/* se could be NULL for root_task_group */
if (!se)
return;
@@ -7984,12 +8015,14 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
{
struct rq *rq = cpu_rq(cpu);
- tg->rt_rq[cpu] = rt_rq;
- init_rt_rq(rt_rq, rq);
+ rt_rq->highest_prio.curr = MAX_RT_PRIO;
+ rt_rq->rt_nr_boosted = 0;
+ rt_rq->rq = rq;
rt_rq->tg = tg;
- rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
+ tg->rt_rq[cpu] = rt_rq;
tg->rt_se[cpu] = rt_se;
+
if (!rt_se)
return;
@@ -8071,7 +8104,7 @@ void __init sched_init(void)
rq->nr_running = 0;
rq->calc_load_active = 0;
rq->calc_load_update = jiffies + LOAD_FREQ;
- init_cfs_rq(&rq->cfs, rq);
+ init_cfs_rq(&rq->cfs);
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
root_task_group.shares = root_task_group_load;
@@ -8185,7 +8218,7 @@ void __init sched_init(void)
scheduler_running = 1;
}
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
@@ -8195,7 +8228,6 @@ static inline int preempt_count_equals(int preempt_offset)
void __might_sleep(const char *file, int line, int preempt_offset)
{
-#ifdef in_atomic
static unsigned long prev_jiffy; /* ratelimiting */
if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
@@ -8217,7 +8249,6 @@ void __might_sleep(const char *file, int line, int preempt_offset)
if (irqs_disabled())
print_irqtrace_events(current);
dump_stack();
-#endif
}
EXPORT_SYMBOL(__might_sleep);
#endif
@@ -8376,6 +8407,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
if (!se)
goto err_free_rq;
+ init_cfs_rq(cfs_rq);
init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
}
@@ -8403,7 +8435,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
-#else /* !CONFG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_FAIR_GROUP_SCHED */
static inline void free_fair_sched_group(struct task_group *tg)
{
}
@@ -8424,7 +8456,8 @@ static void free_rt_sched_group(struct task_group *tg)
{
int i;
- destroy_rt_bandwidth(&tg->rt_bandwidth);
+ if (tg->rt_se)
+ destroy_rt_bandwidth(&tg->rt_bandwidth);
for_each_possible_cpu(i) {
if (tg->rt_rq)
@@ -8465,6 +8498,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
if (!rt_se)
goto err_free_rq;
+ init_rt_rq(rt_rq, cpu_rq(i));
+ rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
}
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 05577055cfca..c2f0e7248dca 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -13,6 +13,7 @@ struct autogroup {
int nice;
};
+static inline bool task_group_is_autogroup(struct task_group *tg);
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c768588e180b..bc8ee9993814 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -135,14 +135,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
return grp->my_q;
}
-/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
- * another cpu ('this_cpu')
- */
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
- return cfs_rq->tg->cfs_rq[this_cpu];
-}
-
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
if (!cfs_rq->on_list) {
@@ -271,11 +263,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
return NULL;
}
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
- return &cpu_rq(this_cpu)->cfs;
-}
-
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
}
@@ -334,11 +321,6 @@ static inline int entity_before(struct sched_entity *a,
return (s64)(a->vruntime - b->vruntime) < 0;
}
-static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
- return se->vruntime - cfs_rq->min_vruntime;
-}
-
static void update_min_vruntime(struct cfs_rq *cfs_rq)
{
u64 vruntime = cfs_rq->min_vruntime;
@@ -372,7 +354,6 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
struct rb_node *parent = NULL;
struct sched_entity *entry;
- s64 key = entity_key(cfs_rq, se);
int leftmost = 1;
/*
@@ -385,7 +366,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
* We dont care about collisions. Nodes with
* the same key stay together.
*/
- if (key < entity_key(cfs_rq, entry)) {
+ if (entity_before(se, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
@@ -1336,7 +1317,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}
for_each_sched_entity(se) {
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq);
@@ -1370,13 +1351,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
*/
if (task_sleep && parent_entity(se))
set_next_buddy(parent_entity(se));
+
+ /* avoid re-evaluating load for this entity */
+ se = parent_entity(se);
break;
}
flags |= DEQUEUE_SLEEP;
}
for_each_sched_entity(se) {
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq);
@@ -1481,7 +1465,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
* effect of the currently running task from the load
* of the current CPU:
*/
- rcu_read_lock();
if (sync) {
tg = task_group(current);
weight = current->se.load.weight;
@@ -1517,7 +1500,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
balanced = this_eff_load <= prev_eff_load;
} else
balanced = true;
- rcu_read_unlock();
/*
* If the currently running task will sleep within
@@ -1921,8 +1903,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (!sched_feat(WAKEUP_PREEMPT))
return;
- update_curr(cfs_rq);
find_matching_se(&se, &pse);
+ update_curr(cfs_rq_of(se));
BUG_ON(!pse);
if (wakeup_preempt_entity(se, pse) == 1) {
/*
@@ -2231,11 +2213,43 @@ static void update_shares(int cpu)
struct rq *rq = cpu_rq(cpu);
rcu_read_lock();
+ /*
+ * Iterates the task_group tree in a bottom up fashion, see
+ * list_add_leaf_cfs_rq() for details.
+ */
for_each_leaf_cfs_rq(rq, cfs_rq)
update_shares_cpu(cfs_rq->tg, cpu);
rcu_read_unlock();
}
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+ unsigned long load;
+ long cpu = (long)data;
+
+ if (!tg->parent) {
+ load = cpu_rq(cpu)->load.weight;
+ } else {
+ load = tg->parent->cfs_rq[cpu]->h_load;
+ load *= tg->se[cpu]->load.weight;
+ load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+ }
+
+ tg->cfs_rq[cpu]->h_load = load;
+
+ return 0;
+}
+
+static void update_h_load(long cpu)
+{
+ walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move,
@@ -2243,14 +2257,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
int *all_pinned)
{
long rem_load_move = max_load_move;
- int busiest_cpu = cpu_of(busiest);
- struct task_group *tg;
+ struct cfs_rq *busiest_cfs_rq;
rcu_read_lock();
- update_h_load(busiest_cpu);
+ update_h_load(cpu_of(busiest));
- list_for_each_entry_rcu(tg, &task_groups, list) {
- struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+ for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
unsigned long busiest_h_load = busiest_cfs_rq->h_load;
unsigned long busiest_weight = busiest_cfs_rq->load.weight;
u64 rem_load, moved_load;
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 1e7066d76c26..2e74677cb040 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -61,9 +61,9 @@ SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(OWNER_SPIN, 1)
/*
- * Decrement CPU power based on irq activity
+ * Decrement CPU power based on time not spent running tasks
*/
-SCHED_FEAT(NONIRQ_POWER, 1)
+SCHED_FEAT(NONTASK_POWER, 1)
/*
* Queue remote wakeups on the target CPU and process them
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 10d018212bab..97540f0c9e47 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -185,11 +185,23 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
typedef struct task_group *rt_rq_iter_t;
-#define for_each_rt_rq(rt_rq, iter, rq) \
- for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
- (&iter->list != &task_groups) && \
- (rt_rq = iter->rt_rq[cpu_of(rq)]); \
- iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+static inline struct task_group *next_task_group(struct task_group *tg)
+{
+ do {
+ tg = list_entry_rcu(tg->list.next,
+ typeof(struct task_group), list);
+ } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
+
+ if (&tg->list == &task_groups)
+ tg = NULL;
+
+ return tg;
+}
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+ for (iter = container_of(&task_groups, typeof(*iter), list); \
+ (iter = next_task_group(iter)) && \
+ (rt_rq = iter->rt_rq[cpu_of(rq)]);)
static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
@@ -1126,7 +1138,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
rt_rq = &rq->rt;
- if (unlikely(!rt_rq->rt_nr_running))
+ if (!rt_rq->rt_nr_running)
return NULL;
if (rt_rq_throttled(rt_rq))
@@ -1548,7 +1560,7 @@ skip:
static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
- if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
+ if (rq->rt.highest_prio.curr > prev->prio)
pull_rt_task(rq);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index d7f70aed1cc0..291c9700be75 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3102,15 +3102,11 @@ SYSCALL_DEFINE0(sgetmask)
SYSCALL_DEFINE1(ssetmask, int, newmask)
{
- int old;
-
- spin_lock_irq(&current->sighand->siglock);
- old = current->blocked.sig[0];
+ int old = current->blocked.sig[0];
+ sigset_t newset;
- siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
- sigmask(SIGSTOP)));
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP)));
+ set_current_blocked(&newset);
return old;
}
@@ -3167,11 +3163,8 @@ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
return -EFAULT;
sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
- spin_lock_irq(&current->sighand->siglock);
current->saved_sigmask = current->blocked;
- current->blocked = newset;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ set_current_blocked(&newset);
current->state = TASK_INTERRUPTIBLE;
schedule();
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e3516b29076c..ba5070ce5765 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -19,7 +19,7 @@
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
/*
* Structure to determine completion condition and record errors. May
@@ -136,10 +136,11 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
-int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+static void queue_stop_cpus_work(const struct cpumask *cpumask,
+ cpu_stop_fn_t fn, void *arg,
+ struct cpu_stop_done *done)
{
struct cpu_stop_work *work;
- struct cpu_stop_done done;
unsigned int cpu;
/* initialize works and done */
@@ -147,9 +148,8 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
work = &per_cpu(stop_cpus_work, cpu);
work->fn = fn;
work->arg = arg;
- work->done = &done;
+ work->done = done;
}
- cpu_stop_init_done(&done, cpumask_weight(cpumask));
/*
* Disable preemption while queueing to avoid getting
@@ -161,7 +161,15 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
&per_cpu(stop_cpus_work, cpu));
preempt_enable();
+}
+static int __stop_cpus(const struct cpumask *cpumask,
+ cpu_stop_fn_t fn, void *arg)
+{
+ struct cpu_stop_done done;
+
+ cpu_stop_init_done(&done, cpumask_weight(cpumask));
+ queue_stop_cpus_work(cpumask, fn, arg, &done);
wait_for_completion(&done.completion);
return done.executed ? done.ret : -ENOENT;
}
@@ -431,8 +439,15 @@ static int stop_machine_cpu_stop(void *data)
struct stop_machine_data *smdata = data;
enum stopmachine_state curstate = STOPMACHINE_NONE;
int cpu = smp_processor_id(), err = 0;
+ unsigned long flags;
bool is_active;
+ /*
+ * When called from stop_machine_from_inactive_cpu(), irq might
+ * already be disabled. Save the state and restore it on exit.
+ */
+ local_save_flags(flags);
+
if (!smdata->active_cpus)
is_active = cpu == cpumask_first(cpu_online_mask);
else
@@ -460,7 +475,7 @@ static int stop_machine_cpu_stop(void *data)
}
} while (curstate != STOPMACHINE_EXIT);
- local_irq_enable();
+ local_irq_restore(flags);
return err;
}
@@ -487,4 +502,57 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
}
EXPORT_SYMBOL_GPL(stop_machine);
+/**
+ * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * This is identical to stop_machine() but can be called from a CPU which
+ * is not active. The local CPU is in the process of hotplug (so no other
+ * CPU hotplug can start) and not marked active and doesn't have enough
+ * context to sleep.
+ *
+ * This function provides stop_machine() functionality for such state by
+ * using busy-wait for synchronization and executing @fn directly for local
+ * CPU.
+ *
+ * CONTEXT:
+ * Local CPU is inactive. Temporarily stops all active CPUs.
+ *
+ * RETURNS:
+ * 0 if all executions of @fn returned 0, any non zero return value if any
+ * returned non zero.
+ */
+int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
+ const struct cpumask *cpus)
+{
+ struct stop_machine_data smdata = { .fn = fn, .data = data,
+ .active_cpus = cpus };
+ struct cpu_stop_done done;
+ int ret;
+
+ /* Local CPU must be inactive and CPU hotplug in progress. */
+ BUG_ON(cpu_active(raw_smp_processor_id()));
+ smdata.num_threads = num_active_cpus() + 1; /* +1 for local */
+
+ /* No proper task established and can't sleep - busy wait for lock. */
+ while (!mutex_trylock(&stop_cpus_mutex))
+ cpu_relax();
+
+ /* Schedule work on other CPUs and execute directly for local CPU */
+ set_state(&smdata, STOPMACHINE_PREPARE);
+ cpu_stop_init_done(&done, num_active_cpus());
+ queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
+ &done);
+ ret = stop_machine_cpu_stop(&smdata);
+
+ /* Busy wait for completion. */
+ while (!completion_done(&done.completion))
+ cpu_relax();
+
+ mutex_unlock(&stop_cpus_mutex);
+ return ret ?: done.ret;
+}
+
#endif /* CONFIG_STOP_MACHINE */
diff --git a/kernel/sys.c b/kernel/sys.c
index e4128b278f23..18ee1d2f6474 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -8,7 +8,6 @@
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
-#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
@@ -38,6 +37,8 @@
#include <linux/fs_struct.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -45,6 +46,8 @@
#include <linux/user_namespace.h>
#include <linux/kmsg_dump.h>
+/* Move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -320,6 +323,37 @@ void kernel_restart_prepare(char *cmd)
}
/**
+ * register_reboot_notifier - Register function to be called at reboot time
+ * @nb: Info about notifier function to be called
+ *
+ * Registers a function with the list of functions
+ * to be called at reboot time.
+ *
+ * Currently always returns zero, as blocking_notifier_chain_register()
+ * always returns zero.
+ */
+int register_reboot_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(register_reboot_notifier);
+
+/**
+ * unregister_reboot_notifier - Unregister previously registered reboot notifier
+ * @nb: Hook to be unregistered
+ *
+ * Unregisters a previously registered reboot
+ * notifier function.
+ *
+ * Returns zero on success, or %-ENOENT on failure.
+ */
+int unregister_reboot_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(unregister_reboot_notifier);
+
+/**
* kernel_restart - reboot the system
* @cmd: pointer to buffer containing command to execute for restart
* or %NULL
@@ -591,11 +625,18 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;
+ /*
+ * We don't fail in case of NPROC limit excess here because too many
+ * poorly written programs don't check set*uid() return code, assuming
+ * it never fails if called by root. We may still enforce NPROC limit
+ * for programs doing set*uid()+execve() by harmlessly deferring the
+ * failure to the execve() stage.
+ */
if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
- new_user != INIT_USER) {
- free_uid(new_user);
- return -EAGAIN;
- }
+ new_user != INIT_USER)
+ current->flags |= PF_NPROC_EXCEEDED;
+ else
+ current->flags &= ~PF_NPROC_EXCEEDED;
free_uid(new->user);
new->user = new_user;
@@ -1124,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
#define override_architecture(name) 0
#endif
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ */
+static int override_release(char __user *release, int len)
+{
+ int ret = 0;
+ char buf[len];
+
+ if (current->personality & UNAME26) {
+ char *rest = UTS_RELEASE;
+ int ndots = 0;
+ unsigned v;
+
+ while (*rest) {
+ if (*rest == '.' && ++ndots >= 3)
+ break;
+ if (!isdigit(*rest) && *rest != '.')
+ break;
+ rest++;
+ }
+ v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+ snprintf(buf, len, "2.6.%u%s", v, rest);
+ ret = copy_to_user(release, buf, len);
+ }
+ return ret;
+}
+
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
int errno = 0;
@@ -1133,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
errno = -EFAULT;
up_read(&uts_sem);
+ if (!errno && override_release(name->release, sizeof(name->release)))
+ errno = -EFAULT;
if (!errno && override_architecture(name))
errno = -EFAULT;
return errno;
@@ -1154,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
error = -EFAULT;
up_read(&uts_sem);
+ if (!error && override_release(name->release, sizeof(name->release)))
+ error = -EFAULT;
if (!error && override_architecture(name))
error = -EFAULT;
return error;
@@ -1188,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
if (!error && override_architecture(name))
error = -EFAULT;
+ if (!error && override_release(name->release, sizeof(name->release)))
+ error = -EFAULT;
return error ? -EFAULT : 0;
}
#endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 62cbc8877fef..a9a5de07c4f1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void)
return -ENOSYS;
}
-cond_syscall(sys_nfsservctl);
cond_syscall(sys_quotactl);
cond_syscall(sys32_quotactl);
cond_syscall(sys_acct);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 3b8e028b9601..e8bffbe2ba4b 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 4e4932a7b360..362da653813d 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index fc0f22005417..e66046456f4f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -28,7 +28,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <net/genetlink.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
/*
* Maximum length of a cpumask that can be specified in
@@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
if (!cpumask_subset(mask, cpu_possible_mask))
return -EINVAL;
- s = NULL;
if (isadd == REGISTER) {
for_each_cpu(cpu, mask) {
- if (!s)
- s = kmalloc_node(sizeof(struct listener),
- GFP_KERNEL, cpu_to_node(cpu));
+ s = kmalloc_node(sizeof(struct listener),
+ GFP_KERNEL, cpu_to_node(cpu));
if (!s)
goto cleanup;
+
s->pid = pid;
- INIT_LIST_HEAD(&s->list);
s->valid = 1;
listeners = &per_cpu(listener_array, cpu);
down_write(&listeners->sem);
- list_for_each_entry_safe(s2, tmp, &listeners->list, list) {
- if (s2->pid == pid)
- goto next_cpu;
+ list_for_each_entry(s2, &listeners->list, list) {
+ if (s2->pid == pid && s2->valid)
+ goto exists;
}
list_add(&s->list, &listeners->list);
s = NULL;
-next_cpu:
+exists:
up_write(&listeners->sem);
+ kfree(s); /* nop if NULL */
}
- kfree(s);
return 0;
}
@@ -657,6 +655,7 @@ static struct genl_ops taskstats_ops = {
.cmd = TASKSTATS_CMD_GET,
.doit = taskstats_user_cmd,
.policy = taskstats_cmd_get_policy,
+ .flags = GENL_ADMIN_PERM,
};
static struct genl_ops cgroupstats_ops = {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 59f369f98a04..ea5e1a928d5b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer)
static void alarm_timer_get(struct k_itimer *timr,
struct itimerspec *cur_setting)
{
+ memset(cur_setting, 0, sizeof(struct itimerspec));
+
cur_setting->it_interval =
ktime_to_timespec(timr->it.alarmtimer.period);
cur_setting->it_value =
@@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
if (!rtcdev)
return -ENOTSUPP;
- /* Save old values */
- old_setting->it_interval =
- ktime_to_timespec(timr->it.alarmtimer.period);
- old_setting->it_value =
- ktime_to_timespec(timr->it.alarmtimer.node.expires);
+ /*
+ * XXX HACK! Currently we can DOS a system if the interval
+ * period on alarmtimers is too small. Cap the interval here
+ * to 100us and solve this properly in a future patch! -jstultz
+ */
+ if ((new_setting->it_interval.tv_sec == 0) &&
+ (new_setting->it_interval.tv_nsec < 100000))
+ new_setting->it_interval.tv_nsec = 100000;
+
+ if (old_setting)
+ alarm_timer_get(timr, old_setting);
/* If the timer was already set, cancel it */
alarm_cancel(&timr->it.alarmtimer);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 342408cf68dd..2b021b0e8507 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -604,6 +604,12 @@ static struct timespec timekeeping_suspend_time;
*/
static void __timekeeping_inject_sleeptime(struct timespec *delta)
{
+ if (!timespec_valid(delta)) {
+ printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
+ "sleep delta value!\n");
+ return;
+ }
+
xtime = timespec_add(xtime, *delta);
wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
total_sleep_time = timespec_add(total_sleep_time, *delta);
@@ -686,12 +692,34 @@ static void timekeeping_resume(void)
static int timekeeping_suspend(void)
{
unsigned long flags;
+ struct timespec delta, delta_delta;
+ static struct timespec old_delta;
read_persistent_clock(&timekeeping_suspend_time);
write_seqlock_irqsave(&xtime_lock, flags);
timekeeping_forward_now();
timekeeping_suspended = 1;
+
+ /*
+ * To avoid drift caused by repeated suspend/resumes,
+ * which each can add ~1 second drift error,
+ * try to compensate so the difference in system time
+ * and persistent_clock time stays close to constant.
+ */
+ delta = timespec_sub(xtime, timekeeping_suspend_time);
+ delta_delta = timespec_sub(delta, old_delta);
+ if (abs(delta_delta.tv_sec) >= 2) {
+ /*
+ * if delta_delta is too large, assume time correction
+ * has occured and set old_delta to the current delta.
+ */
+ old_delta = delta;
+ } else {
+ /* Otherwise try to adjust old_system to compensate */
+ timekeeping_suspend_time =
+ timespec_add(timekeeping_suspend_time, delta_delta);
+ }
write_sequnlock_irqrestore(&xtime_lock, flags);
clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2ad39e556cb4..cd3134510f3d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED
power:power_frequency
This is for userspace compatibility
and will vanish after 5 kernel iterations,
- namely 2.6.41.
+ namely 3.1.
config CONTEXT_SWITCH_TRACER
bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa298dfa..7c910a5593a6 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
what |= MASK_TC_BIT(rw, DISCARD);
+ what |= MASK_TC_BIT(rw, FLUSH);
+ what |= MASK_TC_BIT(rw, FUA);
pid = tsk->pid;
if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
goto out;
}
+ if (tc & BLK_TC_FLUSH)
+ rwbs[i++] = 'F';
+
if (tc & BLK_TC_DISCARD)
rwbs[i++] = 'D';
else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
else
rwbs[i++] = 'N';
+ if (tc & BLK_TC_FUA)
+ rwbs[i++] = 'F';
if (tc & BLK_TC_AHEAD)
rwbs[i++] = 'A';
- if (tc & BLK_TC_BARRIER)
- rwbs[i++] = 'B';
if (tc & BLK_TC_SYNC)
rwbs[i++] = 'S';
if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
unsigned long long ts = iter->ts;
unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
unsigned secs = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
static int blk_log_action(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
} mask_maps[] = {
{ BLK_TC_READ, "read" },
{ BLK_TC_WRITE, "write" },
- { BLK_TC_BARRIER, "barrier" },
+ { BLK_TC_FLUSH, "flush" },
{ BLK_TC_SYNC, "sync" },
{ BLK_TC_QUEUE, "queue" },
{ BLK_TC_REQUEUE, "requeue" },
@@ -1573,6 +1578,7 @@ static const struct {
{ BLK_TC_META, "meta" },
{ BLK_TC_DISCARD, "discard" },
{ BLK_TC_DRV_DATA, "drv_data" },
+ { BLK_TC_FUA, "fua" },
};
static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
{
int i = 0;
+ if (rw & REQ_FLUSH)
+ rwbs[i++] = 'F';
+
if (rw & WRITE)
rwbs[i++] = 'W';
else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
else
rwbs[i++] = 'N';
+ if (rw & REQ_FUA)
+ rwbs[i++] = 'F';
if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
if (rw & REQ_SYNC)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3f381d0b20a8..616846bcfee5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -2,7 +2,7 @@
#define _LINUX_KERNEL_TRACE_H
#include <linux/fs.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/clocksource.h>
#include <linux/ring_buffer.h>
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 017fa376505d..fd3c8aae55e5 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,7 +12,7 @@
#include <linux/slab.h>
#include <linux/time.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "trace.h"
#include "trace_output.h"
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 24dc60d9fa1f..5bbfac85866e 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
#define KB 1024
#define MB (1024*KB)
+#define KB_MASK (~(KB-1))
/*
* fill in extended accounting fields
*/
@@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB;
mmput(mm);
}
- stats->read_char = p->ioac.rchar;
- stats->write_char = p->ioac.wchar;
- stats->read_syscalls = p->ioac.syscr;
- stats->write_syscalls = p->ioac.syscw;
+ stats->read_char = p->ioac.rchar & KB_MASK;
+ stats->write_char = p->ioac.wchar & KB_MASK;
+ stats->read_syscalls = p->ioac.syscr & KB_MASK;
+ stats->write_syscalls = p->ioac.syscw & KB_MASK;
#ifdef CONFIG_TASK_IO_ACCOUNTING
- stats->read_bytes = p->ioac.read_bytes;
- stats->write_bytes = p->ioac.write_bytes;
- stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes;
+ stats->read_bytes = p->ioac.read_bytes & KB_MASK;
+ stats->write_bytes = p->ioac.write_bytes & KB_MASK;
+ stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK;
#else
stats->read_bytes = 0;
stats->write_bytes = 0;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 36491cd5b7d4..d680381b0e9c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -321,7 +321,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
*/
static int watchdog(void *unused)
{
- static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
sched_setscheduler(current, SCHED_FIFO, &param);
@@ -350,7 +350,8 @@ static int watchdog(void *unused)
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
-
+ param.sched_priority = 0;
+ sched_setscheduler(current, SCHED_NORMAL, &param);
return 0;
}
@@ -438,7 +439,7 @@ static int watchdog_enable(int cpu)
/* create the watchdog thread */
if (!p) {
- p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
+ p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
if (IS_ERR(p)) {
printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
if (!err) {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 25fb1b0e53fa..1783aabc6128 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2412,8 +2412,13 @@ reflush:
for_each_cwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+ bool drained;
- if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+ spin_lock_irq(&cwq->gcwq->lock);
+ drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
+ spin_unlock_irq(&cwq->gcwq->lock);
+
+ if (drained)
continue;
if (++flush_cnt == 10 ||
OpenPOWER on IntegriCloud