34 files changed, 1297 insertions, 1096 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 0b084fa44b1f..21c7fa615bd3 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -49,6 +49,8 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/kthread.h>
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
 
 #include <linux/audit.h>
 
@@ -265,7 +267,6 @@ void audit_log_lost(const char *message)
 }
 
 static int audit_log_config_change(char *function_name, int new, int old,
-				   kuid_t loginuid, u32 sessionid, u32 sid,
 				   int allow_changes)
 {
 	struct audit_buffer *ab;
@@ -274,29 +275,17 @@ static int audit_log_config_change(char *function_name, int new, int old,
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (unlikely(!ab))
 		return rc;
-	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
-			 old, from_kuid(&init_user_ns, loginuid), sessionid);
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
-
-		rc = security_secid_to_secctx(sid, &ctx, &len);
-		if (rc) {
-			audit_log_format(ab, " sid=%u", sid);
-			allow_changes = 0; /* Something weird, deny request */
-		} else {
-			audit_log_format(ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
+	audit_log_format(ab, "%s=%d old=%d", function_name, new, old);
+	audit_log_session_info(ab);
+	rc = audit_log_task_context(ab);
+	if (rc)
+		allow_changes = 0; /* Something weird, deny request */
 	audit_log_format(ab, " res=%d", allow_changes);
 	audit_log_end(ab);
 	return rc;
 }
 
-static int audit_do_config_change(char *function_name, int *to_change,
-				  int new, kuid_t loginuid, u32 sessionid,
-				  u32 sid)
+static int audit_do_config_change(char *function_name, int *to_change, int new)
 {
 	int allow_changes, rc = 0, old = *to_change;
 
@@ -307,8 +296,7 @@ static int audit_do_config_change(char *function_name, int *to_change,
 		allow_changes = 1;
 
 	if (audit_enabled != AUDIT_OFF) {
-		rc = audit_log_config_change(function_name, new, old, loginuid,
-					     sessionid, sid, allow_changes);
+		rc = audit_log_config_change(function_name, new, old, allow_changes);
 		if (rc)
 			allow_changes = 0;
 	}
@@ -322,44 +310,37 @@ static int audit_do_config_change(char *function_name, int *to_change,
 	return rc;
 }
 
-static int audit_set_rate_limit(int limit, kuid_t loginuid, u32 sessionid,
-				u32 sid)
+static int audit_set_rate_limit(int limit)
 {
-	return audit_do_config_change("audit_rate_limit", &audit_rate_limit,
-				      limit, loginuid, sessionid, sid);
+	return audit_do_config_change("audit_rate_limit", &audit_rate_limit, limit);
 }
 
-static int audit_set_backlog_limit(int limit, kuid_t loginuid, u32 sessionid,
-				   u32 sid)
+static int audit_set_backlog_limit(int limit)
 {
-	return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit,
-				      limit, loginuid, sessionid, sid);
+	return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit, limit);
 }
 
-static int audit_set_enabled(int state, kuid_t loginuid, u32 sessionid, u32 sid)
+static int audit_set_enabled(int state)
 {
 	int rc;
 	if (state < AUDIT_OFF || state > AUDIT_LOCKED)
 		return -EINVAL;
 
-	rc =  audit_do_config_change("audit_enabled", &audit_enabled, state,
-				     loginuid, sessionid, sid);
-
+	rc =  audit_do_config_change("audit_enabled", &audit_enabled, state);
 	if (!rc)
 		audit_ever_enabled |= !!state;
 
 	return rc;
 }
 
-static int audit_set_failure(int state, kuid_t loginuid, u32 sessionid, u32 sid)
+static int audit_set_failure(int state)
 {
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
 	    && state != AUDIT_FAIL_PANIC)
 		return -EINVAL;
 
-	return audit_do_config_change("audit_failure", &audit_failure, state,
-				      loginuid, sessionid, sid);
+	return audit_do_config_change("audit_failure", &audit_failure, state);
 }
 
 /*
@@ -417,34 +398,53 @@ static void kauditd_send_skb(struct sk_buff *skb)
 		consume_skb(skb);
 }
 
-static int kauditd_thread(void *dummy)
+/*
+ * flush_hold_queue - empty the hold queue if auditd appears
+ *
+ * If auditd just started, drain the queue of messages already
+ * sent to syslog/printk.  Remember loss here is ok.  We already
+ * called audit_log_lost() if it didn't go out normally.  so the
+ * race between the skb_dequeue and the next check for audit_pid
+ * doesn't matter.
+ *
+ * If you ever find kauditd to be too slow we can get a perf win
+ * by doing our own locking and keeping better track if there
+ * are messages in this queue.  I don't see the need now, but
+ * in 5 years when I want to play with this again I'll see this
+ * note and still have no friggin idea what i'm thinking today.
+ */
+static void flush_hold_queue(void)
 {
 	struct sk_buff *skb;
 
+	if (!audit_default || !audit_pid)
+		return;
+
+	skb = skb_dequeue(&audit_skb_hold_queue);
+	if (likely(!skb))
+		return;
+
+	while (skb && audit_pid) {
+		kauditd_send_skb(skb);
+		skb = skb_dequeue(&audit_skb_hold_queue);
+	}
+
+	/*
+	 * if auditd just disappeared but we
+	 * dequeued an skb we need to drop ref
+	 */
+	if (skb)
+		consume_skb(skb);
+}
+
+static int kauditd_thread(void *dummy)
+{
 	set_freezable();
 	while (!kthread_should_stop()) {
-		/*
-		 * if auditd just started drain the queue of messages already
-		 * sent to syslog/printk.  remember loss here is ok.  we already
-		 * called audit_log_lost() if it didn't go out normally.  so the
-		 * race between the skb_dequeue and the next check for audit_pid
-		 * doesn't matter.
-		 *
-		 * if you ever find kauditd to be too slow we can get a perf win
-		 * by doing our own locking and keeping better track if there
-		 * are messages in this queue.  I don't see the need now, but
-		 * in 5 years when I want to play with this again I'll see this
-		 * note and still have no friggin idea what i'm thinking today.
-		 */
-		if (audit_default && audit_pid) {
-			skb = skb_dequeue(&audit_skb_hold_queue);
-			if (unlikely(skb)) {
-				while (skb && audit_pid) {
-					kauditd_send_skb(skb);
-					skb = skb_dequeue(&audit_skb_hold_queue);
-				}
-			}
-		}
+		struct sk_buff *skb;
+		DECLARE_WAITQUEUE(wait, current);
+
+		flush_hold_queue();
 
 		skb = skb_dequeue(&audit_skb_queue);
 		wake_up(&audit_backlog_wait);
@@ -453,19 +453,18 @@ static int kauditd_thread(void *dummy)
 				kauditd_send_skb(skb);
 			else
 				audit_printk_skb(skb);
-		} else {
-			DECLARE_WAITQUEUE(wait, current);
-			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&kauditd_wait, &wait);
-
-			if (!skb_queue_len(&audit_skb_queue)) {
-				try_to_freeze();
-				schedule();
-			}
+			continue;
+		}
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&kauditd_wait, &wait);
 
-			__set_current_state(TASK_RUNNING);
-			remove_wait_queue(&kauditd_wait, &wait);
+		if (!skb_queue_len(&audit_skb_queue)) {
+			try_to_freeze();
+			schedule();
 		}
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&kauditd_wait, &wait);
 	}
 	return 0;
 }
@@ -579,13 +578,14 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 		return -EPERM;
 
 	switch (msg_type) {
-	case AUDIT_GET:
 	case AUDIT_LIST:
-	case AUDIT_LIST_RULES:
-	case AUDIT_SET:
 	case AUDIT_ADD:
-	case AUDIT_ADD_RULE:
 	case AUDIT_DEL:
+		return -EOPNOTSUPP;
+	case AUDIT_GET:
+	case AUDIT_SET:
+	case AUDIT_LIST_RULES:
+	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
 	case AUDIT_SIGNAL_INFO:
 	case AUDIT_TTY_GET:
@@ -608,12 +608,10 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	return err;
 }
 
-static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
-				     kuid_t auid, u32 ses, u32 sid)
+static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
 {
 	int rc = 0;
-	char *ctx = NULL;
-	u32 len;
+	uid_t uid = from_kuid(&init_user_ns, current_uid());
 
 	if (!audit_enabled) {
 		*ab = NULL;
@@ -623,33 +621,21 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
 	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
 	if (unlikely(!*ab))
 		return rc;
-	audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
-			 task_tgid_vnr(current),
-			 from_kuid(&init_user_ns, current_uid()),
-			 from_kuid(&init_user_ns, auid), ses);
-	if (sid) {
-		rc = security_secid_to_secctx(sid, &ctx, &len);
-		if (rc)
-			audit_log_format(*ab, " ssid=%u", sid);
-		else {
-			audit_log_format(*ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
+	audit_log_format(*ab, "pid=%d uid=%u", task_tgid_vnr(current), uid);
+	audit_log_session_info(*ab);
+	audit_log_task_context(*ab);
 
 	return rc;
 }
 
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	u32			seq, sid;
+	u32			seq;
 	void			*data;
 	struct audit_status	*status_get, status_set;
 	int			err;
 	struct audit_buffer	*ab;
 	u16			msg_type = nlh->nlmsg_type;
-	kuid_t			loginuid; /* loginuid of sender */
-	u32			sessionid;
 	struct audit_sig_info   *sig_data;
 	char			*ctx = NULL;
 	u32			len;
@@ -668,9 +654,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return err;
 		}
 	}
-	loginuid = audit_get_loginuid(current);
-	sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &sid);
 	seq  = nlh->nlmsg_seq;
 	data = nlmsg_data(nlh);
 
@@ -691,14 +674,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EINVAL;
 		status_get   = (struct audit_status *)data;
 		if (status_get->mask & AUDIT_STATUS_ENABLED) {
-			err = audit_set_enabled(status_get->enabled,
-						loginuid, sessionid, sid);
+			err = audit_set_enabled(status_get->enabled);
 			if (err < 0)
 				return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_FAILURE) {
-			err = audit_set_failure(status_get->failure,
-						loginuid, sessionid, sid);
+			err = audit_set_failure(status_get->failure);
 			if (err < 0)
 				return err;
 		}
@@ -706,22 +687,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			int new_pid = status_get->pid;
 
 			if (audit_enabled != AUDIT_OFF)
-				audit_log_config_change("audit_pid", new_pid,
-							audit_pid, loginuid,
-							sessionid, sid, 1);
-
+				audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
 			audit_pid = new_pid;
 			audit_nlk_portid = NETLINK_CB(skb).portid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) {
-			err = audit_set_rate_limit(status_get->rate_limit,
-						   loginuid, sessionid, sid);
+			err = audit_set_rate_limit(status_get->rate_limit);
 			if (err < 0)
 				return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
-			err = audit_set_backlog_limit(status_get->backlog_limit,
-						      loginuid, sessionid, sid);
+			err = audit_set_backlog_limit(status_get->backlog_limit);
 		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
@@ -729,25 +705,22 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (!audit_enabled && msg_type != AUDIT_USER_AVC)
 			return 0;
 
-		err = audit_filter_user();
+		err = audit_filter_user(msg_type);
 		if (err == 1) {
 			err = 0;
 			if (msg_type == AUDIT_USER_TTY) {
-				err = tty_audit_push_task(current, loginuid,
-							     sessionid);
+				err = tty_audit_push_current();
 				if (err)
 					break;
 			}
-			audit_log_common_recv_msg(&ab, msg_type,
-						  loginuid, sessionid, sid);
-
+			audit_log_common_recv_msg(&ab, msg_type);
 			if (msg_type != AUDIT_USER_TTY)
 				audit_log_format(ab, " msg='%.1024s'",
 						 (char *)data);
 			else {
 				int size;
 
-				audit_log_format(ab, " msg=");
+				audit_log_format(ab, " data=");
 				size = nlmsg_len(nlh);
 				if (size > 0 &&
 				    ((unsigned char *)data)[size - 1] == '\0')
@@ -758,50 +731,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			audit_log_end(ab);
 		}
 		break;
-	case AUDIT_ADD:
-	case AUDIT_DEL:
-		if (nlmsg_len(nlh) < sizeof(struct audit_rule))
-			return -EINVAL;
-		if (audit_enabled == AUDIT_LOCKED) {
-			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-						  loginuid, sessionid, sid);
-
-			audit_log_format(ab, " audit_enabled=%d res=0",
-					 audit_enabled);
-			audit_log_end(ab);
-			return -EPERM;
-		}
-		/* fallthrough */
-	case AUDIT_LIST:
-		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
-					   seq, data, nlmsg_len(nlh),
-					   loginuid, sessionid, sid);
-		break;
 	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
 			return -EINVAL;
 		if (audit_enabled == AUDIT_LOCKED) {
-			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-						  loginuid, sessionid, sid);
-
-			audit_log_format(ab, " audit_enabled=%d res=0",
-					 audit_enabled);
+			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+			audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled);
 			audit_log_end(ab);
 			return -EPERM;
 		}
 		/* fallthrough */
 	case AUDIT_LIST_RULES:
 		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
-					   seq, data, nlmsg_len(nlh),
-					   loginuid, sessionid, sid);
+					   seq, data, nlmsg_len(nlh));
 		break;
 	case AUDIT_TRIM:
 		audit_trim_trees();
-
-		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-					  loginuid, sessionid, sid);
-
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
 		audit_log_format(ab, " op=trim res=1");
 		audit_log_end(ab);
 		break;
@@ -831,8 +778,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		/* OK, here comes... */
 		err = audit_tag_tree(old, new);
 
-		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
-					  loginuid, sessionid, sid);
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
 
 		audit_log_format(ab, " op=make_equiv old=");
 		audit_log_untrustedstring(ab, old);
@@ -871,27 +817,30 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		struct audit_tty_status s;
 		struct task_struct *tsk = current;
 
-		spin_lock_irq(&tsk->sighand->siglock);
+		spin_lock(&tsk->sighand->siglock);
 		s.enabled = tsk->signal->audit_tty != 0;
-		spin_unlock_irq(&tsk->sighand->siglock);
+		s.log_passwd = tsk->signal->audit_tty_log_passwd;
+		spin_unlock(&tsk->sighand->siglock);
 
 		audit_send_reply(NETLINK_CB(skb).portid, seq,
 				 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
 		break;
 	}
 	case AUDIT_TTY_SET: {
-		struct audit_tty_status *s;
+		struct audit_tty_status s;
 		struct task_struct *tsk = current;
 
-		if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
-			return -EINVAL;
-		s = data;
-		if (s->enabled != 0 && s->enabled != 1)
+		memset(&s, 0, sizeof(s));
+		/* guard against past and future API changes */
+		memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
+		if ((s.enabled != 0 && s.enabled != 1) ||
+		    (s.log_passwd != 0 && s.log_passwd != 1))
 			return -EINVAL;
 
-		spin_lock_irq(&tsk->sighand->siglock);
-		tsk->signal->audit_tty = s->enabled != 0;
-		spin_unlock_irq(&tsk->sighand->siglock);
+		spin_lock(&tsk->sighand->siglock);
+		tsk->signal->audit_tty = s.enabled;
+		tsk->signal->audit_tty_log_passwd = s.log_passwd;
+		spin_unlock(&tsk->sighand->siglock);
 		break;
 	}
 	default:
@@ -1434,6 +1383,14 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
 	kfree(pathname);
 }
 
+void audit_log_session_info(struct audit_buffer *ab)
+{
+	u32 sessionid = audit_get_sessionid(current);
+	uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
+
+	audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
+}
+
 void audit_log_key(struct audit_buffer *ab, char *key)
 {
 	audit_log_format(ab, " key=");
@@ -1443,6 +1400,224 @@ void audit_log_key(struct audit_buffer *ab, char *key)
 		audit_log_format(ab, "(null)");
 }
 
+void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
+{
+	int i;
+
+	audit_log_format(ab, " %s=", prefix);
+	CAP_FOR_EACH_U32(i) {
+		audit_log_format(ab, "%08x",
+				 cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]);
+	}
+}
+
+void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
+{
+	kernel_cap_t *perm = &name->fcap.permitted;
+	kernel_cap_t *inh = &name->fcap.inheritable;
+	int log = 0;
+
+	if (!cap_isclear(*perm)) {
+		audit_log_cap(ab, "cap_fp", perm);
+		log = 1;
+	}
+	if (!cap_isclear(*inh)) {
+		audit_log_cap(ab, "cap_fi", inh);
+		log = 1;
+	}
+
+	if (log)
+		audit_log_format(ab, " cap_fe=%d cap_fver=%x",
+				 name->fcap.fE, name->fcap_ver);
+}
+
+static inline int audit_copy_fcaps(struct audit_names *name,
+				   const struct dentry *dentry)
+{
+	struct cpu_vfs_cap_data caps;
+	int rc;
+
+	if (!dentry)
+		return 0;
+
+	rc = get_vfs_caps_from_disk(dentry, &caps);
+	if (rc)
+		return rc;
+
+	name->fcap.permitted = caps.permitted;
+	name->fcap.inheritable = caps.inheritable;
+	name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+	name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >>
+				VFS_CAP_REVISION_SHIFT;
+
+	return 0;
+}
+
+/* Copy inode data into an audit_names. */
+void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
+		      const struct inode *inode)
+{
+	name->ino   = inode->i_ino;
+	name->dev   = inode->i_sb->s_dev;
+	name->mode  = inode->i_mode;
+	name->uid   = inode->i_uid;
+	name->gid   = inode->i_gid;
+	name->rdev  = inode->i_rdev;
+	security_inode_getsecid(inode, &name->osid);
+	audit_copy_fcaps(name, dentry);
+}
+
+/**
+ * audit_log_name - produce AUDIT_PATH record from struct audit_names
+ * @context: audit_context for the task
+ * @n: audit_names structure with reportable details
+ * @path: optional path to report instead of audit_names->name
+ * @record_num: record number to report when handling a list of names
+ * @call_panic: optional pointer to int that will be updated if secid fails
+ */
+void audit_log_name(struct audit_context *context, struct audit_names *n,
+		    struct path *path, int record_num, int *call_panic)
+{
+	struct audit_buffer *ab;
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+	if (!ab)
+		return;
+
+	audit_log_format(ab, "item=%d", record_num);
+
+	if (path)
+		audit_log_d_path(ab, " name=", path);
+	else if (n->name) {
+		switch (n->name_len) {
+		case AUDIT_NAME_FULL:
+			/* log the full path */
+			audit_log_format(ab, " name=");
+			audit_log_untrustedstring(ab, n->name->name);
+			break;
+		case 0:
+			/* name was specified as a relative path and the
+			 * directory component is the cwd */
+			audit_log_d_path(ab, " name=", &context->pwd);
+			break;
+		default:
+			/* log the name's directory component */
+			audit_log_format(ab, " name=");
+			audit_log_n_untrustedstring(ab, n->name->name,
+						    n->name_len);
+		}
+	} else
+		audit_log_format(ab, " name=(null)");
+
+	if (n->ino != (unsigned long)-1) {
+		audit_log_format(ab, " inode=%lu"
+				 " dev=%02x:%02x mode=%#ho"
+				 " ouid=%u ogid=%u rdev=%02x:%02x",
+				 n->ino,
+				 MAJOR(n->dev),
+				 MINOR(n->dev),
+				 n->mode,
+				 from_kuid(&init_user_ns, n->uid),
+				 from_kgid(&init_user_ns, n->gid),
+				 MAJOR(n->rdev),
+				 MINOR(n->rdev));
+	}
+	if (n->osid != 0) {
+		char *ctx = NULL;
+		u32 len;
+		if (security_secid_to_secctx(
+			n->osid, &ctx, &len)) {
+			audit_log_format(ab, " osid=%u", n->osid);
+			if (call_panic)
+				*call_panic = 2;
+		} else {
+			audit_log_format(ab, " obj=%s", ctx);
+			security_release_secctx(ctx, len);
+		}
+	}
+
+	audit_log_fcaps(ab, n);
+	audit_log_end(ab);
+}
+
+int audit_log_task_context(struct audit_buffer *ab)
+{
+	char *ctx = NULL;
+	unsigned len;
+	int error;
+	u32 sid;
+
+	security_task_getsecid(current, &sid);
+	if (!sid)
+		return 0;
+
+	error = security_secid_to_secctx(sid, &ctx, &len);
+	if (error) {
+		if (error != -EINVAL)
+			goto error_path;
+		return 0;
+	}
+
+	audit_log_format(ab, " subj=%s", ctx);
+	security_release_secctx(ctx, len);
+	return 0;
+
+error_path:
+	audit_panic("error in audit_log_task_context");
+	return error;
+}
+EXPORT_SYMBOL(audit_log_task_context);
+
+void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
+{
+	const struct cred *cred;
+	char name[sizeof(tsk->comm)];
+	struct mm_struct *mm = tsk->mm;
+	char *tty;
+
+	if (!ab)
+		return;
+
+	/* tsk == current */
+	cred = current_cred();
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
+		tty = tsk->signal->tty->name;
+	else
+		tty = "(none)";
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	audit_log_format(ab,
+			 " ppid=%ld pid=%d auid=%u uid=%u gid=%u"
+			 " euid=%u suid=%u fsuid=%u"
+			 " egid=%u sgid=%u fsgid=%u ses=%u tty=%s",
+			 sys_getppid(),
+			 tsk->pid,
+			 from_kuid(&init_user_ns, audit_get_loginuid(tsk)),
+			 from_kuid(&init_user_ns, cred->uid),
+			 from_kgid(&init_user_ns, cred->gid),
+			 from_kuid(&init_user_ns, cred->euid),
+			 from_kuid(&init_user_ns, cred->suid),
+			 from_kuid(&init_user_ns, cred->fsuid),
+			 from_kgid(&init_user_ns, cred->egid),
+			 from_kgid(&init_user_ns, cred->sgid),
+			 from_kgid(&init_user_ns, cred->fsgid),
+			 audit_get_sessionid(tsk), tty);
+
+	get_task_comm(name, tsk);
+	audit_log_format(ab, " comm=");
+	audit_log_untrustedstring(ab, name);
+
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		if (mm->exe_file)
+			audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
+		up_read(&mm->mmap_sem);
+	}
+	audit_log_task_context(ab);
+}
+EXPORT_SYMBOL(audit_log_task_info);
+
 /**
  * audit_log_link_denied - report a link restriction denial
  * @operation: specific link opreation
@@ -1451,19 +1626,28 @@ void audit_log_key(struct audit_buffer *ab, char *key)
 void audit_log_link_denied(const char *operation, struct path *link)
 {
 	struct audit_buffer *ab;
+	struct audit_names *name;
+
+	name = kzalloc(sizeof(*name), GFP_NOFS);
+	if (!name)
+		return;
 
+	/* Generate AUDIT_ANOM_LINK with subject, operation, outcome. */
 	ab = audit_log_start(current->audit_context, GFP_KERNEL,
 			     AUDIT_ANOM_LINK);
 	if (!ab)
-		return;
-	audit_log_format(ab, "op=%s action=denied", operation);
-	audit_log_format(ab, " pid=%d comm=", current->pid);
-	audit_log_untrustedstring(ab, current->comm);
-	audit_log_d_path(ab, " path=", link);
-	audit_log_format(ab, " dev=");
-	audit_log_untrustedstring(ab, link->dentry->d_inode->i_sb->s_id);
-	audit_log_format(ab, " ino=%lu", link->dentry->d_inode->i_ino);
+		goto out;
+	audit_log_format(ab, "op=%s", operation);
+	audit_log_task_info(ab, current);
+	audit_log_format(ab, " res=0");
 	audit_log_end(ab);
+
+	/* Generate AUDIT_PATH record with object. */
+	name->type = AUDIT_TYPE_NORMAL;
+	audit_copy_inode(name, link->dentry, link->dentry->d_inode);
+	audit_log_name(current->audit_context, name, link, 0, NULL);
+out:
+	kfree(name);
 }
 
 /**
diff --git a/kernel/audit.h b/kernel/audit.h
index 11468d99dad0..1c95131ef760 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/audit.h>
 #include <linux/skbuff.h>
+#include <uapi/linux/mqueue.h>
 
 /* 0 = no checking
    1 = put_count checking
@@ -29,6 +30,11 @@
 */
 #define AUDIT_DEBUG 0
 
+/* AUDIT_NAMES is the number of slots we reserve in the audit_context
+ * for saving names from getname().  If we get more names we will allocate
+ * a name dynamically and also add those to the list anchored by names_list. */
+#define AUDIT_NAMES	5
+
 /* At task start time, the audit_state is set in the audit_context using
    a per-task filter.  At syscall entry, the audit_state is augmented by
    the syscall filter. */
@@ -59,8 +65,158 @@ struct audit_entry {
 	struct audit_krule	rule;
 };
 
+struct audit_cap_data {
+	kernel_cap_t		permitted;
+	kernel_cap_t		inheritable;
+	union {
+		unsigned int	fE;		/* effective bit of file cap */
+		kernel_cap_t	effective;	/* effective set of process */
+	};
+};
+
+/* When fs/namei.c:getname() is called, we store the pointer in name and
+ * we don't let putname() free it (instead we free all of the saved
+ * pointers at syscall exit time).
+ *
+ * Further, in fs/namei.c:path_lookup() we store the inode and device.
+ */
+struct audit_names {
+	struct list_head	list;		/* audit_context->names_list */
+
+	struct filename		*name;
+	int			name_len;	/* number of chars to log */
+	bool			name_put;	/* call __putname()? */
+
+	unsigned long		ino;
+	dev_t			dev;
+	umode_t			mode;
+	kuid_t			uid;
+	kgid_t			gid;
+	dev_t			rdev;
+	u32			osid;
+	struct audit_cap_data	fcap;
+	unsigned int		fcap_ver;
+	unsigned char		type;		/* record type */
+	/*
+	 * This was an allocated audit_names and not from the array of
+	 * names allocated in the task audit context.  Thus this name
+	 * should be freed on syscall exit.
+	 */
+	bool			should_free;
+};
+
+/* The per-task audit context. */
+struct audit_context {
+	int		    dummy;	/* must be the first element */
+	int		    in_syscall;	/* 1 if task is in a syscall */
+	enum audit_state    state, current_state;
+	unsigned int	    serial;     /* serial number for record */
+	int		    major;      /* syscall number */
+	struct timespec	    ctime;      /* time of syscall entry */
+	unsigned long	    argv[4];    /* syscall arguments */
+	long		    return_code;/* syscall return code */
+	u64		    prio;
+	int		    return_valid; /* return code is valid */
+	/*
+	 * The names_list is the list of all audit_names collected during this
+	 * syscall.  The first AUDIT_NAMES entries in the names_list will
+	 * actually be from the preallocated_names array for performance
+	 * reasons.  Except during allocation they should never be referenced
+	 * through the preallocated_names array and should only be found/used
+	 * by running the names_list.
+	 */
+	struct audit_names  preallocated_names[AUDIT_NAMES];
+	int		    name_count; /* total records in names_list */
+	struct list_head    names_list;	/* struct audit_names->list anchor */
+	char		    *filterkey;	/* key for rule that triggered record */
+	struct path	    pwd;
+	struct audit_aux_data *aux;
+	struct audit_aux_data *aux_pids;
+	struct sockaddr_storage *sockaddr;
+	size_t sockaddr_len;
+				/* Save things to print about task_struct */
+	pid_t		    pid, ppid;
+	kuid_t		    uid, euid, suid, fsuid;
+	kgid_t		    gid, egid, sgid, fsgid;
+	unsigned long	    personality;
+	int		    arch;
+
+	pid_t		    target_pid;
+	kuid_t		    target_auid;
+	kuid_t		    target_uid;
+	unsigned int	    target_sessionid;
+	u32		    target_sid;
+	char		    target_comm[TASK_COMM_LEN];
+
+	struct audit_tree_refs *trees, *first_trees;
+	struct list_head killed_trees;
+	int tree_count;
+
+	int type;
+	union {
+		struct {
+			int nargs;
+			long args[6];
+		} socketcall;
+		struct {
+			kuid_t			uid;
+			kgid_t			gid;
+			umode_t			mode;
+			u32			osid;
+			int			has_perm;
+			uid_t			perm_uid;
+			gid_t			perm_gid;
+			umode_t			perm_mode;
+			unsigned long		qbytes;
+		} ipc;
+		struct {
+			mqd_t			mqdes;
+			struct mq_attr		mqstat;
+		} mq_getsetattr;
+		struct {
+			mqd_t			mqdes;
+			int			sigev_signo;
+		} mq_notify;
+		struct {
+			mqd_t			mqdes;
+			size_t			msg_len;
+			unsigned int		msg_prio;
+			struct timespec		abs_timeout;
+		} mq_sendrecv;
+		struct {
+			int			oflag;
+			umode_t			mode;
+			struct mq_attr		attr;
+		} mq_open;
+		struct {
+			pid_t			pid;
+			struct audit_cap_data	cap;
+		} capset;
+		struct {
+			int			fd;
+			int			flags;
+		} mmap;
+	};
+	int fds[2];
+
+#if AUDIT_DEBUG
+	int		    put_count;
+	int		    ino_count;
+#endif
+};
+
 extern int audit_ever_enabled;
 
+extern void audit_copy_inode(struct audit_names *name,
+			     const struct dentry *dentry,
+			     const struct inode *inode);
+extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
+			  kernel_cap_t *cap);
+extern void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name);
+extern void audit_log_name(struct audit_context *context,
+			   struct audit_names *n, struct path *path,
+			   int record_num, int *call_panic);
+
 extern int audit_pid;
 
 #define AUDIT_INODE_BUCKETS	32
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 267436826c3b..6bd4a90d1991 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -310,121 +310,83 @@ static u32 audit_to_op(u32 op)
 	return n;
 }
 
-
-/* Translate struct audit_rule to kernel's rule respresentation.
- * Exists for backward compatibility with userspace. */
-static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
+/* check if an audit field is valid */
+static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
 {
-	struct audit_entry *entry;
-	int err = 0;
-	int i;
-
-	entry = audit_to_entry_common(rule);
-	if (IS_ERR(entry))
-		goto exit_nofree;
-
-	for (i = 0; i < rule->field_count; i++) {
-		struct audit_field *f = &entry->rule.fields[i];
-		u32 n;
-
-		n = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
-
-		/* Support for legacy operators where
-		 * AUDIT_NEGATE bit signifies != and otherwise assumes == */
-		if (n & AUDIT_NEGATE)
-			f->op = Audit_not_equal;
-		else if (!n)
-			f->op = Audit_equal;
-		else
-			f->op = audit_to_op(n);
-
-		entry->rule.vers_ops = (n & AUDIT_OPERATORS) ? 2 : 1;
-
-		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
-		f->val = rule->values[i];
-		f->uid = INVALID_UID;
-		f->gid = INVALID_GID;
-
-		err = -EINVAL;
-		if (f->op == Audit_bad)
-			goto exit_free;
-
-		switch(f->type) {
-		default:
-			goto exit_free;
-		case AUDIT_UID:
-		case AUDIT_EUID:
-		case AUDIT_SUID:
-		case AUDIT_FSUID:
-		case AUDIT_LOGINUID:
-			/* bit ops not implemented for uid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
-			f->uid = make_kuid(current_user_ns(), f->val);
-			if (!uid_valid(f->uid))
-				goto exit_free;
-			break;
-		case AUDIT_GID:
-		case AUDIT_EGID:
-		case AUDIT_SGID:
-		case AUDIT_FSGID:
-			/* bit ops not implemented for gid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
-			f->gid = make_kgid(current_user_ns(), f->val);
-			if (!gid_valid(f->gid))
-				goto exit_free;
-			break;
-		case AUDIT_PID:
-		case AUDIT_PERS:
-		case AUDIT_MSGTYPE:
-		case AUDIT_PPID:
-		case AUDIT_DEVMAJOR:
-		case AUDIT_DEVMINOR:
-		case AUDIT_EXIT:
-		case AUDIT_SUCCESS:
-			/* bit ops are only useful on syscall args */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-			break;
-		case AUDIT_ARG0:
-		case AUDIT_ARG1:
-		case AUDIT_ARG2:
-		case AUDIT_ARG3:
-			break;
-		/* arch is only allowed to be = or != */
-		case AUDIT_ARCH:
-			if (f->op != Audit_not_equal && f->op != Audit_equal)
-				goto exit_free;
-			entry->rule.arch_f = f;
-			break;
-		case AUDIT_PERM:
-			if (f->val & ~15)
-				goto exit_free;
-			break;
-		case AUDIT_FILETYPE:
-			if (f->val & ~S_IFMT)
-				goto exit_free;
-			break;
-		case AUDIT_INODE:
-			err = audit_to_inode(&entry->rule, f);
-			if (err)
-				goto exit_free;
-			break;
-		}
-	}
-
-	if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
-		entry->rule.inode_f = NULL;
-
-exit_nofree:
-	return entry;
+	/* AUDIT_MSGTYPE may only be used in rules on the type and user
+	 * filter lists; reject it everywhere else. */
+	if (f->type == AUDIT_MSGTYPE &&
+	    entry->rule.listnr != AUDIT_FILTER_TYPE &&
+	    entry->rule.listnr != AUDIT_FILTER_USER)
+		return -EINVAL;
 
-exit_free:
-	audit_free_rule(entry);
-	return ERR_PTR(err);
+	switch(f->type) {
+	default:
+		return -EINVAL;
+	case AUDIT_UID:
+	case AUDIT_EUID:
+	case AUDIT_SUID:
+	case AUDIT_FSUID:
+	case AUDIT_LOGINUID:
+	case AUDIT_OBJ_UID:
+	case AUDIT_GID:
+	case AUDIT_EGID:
+	case AUDIT_SGID:
+	case AUDIT_FSGID:
+	case AUDIT_OBJ_GID:
+	case AUDIT_PID:
+	case AUDIT_PERS:
+	case AUDIT_MSGTYPE:
+	case AUDIT_PPID:
+	case AUDIT_DEVMAJOR:
+	case AUDIT_DEVMINOR:
+	case AUDIT_EXIT:
+	case AUDIT_SUCCESS:
+	case AUDIT_INODE:
+		/* bit ops are only useful on syscall args */
+		if (f->op == Audit_bitmask || f->op == Audit_bittest)
+			return -EINVAL;
+		break;
+	case AUDIT_ARG0:
+	case AUDIT_ARG1:
+	case AUDIT_ARG2:
+	case AUDIT_ARG3:
+	case AUDIT_SUBJ_USER:
+	case AUDIT_SUBJ_ROLE:
+	case AUDIT_SUBJ_TYPE:
+	case AUDIT_SUBJ_SEN:
+	case AUDIT_SUBJ_CLR:
+	case AUDIT_OBJ_USER:
+	case AUDIT_OBJ_ROLE:
+	case AUDIT_OBJ_TYPE:
+	case AUDIT_OBJ_LEV_LOW:
+	case AUDIT_OBJ_LEV_HIGH:
+	case AUDIT_WATCH:
+	case AUDIT_DIR:
+	case AUDIT_FILTERKEY:
+		break;
+	case AUDIT_LOGINUID_SET:
+		if ((f->val != 0) && (f->val != 1))
+			return -EINVAL;
+	/* FALL THROUGH */
+	case AUDIT_ARCH:
+		if (f->op != Audit_not_equal && f->op != Audit_equal)
+			return -EINVAL;
+		break;
+	case AUDIT_PERM:
+		if (f->val & ~15)
+			return -EINVAL;
+		break;
+	case AUDIT_FILETYPE:
+		if (f->val & ~S_IFMT)
+			return -EINVAL;
+		break;
+	case AUDIT_FIELD_COMPARE:
+		if (f->val > AUDIT_MAX_FIELD_COMPARE)
+			return -EINVAL;
+		break;
+	};
+	return 0;
 }
 
 /* Translate struct audit_rule_data to kernel's rule respresentation. */
@@ -459,17 +421,25 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		f->gid = INVALID_GID;
 		f->lsm_str = NULL;
 		f->lsm_rule = NULL;
-		switch(f->type) {
+
+		/* Map legacy loginuid-vs-(u32)-1 tests to AUDIT_LOGINUID_SET-vs-0 */
+		if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295U)) {
+			f->type = AUDIT_LOGINUID_SET;
+			f->val = 0;
+		}
+
+		err = audit_field_valid(entry, f);
+		if (err)
+			goto exit_free;
+
+		err = -EINVAL;
+		switch (f->type) {
+		case AUDIT_LOGINUID:
 		case AUDIT_UID:
 		case AUDIT_EUID:
 		case AUDIT_SUID:
 		case AUDIT_FSUID:
-		case AUDIT_LOGINUID:
 		case AUDIT_OBJ_UID:
-			/* bit ops not implemented for uid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
 			f->uid = make_kuid(current_user_ns(), f->val);
 			if (!uid_valid(f->uid))
 				goto exit_free;
@@ -479,27 +449,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_SGID:
 		case AUDIT_FSGID:
 		case AUDIT_OBJ_GID:
-			/* bit ops not implemented for gid comparisons */
-			if (f->op == Audit_bitmask || f->op == Audit_bittest)
-				goto exit_free;
-
 			f->gid = make_kgid(current_user_ns(), f->val);
 			if (!gid_valid(f->gid))
 				goto exit_free;
 			break;
-		case AUDIT_PID:
-		case AUDIT_PERS:
-		case AUDIT_MSGTYPE:
-		case AUDIT_PPID:
-		case AUDIT_DEVMAJOR:
-		case AUDIT_DEVMINOR:
-		case AUDIT_EXIT:
-		case AUDIT_SUCCESS:
-		case AUDIT_ARG0:
-		case AUDIT_ARG1:
-		case AUDIT_ARG2:
-		case AUDIT_ARG3:
-			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
 			break;
@@ -570,20 +523,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			entry->rule.buflen += f->val;
 			entry->rule.filterkey = str;
 			break;
-		case AUDIT_PERM:
-			if (f->val & ~15)
-				goto exit_free;
-			break;
-		case AUDIT_FILETYPE:
-			if (f->val & ~S_IFMT)
-				goto exit_free;
-			break;
-		case AUDIT_FIELD_COMPARE:
-			if (f->val > AUDIT_MAX_FIELD_COMPARE)
-				goto exit_free;
-			break;
-		default:
-			goto exit_free;
 		}
 	}
 
@@ -613,36 +552,6 @@ static inline size_t audit_pack_string(void **bufp, const char *str)
 	return len;
 }
 
-/* Translate kernel rule respresentation to struct audit_rule.
- * Exists for backward compatibility with userspace. */
-static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
-{
-	struct audit_rule *rule;
-	int i;
-
-	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
-	if (unlikely(!rule))
-		return NULL;
-
-	rule->flags = krule->flags | krule->listnr;
-	rule->action = krule->action;
-	rule->field_count = krule->field_count;
-	for (i = 0; i < rule->field_count; i++) {
-		rule->values[i] = krule->fields[i].val;
-		rule->fields[i] = krule->fields[i].type;
-
-		if (krule->vers_ops == 1) {
-			if (krule->fields[i].op == Audit_not_equal)
-				rule->fields[i] |= AUDIT_NEGATE;
-		} else {
-			rule->fields[i] |= audit_ops[krule->fields[i].op];
-		}
-	}
-	for (i = 0; i < AUDIT_BITMASK_SIZE; i++) rule->mask[i] = krule->mask[i];
-
-	return rule;
-}
-
 /* Translate kernel rule respresentation to struct audit_rule_data. */
 static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 {
@@ -1055,35 +964,6 @@ out:
 	return ret;
 }
 
-/* List rules using struct audit_rule.  Exists for backward
- * compatibility with userspace. */
-static void audit_list(int pid, int seq, struct sk_buff_head *q)
-{
-	struct sk_buff *skb;
-	struct audit_krule *r;
-	int i;
-
-	/* This is a blocking read, so use audit_filter_mutex instead of rcu
-	 * iterator to sync with list writers. */
-	for (i=0; i<AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry(r, &audit_rules_list[i], list) {
-			struct audit_rule *rule;
-
-			rule = audit_krule_to_rule(r);
-			if (unlikely(!rule))
-				break;
-			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
-					 rule, sizeof(*rule));
-			if (skb)
-				skb_queue_tail(q, skb);
-			kfree(rule);
-		}
-	}
-	skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
-	if (skb)
-		skb_queue_tail(q, skb);
-}
-
 /* List rules using struct audit_rule_data. */
 static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 {
@@ -1113,11 +993,11 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 }
 
 /* Log rule additions and removals */
-static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
-				  char *action, struct audit_krule *rule,
-				  int res)
+static void audit_log_rule_change(char *action, struct audit_krule *rule, int res)
 {
 	struct audit_buffer *ab;
+	uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current));
+	u32 sessionid = audit_get_sessionid(current);
 
 	if (!audit_enabled)
 		return;
@@ -1125,18 +1005,8 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (!ab)
 		return;
-	audit_log_format(ab, "auid=%u ses=%u",
-			 from_kuid(&init_user_ns, loginuid), sessionid);
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
-		if (security_secid_to_secctx(sid, &ctx, &len))
-			audit_log_format(ab, " ssid=%u", sid);
-		else {
-			audit_log_format(ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
+	audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
+	audit_log_task_context(ab);
 	audit_log_format(ab, " op=");
 	audit_log_string(ab, action);
 	audit_log_key(ab, rule->filterkey);
@@ -1151,12 +1021,8 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
  * @seq: netlink audit message sequence (serial) number
  * @data: payload data
  * @datasz: size of payload data
- * @loginuid: loginuid of sender
- * @sessionid: sessionid for netlink audit message
- * @sid: SE Linux Security ID of sender
  */
-int audit_receive_filter(int type, int pid, int seq, void *data,
-			 size_t datasz, kuid_t loginuid, u32 sessionid, u32 sid)
+int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz)
 {
 	struct task_struct *tsk;
 	struct audit_netlink_list *dest;
@@ -1164,7 +1030,6 @@ int audit_receive_filter(int type, int pid, int seq, void *data,
 	struct audit_entry *entry;
 
 	switch (type) {
-	case AUDIT_LIST:
 	case AUDIT_LIST_RULES:
 		/* We can't just spew out the rules here because we might fill
 		 * the available socket buffer space and deadlock waiting for
@@ -1179,10 +1044,7 @@ int audit_receive_filter(int type, int pid, int seq, void *data,
 		skb_queue_head_init(&dest->q);
 
 		mutex_lock(&audit_filter_mutex);
-		if (type == AUDIT_LIST)
-			audit_list(pid, seq, &dest->q);
-		else
-			audit_list_rules(pid, seq, &dest->q);
+		audit_list_rules(pid, seq, &dest->q);
 		mutex_unlock(&audit_filter_mutex);
 
 		tsk = kthread_run(audit_send_list, dest, "audit_send_list");
@@ -1192,35 +1054,23 @@ int audit_receive_filter(int type, int pid, int seq, void *data,
 			err = PTR_ERR(tsk);
 		}
 		break;
-	case AUDIT_ADD:
 	case AUDIT_ADD_RULE:
-		if (type == AUDIT_ADD)
-			entry = audit_rule_to_entry(data);
-		else
-			entry = audit_data_to_entry(data, datasz);
+		entry = audit_data_to_entry(data, datasz);
 		if (IS_ERR(entry))
 			return PTR_ERR(entry);
 
 		err = audit_add_rule(entry);
-		audit_log_rule_change(loginuid, sessionid, sid, "add rule",
-				      &entry->rule, !err);
-
+		audit_log_rule_change("add rule", &entry->rule, !err);
 		if (err)
 			audit_free_rule(entry);
 		break;
-	case AUDIT_DEL:
 	case AUDIT_DEL_RULE:
-		if (type == AUDIT_DEL)
-			entry = audit_rule_to_entry(data);
-		else
-			entry = audit_data_to_entry(data, datasz);
+		entry = audit_data_to_entry(data, datasz);
 		if (IS_ERR(entry))
 			return PTR_ERR(entry);
 
 		err = audit_del_rule(entry);
-		audit_log_rule_change(loginuid, sessionid, sid, "remove rule",
-				      &entry->rule, !err);
-
+		audit_log_rule_change("remove rule", &entry->rule, !err);
 		audit_free_rule(entry);
 		break;
 	default:
@@ -1358,7 +1208,7 @@ int audit_compare_dname_path(const char *dname, const char *path, int parentlen)
 	return strncmp(p, dname, dlen);
 }
 
-static int audit_filter_user_rules(struct audit_krule *rule,
+static int audit_filter_user_rules(struct audit_krule *rule, int type,
 				   enum audit_state *state)
 {
 	int i;
@@ -1382,6 +1232,13 @@ static int audit_filter_user_rules(struct audit_krule *rule,
 			result = audit_uid_comparator(audit_get_loginuid(current),
 						  f->op, f->uid);
 			break;
+		case AUDIT_LOGINUID_SET:
+			result = audit_comparator(audit_loginuid_set(current),
+						  f->op, f->val);
+			break;
+		case AUDIT_MSGTYPE:
+			result = audit_comparator(type, f->op, f->val);
+			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
 		case AUDIT_SUBJ_TYPE:
@@ -1408,7 +1265,7 @@ static int audit_filter_user_rules(struct audit_krule *rule,
 	return 1;
 }
 
-int audit_filter_user(void)
+int audit_filter_user(int type)
 {
 	enum audit_state state = AUDIT_DISABLED;
 	struct audit_entry *e;
@@ -1416,7 +1273,7 @@ int audit_filter_user(void)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) {
-		if (audit_filter_user_rules(&e->rule, &state)) {
+		if (audit_filter_user_rules(&e->rule, type, &state)) {
 			if (state == AUDIT_DISABLED)
 				ret = 0;
 			break;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c68229411a7c..3c8a601324a2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -76,11 +76,6 @@
 #define AUDITSC_SUCCESS 1
 #define AUDITSC_FAILURE 2
 
-/* AUDIT_NAMES is the number of slots we reserve in the audit_context
- * for saving names from getname().  If we get more names we will allocate
- * a name dynamically and also add those to the list anchored by names_list. */
-#define AUDIT_NAMES	5
-
 /* no execve audit message should be longer than this (userspace limits) */
 #define MAX_EXECVE_AUDIT_LEN 7500
 
@@ -90,44 +85,6 @@ int audit_n_rules;
 /* determines whether we collect data for signals sent */
 int audit_signals;
 
-struct audit_cap_data {
-	kernel_cap_t		permitted;
-	kernel_cap_t		inheritable;
-	union {
-		unsigned int	fE;		/* effective bit of a file capability */
-		kernel_cap_t	effective;	/* effective set of a process */
-	};
-};
-
-/* When fs/namei.c:getname() is called, we store the pointer in name and
- * we don't let putname() free it (instead we free all of the saved
- * pointers at syscall exit time).
- *
- * Further, in fs/namei.c:path_lookup() we store the inode and device.
- */
-struct audit_names {
-	struct list_head	list;		/* audit_context->names_list */
-	struct filename	*name;
-	unsigned long		ino;
-	dev_t			dev;
-	umode_t			mode;
-	kuid_t			uid;
-	kgid_t			gid;
-	dev_t			rdev;
-	u32			osid;
-	struct audit_cap_data	 fcap;
-	unsigned int		fcap_ver;
-	int			name_len;	/* number of name's characters to log */
-	unsigned char		type;		/* record type */
-	bool			name_put;	/* call __putname() for this name */
-	/*
-	 * This was an allocated audit_names and not from the array of
-	 * names allocated in the task audit context.  Thus this name
-	 * should be freed on syscall exit
-	 */
-	bool			should_free;
-};
-
 struct audit_aux_data {
 	struct audit_aux_data	*next;
 	int			type;
@@ -175,106 +132,6 @@ struct audit_tree_refs {
 	struct audit_chunk *c[31];
 };
 
-/* The per-task audit context. */
-struct audit_context {
-	int		    dummy;	/* must be the first element */
-	int		    in_syscall;	/* 1 if task is in a syscall */
-	enum audit_state    state, current_state;
-	unsigned int	    serial;     /* serial number for record */
-	int		    major;      /* syscall number */
-	struct timespec	    ctime;      /* time of syscall entry */
-	unsigned long	    argv[4];    /* syscall arguments */
-	long		    return_code;/* syscall return code */
-	u64		    prio;
-	int		    return_valid; /* return code is valid */
-	/*
-	 * The names_list is the list of all audit_names collected during this
-	 * syscall.  The first AUDIT_NAMES entries in the names_list will
-	 * actually be from the preallocated_names array for performance
-	 * reasons.  Except during allocation they should never be referenced
-	 * through the preallocated_names array and should only be found/used
-	 * by running the names_list.
-	 */
-	struct audit_names  preallocated_names[AUDIT_NAMES];
-	int		    name_count; /* total records in names_list */
-	struct list_head    names_list;	/* anchor for struct audit_names->list */
-	char *		    filterkey;	/* key for rule that triggered record */
-	struct path	    pwd;
-	struct audit_aux_data *aux;
-	struct audit_aux_data *aux_pids;
-	struct sockaddr_storage *sockaddr;
-	size_t sockaddr_len;
-				/* Save things to print about task_struct */
-	pid_t		    pid, ppid;
-	kuid_t		    uid, euid, suid, fsuid;
-	kgid_t		    gid, egid, sgid, fsgid;
-	unsigned long	    personality;
-	int		    arch;
-
-	pid_t		    target_pid;
-	kuid_t		    target_auid;
-	kuid_t		    target_uid;
-	unsigned int	    target_sessionid;
-	u32		    target_sid;
-	char		    target_comm[TASK_COMM_LEN];
-
-	struct audit_tree_refs *trees, *first_trees;
-	struct list_head killed_trees;
-	int tree_count;
-
-	int type;
-	union {
-		struct {
-			int nargs;
-			long args[6];
-		} socketcall;
-		struct {
-			kuid_t			uid;
-			kgid_t			gid;
-			umode_t			mode;
-			u32			osid;
-			int			has_perm;
-			uid_t			perm_uid;
-			gid_t			perm_gid;
-			umode_t			perm_mode;
-			unsigned long		qbytes;
-		} ipc;
-		struct {
-			mqd_t			mqdes;
-			struct mq_attr 		mqstat;
-		} mq_getsetattr;
-		struct {
-			mqd_t			mqdes;
-			int			sigev_signo;
-		} mq_notify;
-		struct {
-			mqd_t			mqdes;
-			size_t			msg_len;
-			unsigned int		msg_prio;
-			struct timespec		abs_timeout;
-		} mq_sendrecv;
-		struct {
-			int			oflag;
-			umode_t			mode;
-			struct mq_attr		attr;
-		} mq_open;
-		struct {
-			pid_t			pid;
-			struct audit_cap_data	cap;
-		} capset;
-		struct {
-			int			fd;
-			int			flags;
-		} mmap;
-	};
-	int fds[2];
-
-#if AUDIT_DEBUG
-	int		    put_count;
-	int		    ino_count;
-#endif
-};
-
 static inline int open_arg(int flags, int mask)
 {
 	int n = ACC_MODE(flags);
@@ -633,9 +490,23 @@ static int audit_filter_rules(struct task_struct *tsk,
 			break;
 		case AUDIT_GID:
 			result = audit_gid_comparator(cred->gid, f->op, f->gid);
+			if (f->op == Audit_equal) {
+				if (!result)
+					result = in_group_p(f->gid);
+			} else if (f->op == Audit_not_equal) {
+				if (result)
+					result = !in_group_p(f->gid);
+			}
 			break;
 		case AUDIT_EGID:
 			result = audit_gid_comparator(cred->egid, f->op, f->gid);
+			if (f->op == Audit_equal) {
+				if (!result)
+					result = in_egroup_p(f->gid);
+			} else if (f->op == Audit_not_equal) {
+				if (result)
+					result = !in_egroup_p(f->gid);
+			}
 			break;
 		case AUDIT_SGID:
 			result = audit_gid_comparator(cred->sgid, f->op, f->gid);
@@ -742,6 +613,9 @@ static int audit_filter_rules(struct task_struct *tsk,
 			if (ctx)
 				result = audit_uid_comparator(tsk->loginuid, f->op, f->uid);
 			break;
+		case AUDIT_LOGINUID_SET:
+			result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
+			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
 		case AUDIT_SUBJ_TYPE:
@@ -987,6 +861,8 @@ static inline void audit_free_names(struct audit_context *context)
 
 #if AUDIT_DEBUG == 2
 	if (context->put_count + context->ino_count != context->name_count) {
+		int i = 0;
+
 		printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d"
 		       " name_count=%d put_count=%d"
 		       " ino_count=%d [NOT freeing]\n",
@@ -995,7 +871,7 @@ static inline void audit_free_names(struct audit_context *context)
 		       context->name_count, context->put_count,
 		       context->ino_count);
 		list_for_each_entry(n, &context->names_list, list) {
-			printk(KERN_ERR "names[%d] = %p = %s\n", i,
+			printk(KERN_ERR "names[%d] = %p = %s\n", i++,
 			       n->name, n->name->name ?: "(null)");
 		}
 		dump_stack();
@@ -1010,7 +886,7 @@ static inline void audit_free_names(struct audit_context *context)
 	list_for_each_entry_safe(n, next, &context->names_list, list) {
 		list_del(&n->list);
 		if (n->name && n->name_put)
-			__putname(n->name);
+			final_putname(n->name);
 		if (n->should_free)
 			kfree(n);
 	}
@@ -1093,88 +969,6 @@ static inline void audit_free_context(struct audit_context *context)
 	kfree(context);
 }
 
-void audit_log_task_context(struct audit_buffer *ab)
-{
-	char *ctx = NULL;
-	unsigned len;
-	int error;
-	u32 sid;
-
-	security_task_getsecid(current, &sid);
-	if (!sid)
-		return;
-
-	error = security_secid_to_secctx(sid, &ctx, &len);
-	if (error) {
-		if (error != -EINVAL)
-			goto error_path;
-		return;
-	}
-
-	audit_log_format(ab, " subj=%s", ctx);
-	security_release_secctx(ctx, len);
-	return;
-
-error_path:
-	audit_panic("error in audit_log_task_context");
-	return;
-}
-
-EXPORT_SYMBOL(audit_log_task_context);
-
-void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
-{
-	const struct cred *cred;
-	char name[sizeof(tsk->comm)];
-	struct mm_struct *mm = tsk->mm;
-	char *tty;
-
-	if (!ab)
-		return;
-
-	/* tsk == current */
-	cred = current_cred();
-
-	spin_lock_irq(&tsk->sighand->siglock);
-	if (tsk->signal && tsk->signal->tty)
-		tty = tsk->signal->tty->name;
-	else
-		tty = "(none)";
-	spin_unlock_irq(&tsk->sighand->siglock);
-
-
-	audit_log_format(ab,
-			 " ppid=%ld pid=%d auid=%u uid=%u gid=%u"
-			 " euid=%u suid=%u fsuid=%u"
-			 " egid=%u sgid=%u fsgid=%u ses=%u tty=%s",
-			 sys_getppid(),
-			 tsk->pid,
-			 from_kuid(&init_user_ns, tsk->loginuid),
-			 from_kuid(&init_user_ns, cred->uid),
-			 from_kgid(&init_user_ns, cred->gid),
-			 from_kuid(&init_user_ns, cred->euid),
-			 from_kuid(&init_user_ns, cred->suid),
-			 from_kuid(&init_user_ns, cred->fsuid),
-			 from_kgid(&init_user_ns, cred->egid),
-			 from_kgid(&init_user_ns, cred->sgid),
-			 from_kgid(&init_user_ns, cred->fsgid),
-			 tsk->sessionid, tty);
-
-	get_task_comm(name, tsk);
-	audit_log_format(ab, " comm=");
-	audit_log_untrustedstring(ab, name);
-
-	if (mm) {
-		down_read(&mm->mmap_sem);
-		if (mm->exe_file)
-			audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
-		up_read(&mm->mmap_sem);
-	}
-	audit_log_task_context(ab);
-}
-
-EXPORT_SYMBOL(audit_log_task_info);
-
 static int audit_log_pid_context(struct audit_context *context, pid_t pid,
 				 kuid_t auid, kuid_t uid, unsigned int sessionid,
 				 u32 sid, char *comm)
@@ -1191,12 +985,14 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
 	audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid,
 			 from_kuid(&init_user_ns, auid),
 			 from_kuid(&init_user_ns, uid), sessionid);
-	if (security_secid_to_secctx(sid, &ctx, &len)) {
-		audit_log_format(ab, " obj=(none)");
-		rc = 1;
-	} else {
-		audit_log_format(ab, " obj=%s", ctx);
-		security_release_secctx(ctx, len);
+	if (sid) {
+		if (security_secid_to_secctx(sid, &ctx, &len)) {
+			audit_log_format(ab, " obj=(none)");
+			rc = 1;
+		} else {
+			audit_log_format(ab, " obj=%s", ctx);
+			security_release_secctx(ctx, len);
+		}
 	}
 	audit_log_format(ab, " ocomm=");
 	audit_log_untrustedstring(ab, comm);
@@ -1390,35 +1186,6 @@ static void audit_log_execve_info(struct audit_context *context,
 	kfree(buf);
 }
 
-static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
-{
-	int i;
-
-	audit_log_format(ab, " %s=", prefix);
-	CAP_FOR_EACH_U32(i) {
-		audit_log_format(ab, "%08x", cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]);
-	}
-}
-
-static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
-{
-	kernel_cap_t *perm = &name->fcap.permitted;
-	kernel_cap_t *inh = &name->fcap.inheritable;
-	int log = 0;
-
-	if (!cap_isclear(*perm)) {
-		audit_log_cap(ab, "cap_fp", perm);
-		log = 1;
-	}
-	if (!cap_isclear(*inh)) {
-		audit_log_cap(ab, "cap_fi", inh);
-		log = 1;
-	}
-
-	if (log)
-		audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
-}
-
 static void show_special(struct audit_context *context, int *call_panic)
 {
 	struct audit_buffer *ab;
@@ -1516,68 +1283,6 @@ static void show_special(struct audit_context *context, int *call_panic)
 	audit_log_end(ab);
 }
 
-static void audit_log_name(struct audit_context *context, struct audit_names *n,
-			   int record_num, int *call_panic)
-{
-	struct audit_buffer *ab;
-	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
-	if (!ab)
-		return; /* audit_panic has been called */
-
-	audit_log_format(ab, "item=%d", record_num);
-
-	if (n->name) {
-		switch (n->name_len) {
-		case AUDIT_NAME_FULL:
-			/* log the full path */
-			audit_log_format(ab, " name=");
-			audit_log_untrustedstring(ab, n->name->name);
-			break;
-		case 0:
-			/* name was specified as a relative path and the
-			 * directory component is the cwd */
-			audit_log_d_path(ab, " name=", &context->pwd);
-			break;
-		default:
-			/* log the name's directory component */
-			audit_log_format(ab, " name=");
-			audit_log_n_untrustedstring(ab, n->name->name,
-						    n->name_len);
-		}
-	} else
-		audit_log_format(ab, " name=(null)");
-
-	if (n->ino != (unsigned long)-1) {
-		audit_log_format(ab, " inode=%lu"
-				 " dev=%02x:%02x mode=%#ho"
-				 " ouid=%u ogid=%u rdev=%02x:%02x",
-				 n->ino,
-				 MAJOR(n->dev),
-				 MINOR(n->dev),
-				 n->mode,
-				 from_kuid(&init_user_ns, n->uid),
-				 from_kgid(&init_user_ns, n->gid),
-				 MAJOR(n->rdev),
-				 MINOR(n->rdev));
-	}
-	if (n->osid != 0) {
-		char *ctx = NULL;
-		u32 len;
-		if (security_secid_to_secctx(
-			n->osid, &ctx, &len)) {
-			audit_log_format(ab, " osid=%u", n->osid);
-			*call_panic = 2;
-		} else {
-			audit_log_format(ab, " obj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
-
-	audit_log_fcaps(ab, n);
-
-	audit_log_end(ab);
-}
-
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	int i, call_panic = 0;
@@ -1695,7 +1400,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 	i = 0;
 	list_for_each_entry(n, &context->names_list, list)
-		audit_log_name(context, n, i++, &call_panic);
+		audit_log_name(context, n, NULL, i++, &call_panic);
 
 	/* Send end of event record to help user space know we are finished */
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
@@ -2030,18 +1735,18 @@ void audit_putname(struct filename *name)
 	BUG_ON(!context);
 	if (!context->in_syscall) {
 #if AUDIT_DEBUG == 2
-		printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n",
+		printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n",
 		       __FILE__, __LINE__, context->serial, name);
 		if (context->name_count) {
 			struct audit_names *n;
-			int i;
+			int i = 0;
 
 			list_for_each_entry(n, &context->names_list, list)
-				printk(KERN_ERR "name[%d] = %p = %s\n", i,
+				printk(KERN_ERR "name[%d] = %p = %s\n", i++,
 				       n->name, n->name->name ?: "(null)");
 			}
 #endif
-		__putname(name);
+		final_putname(name);
 	}
 #if AUDIT_DEBUG
 	else {
@@ -2060,41 +1765,6 @@ void audit_putname(struct filename *name)
 #endif
 }
 
-static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry)
-{
-	struct cpu_vfs_cap_data caps;
-	int rc;
-
-	if (!dentry)
-		return 0;
-
-	rc = get_vfs_caps_from_disk(dentry, &caps);
-	if (rc)
-		return rc;
-
-	name->fcap.permitted = caps.permitted;
-	name->fcap.inheritable = caps.inheritable;
-	name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
-	name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
-
-	return 0;
-}
-
-
-/* Copy inode data into an audit_names. */
-static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
-			     const struct inode *inode)
-{
-	name->ino   = inode->i_ino;
-	name->dev   = inode->i_sb->s_dev;
-	name->mode  = inode->i_mode;
-	name->uid   = inode->i_uid;
-	name->gid   = inode->i_gid;
-	name->rdev  = inode->i_rdev;
-	security_inode_getsecid(inode, &name->osid);
-	audit_copy_fcaps(name, dentry);
-}
-
 /**
  * __audit_inode - store the inode and device from a lookup
  * @name: name being audited
@@ -2303,7 +1973,7 @@ int audit_set_loginuid(kuid_t loginuid)
 	unsigned int sessionid;
 
 #ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
-	if (uid_valid(task->loginuid))
+	if (audit_loginuid_set(task))
 		return -EPERM;
 #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
 	if (!capable(CAP_AUDIT_CONTROL))
@@ -2471,17 +2141,20 @@ int __audit_bprm(struct linux_binprm *bprm)
 
 /**
  * audit_socketcall - record audit data for sys_socketcall
- * @nargs: number of args
+ * @nargs: number of args; must be between 1 and AUDITSC_ARGS inclusive.
  * @args: args array
  *
  */
-void __audit_socketcall(int nargs, unsigned long *args)
+int __audit_socketcall(int nargs, unsigned long *args)
 {
 	struct audit_context *context = current->audit_context;
 
+	if (nargs <= 0 || nargs > AUDITSC_ARGS || !args)
+		return -EINVAL;	/* bad count or NULL @args: nothing is recorded */
 	context->type = AUDIT_SOCKETCALL;
 	context->socketcall.nargs = nargs;
 	memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
+	return 0;	/* @nargs words of @args were copied into the context */
 }
 
 /**
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 65349f07b878..66677003e223 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -71,6 +71,46 @@ void user_enter(void)
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_PREEMPT
+/**
+ * preempt_schedule_context - preempt_schedule called by tracing
+ *
+ * The tracing infrastructure uses preempt_enable_notrace to prevent
+ * recursion and tracing preempt enabling caused by the tracing
+ * infrastructure itself. But as tracing can happen in areas coming
+ * from userspace or just about to enter userspace, a preempt enable
+ * can occur before user_exit() is called. This will cause the scheduler
+ * to be called when the system is still in usermode.
+ *
+ * To prevent this, the preempt_enable_notrace will use this function
+ * instead of preempt_schedule() to exit user context if needed before
+ * calling the scheduler.
+ */
+void __sched notrace preempt_schedule_context(void)
+{
+	struct thread_info *ti = current_thread_info();
+	enum ctx_state prev_ctx;	/* handed back to exception_exit() below */
+
+	if (likely(ti->preempt_count || irqs_disabled()))
+		return;	/* non-zero preempt count or IRQs disabled: do nothing */
+
+	/*
+	 * Need to disable preemption in case user_exit() is traced
+	 * and the tracer calls preempt_enable_notrace() causing
+	 * an infinite recursion.
+	 */
+	preempt_disable_notrace();
+	prev_ctx = exception_enter();
+	preempt_enable_no_resched_notrace();
+
+	preempt_schedule();
+
+	preempt_disable_notrace();	/* presumably the same guard for exception_exit() */
+	exception_exit(prev_ctx);
+	preempt_enable_notrace();
+}
+EXPORT_SYMBOL_GPL(preempt_schedule_context);
+#endif /* CONFIG_PREEMPT */
 
 /**
  * user_exit - Inform the context tracking that the CPU is
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 8b86c0c68edf..d5585f5e038e 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -40,11 +40,13 @@ __setup("hlt", cpu_idle_nopoll_setup);
 
 static inline int cpu_idle_poll(void)
 {
+	rcu_idle_enter();	/* paired with rcu_idle_exit() before returning */
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+	rcu_idle_exit();	/* matches the rcu_idle_enter() at the top */
 	return 1;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6b41c1899a8b..9dc297faf7c0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4394,6 +4394,64 @@ perf_event_read_event(struct perf_event *event,
 	perf_output_end(&handle);
 }
 
+typedef int  (perf_event_aux_match_cb)(struct perf_event *event, void *data);
+typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
+
+/* Call @output for each eligible event in @ctx that @match accepts. */
+static void perf_event_aux_ctx(struct perf_event_context *ctx,
+			       perf_event_aux_match_cb match,
+			       perf_event_aux_output_cb output,
+			       void *data)
+{
+	struct perf_event *ev;
+
+	list_for_each_entry_rcu(ev, &ctx->event_list, event_entry) {
+		/* Ignore events below INACTIVE state or failing the filter. */
+		if (ev->state < PERF_EVENT_STATE_INACTIVE ||
+		    !event_filter_match(ev))
+			continue;
+		if (match(ev, data))
+			output(ev, data);
+	}
+}
+
+/*
+ * Run @match/@output over the events of every unique per-CPU context and of
+ * either @task_ctx (when given) or current's per-pmu task contexts.
+ */
+static void perf_event_aux(perf_event_aux_match_cb match,
+			   perf_event_aux_output_cb output, void *data,
+			   struct perf_event_context *task_ctx)
+{
+	struct perf_event_context *cur_ctx;
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	int ctxn;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->unique_pmu != pmu)
+			goto put;
+		perf_event_aux_ctx(&cpuctx->ctx, match, output, data);
+		ctxn = pmu->task_ctx_nr;
+		if (task_ctx || ctxn < 0)
+			goto put;
+		cur_ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+		if (cur_ctx)
+			perf_event_aux_ctx(cur_ctx, match, output, data);
+put:
+		put_cpu_ptr(pmu->pmu_cpu_context);
+	}
+
+	if (task_ctx) {
+		preempt_disable();
+		perf_event_aux_ctx(task_ctx, match, output, data);
+		preempt_enable();
+	}
+	rcu_read_unlock();
+}
+
 /*
  * task tracking -- fork/exit
  *
@@ -4416,8 +4474,9 @@ struct perf_task_event {
 };
 
 static void perf_event_task_output(struct perf_event *event,
-				     struct perf_task_event *task_event)
+				   void *data)
 {
+	struct perf_task_event *task_event = data;
 	struct perf_output_handle handle;
 	struct perf_sample_data	sample;
 	struct task_struct *task = task_event->task;
@@ -4445,62 +4504,11 @@ out:
 	task_event->event_id.header.size = size;
 }
 
-static int perf_event_task_match(struct perf_event *event)
-{
-	if (event->state < PERF_EVENT_STATE_INACTIVE)
-		return 0;
-
-	if (!event_filter_match(event))
-		return 0;
-
-	if (event->attr.comm || event->attr.mmap ||
-	    event->attr.mmap_data || event->attr.task)
-		return 1;
-
-	return 0;
-}
-
-static void perf_event_task_ctx(struct perf_event_context *ctx,
-				  struct perf_task_event *task_event)
+static int perf_event_task_match(struct perf_event *event,
+				 void *data __maybe_unused)
 {
-	struct perf_event *event;
-
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (perf_event_task_match(event))
-			perf_event_task_output(event, task_event);
-	}
-}
-
-static void perf_event_task_event(struct perf_task_event *task_event)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-	struct pmu *pmu;
-	int ctxn;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-		if (cpuctx->unique_pmu != pmu)
-			goto next;
-		perf_event_task_ctx(&cpuctx->ctx, task_event);
-
-		ctx = task_event->task_ctx;
-		if (!ctx) {
-			ctxn = pmu->task_ctx_nr;
-			if (ctxn < 0)
-				goto next;
-			ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
-			if (ctx)
-				perf_event_task_ctx(ctx, task_event);
-		}
-next:
-		put_cpu_ptr(pmu->pmu_cpu_context);
-	}
-	if (task_event->task_ctx)
-		perf_event_task_ctx(task_event->task_ctx, task_event);
-
-	rcu_read_unlock();
+	return event->attr.comm || event->attr.mmap ||
+	       event->attr.mmap_data || event->attr.task;
 }
 
 static void perf_event_task(struct task_struct *task,
@@ -4531,7 +4539,10 @@ static void perf_event_task(struct task_struct *task,
 		},
 	};
 
-	perf_event_task_event(&task_event);
+	perf_event_aux(perf_event_task_match,
+		       perf_event_task_output,
+		       &task_event,
+		       task_ctx);
 }
 
 void perf_event_fork(struct task_struct *task)
@@ -4557,8 +4568,9 @@ struct perf_comm_event {
 };
 
 static void perf_event_comm_output(struct perf_event *event,
-				     struct perf_comm_event *comm_event)
+				   void *data)
 {
+	struct perf_comm_event *comm_event = data;
 	struct perf_output_handle handle;
 	struct perf_sample_data sample;
 	int size = comm_event->event_id.header.size;
@@ -4585,39 +4597,16 @@ out:
 	comm_event->event_id.header.size = size;
 }
 
-static int perf_event_comm_match(struct perf_event *event)
-{
-	if (event->state < PERF_EVENT_STATE_INACTIVE)
-		return 0;
-
-	if (!event_filter_match(event))
-		return 0;
-
-	if (event->attr.comm)
-		return 1;
-
-	return 0;
-}
-
-static void perf_event_comm_ctx(struct perf_event_context *ctx,
-				  struct perf_comm_event *comm_event)
+static int perf_event_comm_match(struct perf_event *event,
+				 void *data __maybe_unused)
 {
-	struct perf_event *event;
-
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (perf_event_comm_match(event))
-			perf_event_comm_output(event, comm_event);
-	}
+	return event->attr.comm;
 }
 
 static void perf_event_comm_event(struct perf_comm_event *comm_event)
 {
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
 	char comm[TASK_COMM_LEN];
 	unsigned int size;
-	struct pmu *pmu;
-	int ctxn;
 
 	memset(comm, 0, sizeof(comm));
 	strlcpy(comm, comm_event->task->comm, sizeof(comm));
@@ -4627,24 +4616,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	comm_event->comm_size = size;
 
 	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-	rcu_read_lock();
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-		if (cpuctx->unique_pmu != pmu)
-			goto next;
-		perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
-		ctxn = pmu->task_ctx_nr;
-		if (ctxn < 0)
-			goto next;
-
-		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
-		if (ctx)
-			perf_event_comm_ctx(ctx, comm_event);
-next:
-		put_cpu_ptr(pmu->pmu_cpu_context);
-	}
-	rcu_read_unlock();
+	perf_event_aux(perf_event_comm_match,
+		       perf_event_comm_output,
+		       comm_event,
+		       NULL);
 }
 
 void perf_event_comm(struct task_struct *task)
@@ -4706,8 +4682,9 @@ struct perf_mmap_event {
 };
 
 static void perf_event_mmap_output(struct perf_event *event,
-				     struct perf_mmap_event *mmap_event)
+				   void *data)
 {
+	struct perf_mmap_event *mmap_event = data;
 	struct perf_output_handle handle;
 	struct perf_sample_data sample;
 	int size = mmap_event->event_id.header.size;
@@ -4734,46 +4711,24 @@ out:
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
-				   struct perf_mmap_event *mmap_event,
-				   int executable)
-{
-	if (event->state < PERF_EVENT_STATE_INACTIVE)
-		return 0;
-
-	if (!event_filter_match(event))
-		return 0;
-
-	if ((!executable && event->attr.mmap_data) ||
-	    (executable && event->attr.mmap))
-		return 1;
-
-	return 0;
-}
-
-static void perf_event_mmap_ctx(struct perf_event_context *ctx,
-				  struct perf_mmap_event *mmap_event,
-				  int executable)
+				 void *data)
 {
-	struct perf_event *event;
+	struct perf_mmap_event *mmap_event = data;
+	struct vm_area_struct *vma = mmap_event->vma;
+	int executable = vma->vm_flags & VM_EXEC;
 
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (perf_event_mmap_match(event, mmap_event, executable))
-			perf_event_mmap_output(event, mmap_event);
-	}
+	return (!executable && event->attr.mmap_data) ||
+	       (executable && event->attr.mmap);
 }
 
 static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 {
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
 	struct vm_area_struct *vma = mmap_event->vma;
 	struct file *file = vma->vm_file;
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
 	const char *name;
-	struct pmu *pmu;
-	int ctxn;
 
 	memset(tmp, 0, sizeof(tmp));
 
@@ -4829,27 +4784,10 @@ got_name:
 
 	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-		if (cpuctx->unique_pmu != pmu)
-			goto next;
-		perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
-					vma->vm_flags & VM_EXEC);
-
-		ctxn = pmu->task_ctx_nr;
-		if (ctxn < 0)
-			goto next;
-
-		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
-		if (ctx) {
-			perf_event_mmap_ctx(ctx, mmap_event,
-					vma->vm_flags & VM_EXEC);
-		}
-next:
-		put_cpu_ptr(pmu->pmu_cpu_context);
-	}
-	rcu_read_unlock();
+	perf_event_aux(perf_event_mmap_match,
+		       perf_event_mmap_output,
+		       mmap_event,
+		       NULL);
 
 	kfree(buf);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d40687b1434..987b28a1f01b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -70,6 +70,7 @@
 #include <linux/khugepaged.h>
 #include <linux/signalfd.h>
 #include <linux/uprobes.h>
+#include <linux/aio.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1303,6 +1304,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->memcg_batch.do_batch = 0;
 	p->memcg_batch.memcg = NULL;
 #endif
+#ifdef CONFIG_BCACHE
+	p->sequential_io	= 0;
+	p->sequential_io_avg	= 0;
+#endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 96f3a1d9c379..5a83dde8ca0c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -462,9 +462,23 @@ int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
 		if (domain->ops->map) {
 			ret = domain->ops->map(domain, virq, hwirq);
 			if (ret != 0) {
-				pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n",
-				       virq, hwirq, ret);
-				WARN_ON(1);
+				/*
+				 * If map() returns -EPERM, this interrupt is protected
+				 * by the firmware or some other service and shall not
+				 * be mapped.
+				 *
+				 * Since on some platforms we blindly try to map everything
+				 * we end up with a log full of backtraces.
+				 *
+				 * So instead, we silently fail on -EPERM; it is the
+				 * responsibility of the PIC driver to display a relevant
+				 * message if needed.
+				 */
+				if (ret != -EPERM) {
+					pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n",
+					       virq, hwirq, ret);
+					WARN_ON(1);
+				}
 				irq_data->domain = NULL;
 				irq_data->hwirq = 0;
 				goto err_unmap;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1296e72e4161..8241906c4b61 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -569,6 +569,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 	int retval = 0;
 
 	helper_lock();
+	if (!sub_info->path) {
+		retval = -EINVAL;
+		goto out;
+	}
+
 	if (sub_info->path[0] == '\0')
 		goto out;
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 6a3bccba7e7d..1f3186b37fd5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2998,6 +2998,7 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
 struct lock_class_key __lockdep_no_validate__;
+EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
 static int
 print_lock_nested_lock_not_held(struct task_struct *curr,
diff --git a/kernel/module.c b/kernel/module.c
index b049939177f6..cab4bce49c23 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2431,10 +2431,10 @@ static void kmemleak_load_module(const struct module *mod,
 	kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
 
 	for (i = 1; i < info->hdr->e_shnum; i++) {
-		const char *name = info->secstrings + info->sechdrs[i].sh_name;
-		if (!(info->sechdrs[i].sh_flags & SHF_ALLOC))
-			continue;
-		if (!strstarts(name, ".data") && !strstarts(name, ".bss"))
+		/* Scan all writable sections that are not executable */
+		if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) ||
+		    !(info->sechdrs[i].sh_flags & SHF_WRITE) ||
+		    (info->sechdrs[i].sh_flags & SHF_EXECINSTR))
 			continue;
 
 		kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
@@ -2769,24 +2769,11 @@ static void find_module_sections(struct module *mod, struct load_info *info)
 	mod->trace_events = section_objs(info, "_ftrace_events",
 					 sizeof(*mod->trace_events),
 					 &mod->num_trace_events);
-	/*
-	 * This section contains pointers to allocated objects in the trace
-	 * code and not scanning it leads to false positives.
-	 */
-	kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
-			   mod->num_trace_events, GFP_KERNEL);
 #endif
 #ifdef CONFIG_TRACING
 	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
 					 sizeof(*mod->trace_bprintk_fmt_start),
 					 &mod->num_trace_bprintk_fmt);
-	/*
-	 * This section contains pointers to allocated objects in the trace
-	 * code and not scanning it leads to false positives.
-	 */
-	kmemleak_scan_area(mod->trace_bprintk_fmt_start,
-			   sizeof(*mod->trace_bprintk_fmt_start) *
-			   mod->num_trace_bprintk_fmt, GFP_KERNEL);
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 	/* sechdrs[0].sh_size is always zero */
diff --git a/kernel/params.c b/kernel/params.c
index ed35345be536..53b958fcd639 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -613,10 +613,13 @@ static __modinit int add_sysfs_param(struct module_kobject *mk,
 		       sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
 		       GFP_KERNEL);
 	if (!new) {
-		kfree(mk->mp);
+		kfree(attrs);
 		err = -ENOMEM;
 		goto fail;
 	}
+	/* Despite looking like the typical realloc() bug, this is safe.
+	 * We *want* the old 'attrs' to be freed either way, and we'll store
+	 * the new one in the success case. */
 	attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
 	if (!attrs) {
 		err = -ENOMEM;
diff --git a/kernel/printk.c b/kernel/printk.c
index 96dcfcd9a2d4..fa36e1494420 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
+#include <linux/aio.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 17ae54da0ec2..aed981a3f69c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -17,6 +17,7 @@
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/signal.h>
+#include <linux/uio.h>
 #include <linux/audit.h>
 #include <linux/pid_namespace.h>
 #include <linux/syscalls.h>
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 170814dc418f..3db5a375d8dd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -88,7 +88,7 @@ static void __init rcu_bootup_announce_oddness(void)
 #ifdef CONFIG_RCU_NOCB_CPU
 #ifndef CONFIG_RCU_NOCB_CPU_NONE
 	if (!have_rcu_nocb_mask) {
-		alloc_bootmem_cpumask_var(&rcu_nocb_mask);
+		zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
 		have_rcu_nocb_mask = true;
 	}
 #ifdef CONFIG_RCU_NOCB_CPU_ZERO
@@ -1667,7 +1667,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
 	rdtp->last_accelerate = jiffies;
 
 	/* Request timer delay depending on laziness, and round. */
-	if (rdtp->all_lazy) {
+	if (!rdtp->all_lazy) {
 		*dj = round_up(rcu_idle_gp_delay + jiffies,
 			       rcu_idle_gp_delay) - jiffies;
 	} else {
diff --git a/kernel/relay.c b/kernel/relay.c
index eef0d113b79e..b91488ba2e5a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -234,7 +234,6 @@ static void relay_destroy_buf(struct rchan_buf *buf)
 static void relay_remove_buf(struct kref *kref)
 {
 	struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
-	buf->chan->cb->remove_buf_file(buf->dentry);
 	relay_destroy_buf(buf);
 }
 
@@ -484,6 +483,7 @@ static void relay_close_buf(struct rchan_buf *buf)
 {
 	buf->finalized = 1;
 	del_timer_sync(&buf->timer);
+	buf->chan->cb->remove_buf_file(buf->dentry);
 	kref_put(&buf->kref, relay_remove_buf);
 }
 
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index b3c6c3fcd847..cfff1435bdfb 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -126,6 +126,15 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
 
 EXPORT_SYMBOL(_down_write_nest_lock);
 
+void down_read_non_owner(struct rw_semaphore *sem)
+{
+	might_sleep();
+
+	__down_read(sem);
+}
+
+EXPORT_SYMBOL(down_read_non_owner);
+
 void down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	might_sleep();
@@ -136,6 +145,13 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
 
 EXPORT_SYMBOL(down_write_nested);
 
+void up_read_non_owner(struct rw_semaphore *sem)
+{
+	__up_read(sem);
+}
+
+EXPORT_SYMBOL(up_read_non_owner);
+
 #endif
 
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 36f85be2932b..d8f071cc9f51 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -633,7 +633,19 @@ void wake_up_nohz_cpu(int cpu)
 static inline bool got_nohz_idle_kick(void)
 {
 	int cpu = smp_processor_id();
-	return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
+
+	if (!test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
+		return false;
+
+	if (idle_cpu(cpu) && !need_resched())
+		return true;
+
+	/*
+	 * We can't run Idle Load Balance on this CPU at this time, so we
+	 * cancel it and clear NOHZ_BALANCE_KICK
+	 */
+	clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
+	return false;
 }
 
 #else /* CONFIG_NO_HZ_COMMON */
@@ -1395,8 +1407,9 @@ static void sched_ttwu_pending(void)
 
 void scheduler_ipi(void)
 {
-	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()
-	    && !tick_nohz_full_cpu(smp_processor_id()))
+	if (llist_empty(&this_rq()->wake_list)
+			&& !tick_nohz_full_cpu(smp_processor_id())
+			&& !got_nohz_idle_kick())
 		return;
 
 	/*
@@ -1419,7 +1432,7 @@ void scheduler_ipi(void)
 	/*
 	 * Check if someone kicked us for doing the nohz idle load balance.
 	 */
-	if (unlikely(got_nohz_idle_kick() && !need_resched())) {
+	if (unlikely(got_nohz_idle_kick())) {
 		this_rq()->idle_balance = 1;
 		raise_softirq_irqoff(SCHED_SOFTIRQ);
 	}
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index bfd6787b355a..7078052284fd 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -200,6 +200,7 @@ cond_syscall(sys_perf_event_open);
 /* fanotify! */
 cond_syscall(sys_fanotify_init);
 cond_syscall(sys_fanotify_mark);
+cond_syscall(compat_sys_fanotify_mark);
 
 /* open by handle */
 cond_syscall(sys_name_to_handle_at);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index ebf72358e86a..aea4a9ea6fc8 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -15,6 +15,7 @@
 #include <linux/netdevice.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/compat.h>
 
 #ifdef CONFIG_SYSCTL_SYSCALL
 
@@ -1447,7 +1448,6 @@ SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
 
 
 #ifdef CONFIG_COMPAT
-#include <asm/compat.h>
 
 struct compat_sysctl_args {
 	compat_uptr_t	name;
@@ -1459,7 +1459,7 @@ struct compat_sysctl_args {
 	compat_ulong_t	__unused[4];
 };
 
-asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args)
+COMPAT_SYSCALL_DEFINE1(sysctl, struct compat_sysctl_args __user *, args)
 {
 	struct compat_sysctl_args tmp;
 	compat_size_t __user *compat_oldlenp;
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e4c07b0692bb..70f27e89012b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,11 +12,6 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
 	bool
 
-# Platforms has a persistent clock
-config ALWAYS_USE_PERSISTENT_CLOCK
-	bool
-	default n
-
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
 	bool
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 206bbfb34e09..24938d577669 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -786,11 +786,11 @@ bool tick_broadcast_oneshot_available(void)
 
 void __init tick_broadcast_init(void)
 {
-	alloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
-	alloc_cpumask_var(&tmpmask, GFP_NOWAIT);
+	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
+	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
 #ifdef CONFIG_TICK_ONESHOT
-	alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
-	alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
-	alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
+	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
+	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
+	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
 #endif
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index bc67d4245e1d..f4208138fbf4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -717,6 +717,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	if (unlikely(!cpu_online(cpu))) {
 		if (cpu == tick_do_timer_cpu)
 			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+		return false;
 	}
 
 	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
@@ -1168,7 +1169,7 @@ void tick_cancel_sched_timer(int cpu)
 		hrtimer_cancel(&ts->sched_timer);
 # endif
 
-	ts->nohz_mode = NOHZ_MODE_INACTIVE;
+	memset(ts, 0, sizeof(*ts));
 }
 #endif
 
diff --git a/kernel/timer.c b/kernel/timer.c
index a860bba34412..15ffdb3f1948 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1539,12 +1539,12 @@ static int __cpuinit init_timers_cpu(int cpu)
 			boot_done = 1;
 			base = &boot_tvec_bases;
 		}
+		spin_lock_init(&base->lock);
 		tvec_base_done[cpu] = 1;
 	} else {
 		base = per_cpu(tvec_bases, cpu);
 	}
 
-	spin_lock_init(&base->lock);
 
 	for (j = 0; j < TVN_SIZE; j++) {
 		INIT_LIST_HEAD(base->tv5.vec + j);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5e9efd4b83a4..015f85aaca08 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -71,6 +71,7 @@ config TRACE_CLOCK
 config RING_BUFFER
 	bool
 	select TRACE_CLOCK
+	select IRQ_WORK
 
 config FTRACE_NMI_ENTER
        bool
@@ -107,7 +108,6 @@ config TRACING
 	select BINARY_PRINTF
 	select EVENT_TRACING
 	select TRACE_CLOCK
-	select IRQ_WORK
 
 config GENERIC_TRACER
 	bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index ed58a3216a6d..b8b8560bfb95 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1808,6 +1808,7 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
 
 	rwbs[i] = '\0';
 }
+EXPORT_SYMBOL_GPL(blk_fill_rwbs);
 
 #endif /* CONFIG_EVENT_TRACING */
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8a5c017bb50c..b549b0f5b977 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -64,6 +64,13 @@
 
 #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+#define INIT_REGEX_LOCK(opsname)	\
+	.regex_lock	= __MUTEX_INITIALIZER(opsname.regex_lock),
+#else
+#define INIT_REGEX_LOCK(opsname)
+#endif
+
 static struct ftrace_ops ftrace_list_end __read_mostly = {
 	.func		= ftrace_stub,
 	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
@@ -131,6 +138,16 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
 	       unlikely((op) != &ftrace_list_end))
 
+static inline void ftrace_ops_init(struct ftrace_ops *ops)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE
+	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
+		mutex_init(&ops->regex_lock);
+		ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+	}
+#endif
+}
+
 /**
  * ftrace_nr_registered_ops - return number of ops registered
  *
@@ -907,7 +924,8 @@ static void unregister_ftrace_profiler(void)
 #else
 static struct ftrace_ops ftrace_profile_ops __read_mostly = {
 	.func		= function_profile_call,
-	.flags		= FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+	INIT_REGEX_LOCK(ftrace_profile_ops)
 };
 
 static int register_ftrace_profiler(void)
@@ -1103,11 +1121,10 @@ static struct ftrace_ops global_ops = {
 	.func			= ftrace_stub,
 	.notrace_hash		= EMPTY_HASH,
 	.filter_hash		= EMPTY_HASH,
-	.flags			= FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags			= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+	INIT_REGEX_LOCK(global_ops)
 };
 
-static DEFINE_MUTEX(ftrace_regex_lock);
-
 struct ftrace_page {
 	struct ftrace_page	*next;
 	struct dyn_ftrace	*records;
@@ -1247,6 +1264,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
 
 void ftrace_free_filter(struct ftrace_ops *ops)
 {
+	ftrace_ops_init(ops);
 	free_ftrace_hash(ops->filter_hash);
 	free_ftrace_hash(ops->notrace_hash);
 }
@@ -2441,7 +2459,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		     !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) ||
 
 		    ((iter->flags & FTRACE_ITER_ENABLED) &&
-		     !(rec->flags & ~FTRACE_FL_MASK))) {
+		     !(rec->flags & FTRACE_FL_ENABLED))) {
 
 			rec = NULL;
 			goto retry;
@@ -2624,6 +2642,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 	struct ftrace_hash *hash;
 	int ret = 0;
 
+	ftrace_ops_init(ops);
+
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
@@ -2636,28 +2656,26 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 		return -ENOMEM;
 	}
 
+	iter->ops = ops;
+	iter->flags = flag;
+
+	mutex_lock(&ops->regex_lock);
+
 	if (flag & FTRACE_ITER_NOTRACE)
 		hash = ops->notrace_hash;
 	else
 		hash = ops->filter_hash;
 
-	iter->ops = ops;
-	iter->flags = flag;
-
 	if (file->f_mode & FMODE_WRITE) {
-		mutex_lock(&ftrace_lock);
 		iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash);
-		mutex_unlock(&ftrace_lock);
-
 		if (!iter->hash) {
 			trace_parser_put(&iter->parser);
 			kfree(iter);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out_unlock;
 		}
 	}
 
-	mutex_lock(&ftrace_regex_lock);
-
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
 		ftrace_filter_reset(iter->hash);
@@ -2677,7 +2695,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 		}
 	} else
 		file->private_data = iter;
-	mutex_unlock(&ftrace_regex_lock);
+
+ out_unlock:
+	mutex_unlock(&ops->regex_lock);
 
 	return ret;
 }
@@ -2910,6 +2930,8 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 static struct ftrace_ops trace_probe_ops __read_mostly =
 {
 	.func		= function_trace_probe_call,
+	.flags		= FTRACE_OPS_FL_INITIALIZED,
+	INIT_REGEX_LOCK(trace_probe_ops)
 };
 
 static int ftrace_probe_registered;
@@ -2919,8 +2941,12 @@ static void __enable_ftrace_function_probe(void)
 	int ret;
 	int i;
 
-	if (ftrace_probe_registered)
+	if (ftrace_probe_registered) {
+		/* still need to update the function call sites */
+		if (ftrace_enabled)
+			ftrace_run_update_code(FTRACE_UPDATE_CALLS);
 		return;
+	}
 
 	for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
 		struct hlist_head *hhd = &ftrace_func_hash[i];
@@ -2990,19 +3016,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	if (WARN_ON(not))
 		return -EINVAL;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock(&trace_probe_ops.regex_lock);
 
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
 	if (!hash) {
 		count = -ENOMEM;
-		goto out_unlock;
+		goto out;
 	}
 
 	if (unlikely(ftrace_disabled)) {
 		count = -ENODEV;
-		goto out_unlock;
+		goto out;
 	}
 
+	mutex_lock(&ftrace_lock);
+
 	do_for_each_ftrace_rec(pg, rec) {
 
 		if (!ftrace_match_record(rec, NULL, search, len, type))
@@ -3056,6 +3084,8 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 
  out_unlock:
 	mutex_unlock(&ftrace_lock);
+ out:
+	mutex_unlock(&trace_probe_ops.regex_lock);
 	free_ftrace_hash(hash);
 
 	return count;
@@ -3095,7 +3125,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 			return;
 	}
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock(&trace_probe_ops.regex_lock);
 
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
 	if (!hash)
@@ -3133,6 +3163,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 			list_add(&entry->free_list, &free_list);
 		}
 	}
+	mutex_lock(&ftrace_lock);
 	__disable_ftrace_function_probe();
 	/*
 	 * Remove after the disable is called. Otherwise, if the last
@@ -3144,9 +3175,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 		list_del(&entry->free_list);
 		ftrace_free_entry(entry);
 	}
+	mutex_unlock(&ftrace_lock);
 		
  out_unlock:
-	mutex_unlock(&ftrace_lock);
+	mutex_unlock(&trace_probe_ops.regex_lock);
 	free_ftrace_hash(hash);
 }
 
@@ -3256,18 +3288,17 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 	if (!cnt)
 		return 0;
 
-	mutex_lock(&ftrace_regex_lock);
-
-	ret = -ENODEV;
-	if (unlikely(ftrace_disabled))
-		goto out_unlock;
-
 	if (file->f_mode & FMODE_READ) {
 		struct seq_file *m = file->private_data;
 		iter = m->private;
 	} else
 		iter = file->private_data;
 
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	/* iter->hash is a local copy, so we don't need regex_lock */
+
 	parser = &iter->parser;
 	read = trace_get_user(parser, ubuf, cnt, ppos);
 
@@ -3276,14 +3307,12 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 		ret = ftrace_process_regex(iter->hash, parser->buffer,
 					   parser->idx, enable);
 		trace_parser_clear(parser);
-		if (ret)
-			goto out_unlock;
+		if (ret < 0)
+			goto out;
 	}
 
 	ret = read;
-out_unlock:
-	mutex_unlock(&ftrace_regex_lock);
-
+ out:
 	return ret;
 }
 
@@ -3335,16 +3364,19 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
+	mutex_lock(&ops->regex_lock);
+
 	if (enable)
 		orig_hash = &ops->filter_hash;
 	else
 		orig_hash = &ops->notrace_hash;
 
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
-	if (!hash)
-		return -ENOMEM;
+	if (!hash) {
+		ret = -ENOMEM;
+		goto out_regex_unlock;
+	}
 
-	mutex_lock(&ftrace_regex_lock);
 	if (reset)
 		ftrace_filter_reset(hash);
 	if (buf && !ftrace_match_records(hash, buf, len)) {
@@ -3366,7 +3398,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 	mutex_unlock(&ftrace_lock);
 
  out_regex_unlock:
-	mutex_unlock(&ftrace_regex_lock);
+	mutex_unlock(&ops->regex_lock);
 
 	free_ftrace_hash(hash);
 	return ret;
@@ -3392,6 +3424,7 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
 int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
 			 int remove, int reset)
 {
+	ftrace_ops_init(ops);
 	return ftrace_set_addr(ops, ip, remove, reset, 1);
 }
 EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
@@ -3416,6 +3449,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
 int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
 		       int len, int reset)
 {
+	ftrace_ops_init(ops);
 	return ftrace_set_regex(ops, buf, len, reset, 1);
 }
 EXPORT_SYMBOL_GPL(ftrace_set_filter);
@@ -3434,6 +3468,7 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
 int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
 			int len, int reset)
 {
+	ftrace_ops_init(ops);
 	return ftrace_set_regex(ops, buf, len, reset, 0);
 }
 EXPORT_SYMBOL_GPL(ftrace_set_notrace);
@@ -3524,6 +3559,8 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 {
 	char *func;
 
+	ftrace_ops_init(ops);
+
 	while (buf) {
 		func = strsep(&buf, ",");
 		ftrace_set_regex(ops, func, strlen(func), 0, enable);
@@ -3551,10 +3588,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 	int filter_hash;
 	int ret;
 
-	mutex_lock(&ftrace_regex_lock);
 	if (file->f_mode & FMODE_READ) {
 		iter = m->private;
-
 		seq_release(inode, file);
 	} else
 		iter = file->private_data;
@@ -3567,6 +3602,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 
 	trace_parser_put(parser);
 
+	mutex_lock(&iter->ops->regex_lock);
+
 	if (file->f_mode & FMODE_WRITE) {
 		filter_hash = !!(iter->flags & FTRACE_ITER_FILTER);
 
@@ -3584,10 +3621,11 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 
 		mutex_unlock(&ftrace_lock);
 	}
+
+	mutex_unlock(&iter->ops->regex_lock);
 	free_ftrace_hash(iter->hash);
 	kfree(iter);
 
-	mutex_unlock(&ftrace_regex_lock);
 	return 0;
 }
 
@@ -4126,7 +4164,8 @@ void __init ftrace_init(void)
 
 static struct ftrace_ops global_ops = {
 	.func			= ftrace_stub,
-	.flags			= FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags			= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+	INIT_REGEX_LOCK(global_ops)
 };
 
 static int __init ftrace_nodyn_init(void)
@@ -4180,8 +4219,9 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 }
 
 static struct ftrace_ops control_ops = {
-	.func = ftrace_ops_control_func,
-	.flags = FTRACE_OPS_FL_RECURSION_SAFE,
+	.func	= ftrace_ops_control_func,
+	.flags	= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+	INIT_REGEX_LOCK(control_ops)
 };
 
 static inline void
@@ -4539,6 +4579,8 @@ int register_ftrace_function(struct ftrace_ops *ops)
 {
 	int ret = -1;
 
+	ftrace_ops_init(ops);
+
 	mutex_lock(&ftrace_lock);
 
 	ret = __register_ftrace_function(ops);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b59aea2c48c2..e444ff88f0a4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -620,6 +620,9 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -EINVAL;
+
 		cpu_buffer = buffer->buffers[cpu];
 		work = &cpu_buffer->irq_work;
 	}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ae6fa2d1cdf7..4d79485b3237 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6216,10 +6216,15 @@ __init static int tracer_alloc_buffers(void)
 
 	trace_init_cmdlines();
 
-	register_tracer(&nop_trace);
-
+	/*
+	 * register_tracer() might reference current_trace, so it
+	 * needs to be set before we register anything. This is
+	 * just a bootstrap of current_trace anyway.
+	 */
 	global_trace.current_trace = &nop_trace;
 
+	register_tracer(&nop_trace);
+
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53582e982e51..27963e2bf4bf 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -251,7 +251,8 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
 	switch (enable) {
 	case 0:
 		/*
-		 * When soft_disable is set and enable is cleared, we want
+		 * When soft_disable is set and enable is cleared, the sm_ref
+		 * reference counter is decremented. If it reaches 0, we want
 		 * to clear the SOFT_DISABLED flag but leave the event in the
 		 * state that it was. That is, if the event was enabled and
 		 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
@@ -263,6 +264,8 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
 		 * "soft enable"s (clearing the SOFT_DISABLED bit) wont work.
 		 */
 		if (soft_disable) {
+			if (atomic_dec_return(&file->sm_ref) > 0)
+				break;
 			disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED;
 			clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
 		} else
@@ -291,8 +294,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
 		 */
 		if (!soft_disable)
 			clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
-		else
+		else {
+			if (atomic_inc_return(&file->sm_ref) > 1)
+				break;
 			set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
+		}
 
 		if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) {
 
@@ -623,6 +629,8 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 	if (file->flags & FTRACE_EVENT_FL_ENABLED) {
 		if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)
 			buf = "0*\n";
+		else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
+			buf = "1*\n";
 		else
 			buf = "1\n";
 	} else
@@ -1521,6 +1529,24 @@ __register_event(struct ftrace_event_call *call, struct module *mod)
 	return 0;
 }
 
+static struct ftrace_event_file *
+trace_create_new_event(struct ftrace_event_call *call,
+		       struct trace_array *tr)
+{
+	struct ftrace_event_file *file;
+
+	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+	if (!file)
+		return NULL;
+
+	file->event_call = call;
+	file->tr = tr;
+	atomic_set(&file->sm_ref, 0);
+	list_add(&file->list, &tr->events);
+
+	return file;
+}
+
 /* Add an event to a trace directory */
 static int
 __trace_add_new_event(struct ftrace_event_call *call,
@@ -1532,14 +1558,10 @@ __trace_add_new_event(struct ftrace_event_call *call,
 {
 	struct ftrace_event_file *file;
 
-	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+	file = trace_create_new_event(call, tr);
 	if (!file)
 		return -ENOMEM;
 
-	file->event_call = call;
-	file->tr = tr;
-	list_add(&file->list, &tr->events);
-
 	return event_create_dir(tr->event_dir, file, id, enable, filter, format);
 }
 
@@ -1554,14 +1576,10 @@ __trace_early_add_new_event(struct ftrace_event_call *call,
 {
 	struct ftrace_event_file *file;
 
-	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+	file = trace_create_new_event(call, tr);
 	if (!file)
 		return -ENOMEM;
 
-	file->event_call = call;
-	file->tr = tr;
-	list_add(&file->list, &tr->events);
-
 	return 0;
 }
 
@@ -2054,15 +2072,27 @@ event_enable_func(struct ftrace_hash *hash,
  out_reg:
 	/* Don't let event modules unload while probe registered */
 	ret = try_module_get(file->event_call->mod);
-	if (!ret)
+	if (!ret) {
+		ret = -EBUSY;
 		goto out_free;
+	}
 
 	ret = __ftrace_event_enable_disable(file, 1, 1);
 	if (ret < 0)
 		goto out_put;
 	ret = register_ftrace_function_probe(glob, ops, data);
-	if (!ret)
+	/*
+	 * On success the above returns the number of functions enabled,
+	 * or zero if it didn't find any functions.
+	 * Consider no functions a failure too.
+	 */
+	if (!ret) {
+		ret = -ENOENT;
+		goto out_disable;
+	} else if (ret < 0)
 		goto out_disable;
+	/* Just return zero, not the number of enabled functions */
+	ret = 0;
  out:
 	mutex_unlock(&event_mutex);
 	return ret;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a6361178de5a..e1b653f7e1ca 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -750,7 +750,11 @@ static int filter_set_pred(struct event_filter *filter,
 
 static void __free_preds(struct event_filter *filter)
 {
+	int i;
+
 	if (filter->preds) {
+		for (i = 0; i < filter->n_preds; i++)
+			kfree(filter->preds[i].ops);
 		kfree(filter->preds);
 		filter->preds = NULL;
 	}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1865d5f76538..9f46e98ba8f2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -27,7 +27,6 @@
 /**
  * Kprobe event core functions
  */
-
 struct trace_probe {
 	struct list_head	list;
 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
@@ -36,6 +35,7 @@ struct trace_probe {
 	const char		*symbol;	/* symbol name */
 	struct ftrace_event_class	class;
 	struct ftrace_event_call	call;
+	struct ftrace_event_file * __rcu *files;
 	ssize_t			size;		/* trace entry size */
 	unsigned int		nr_args;
 	struct probe_arg	args[];
@@ -46,7 +46,7 @@ struct trace_probe {
 	(sizeof(struct probe_arg) * (n)))
 
 
-static __kprobes int trace_probe_is_return(struct trace_probe *tp)
+static __kprobes bool trace_probe_is_return(struct trace_probe *tp)
 {
 	return tp->rp.handler != NULL;
 }
@@ -183,12 +183,63 @@ static struct trace_probe *find_trace_probe(const char *event,
 	return NULL;
 }
 
-/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
-static int enable_trace_probe(struct trace_probe *tp, int flag)
+static int trace_probe_nr_files(struct trace_probe *tp)
+{
+	struct ftrace_event_file **file;
+	int ret = 0;
+
+	/*
+	 * All updaters of tp->files are serialized by probe_enable_lock,
+	 * so we don't need to take rcu_read_lock() here.
+	 */
+	file = rcu_dereference_raw(tp->files);
+	if (file)
+		while (*(file++))
+			ret++;
+
+	return ret;
+}
+
+static DEFINE_MUTEX(probe_enable_lock);
+
+/*
+ * Enable trace_probe
+ * If @file is NULL, enable the "perf" handler; otherwise the "trace" one.
+ */
+static int
+enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
 {
 	int ret = 0;
 
-	tp->flags |= flag;
+	mutex_lock(&probe_enable_lock);
+
+	if (file) {
+		struct ftrace_event_file **new, **old;
+		int n = trace_probe_nr_files(tp);
+
+		old = rcu_dereference_raw(tp->files);
+		/* 1 is for new one and 1 is for stopper */
+		new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
+			      GFP_KERNEL);
+		if (!new) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		memcpy(new, old, n * sizeof(struct ftrace_event_file *));
+		new[n] = file;
+		/* The last one keeps a NULL */
+
+		rcu_assign_pointer(tp->files, new);
+		tp->flags |= TP_FLAG_TRACE;
+
+		if (old) {
+			/* Make sure the probe is done with old files */
+			synchronize_sched();
+			kfree(old);
+		}
+	} else
+		tp->flags |= TP_FLAG_PROFILE;
+
 	if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
 	    !trace_probe_has_gone(tp)) {
 		if (trace_probe_is_return(tp))
@@ -197,19 +248,90 @@ static int enable_trace_probe(struct trace_probe *tp, int flag)
 			ret = enable_kprobe(&tp->rp.kp);
 	}
 
+ out_unlock:
+	mutex_unlock(&probe_enable_lock);
+
 	return ret;
 }
 
-/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
-static void disable_trace_probe(struct trace_probe *tp, int flag)
+static int
+trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
+{
+	struct ftrace_event_file **files;
+	int i;
+
+	/*
+	 * All updaters of tp->files are serialized by probe_enable_lock,
+	 * so we don't need to take rcu_read_lock() here.
+	 */
+	files = rcu_dereference_raw(tp->files);
+	if (files) {
+		for (i = 0; files[i]; i++)
+			if (files[i] == file)
+				return i;
+	}
+
+	return -1;
+}
+
+/*
+ * Disable trace_probe
+ * If @file is NULL, disable the "perf" handler; otherwise the "trace" one.
+ */
+static int
+disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
 {
-	tp->flags &= ~flag;
+	int ret = 0;
+
+	mutex_lock(&probe_enable_lock);
+
+	if (file) {
+		struct ftrace_event_file **new, **old;
+		int n = trace_probe_nr_files(tp);
+		int i, j;
+
+		old = rcu_dereference_raw(tp->files);
+		if (n == 0 || trace_probe_file_index(tp, file) < 0) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		if (n == 1) {	/* Remove the last file */
+			tp->flags &= ~TP_FLAG_TRACE;
+			new = NULL;
+		} else {
+			new = kzalloc(n * sizeof(struct ftrace_event_file *),
+				      GFP_KERNEL);
+			if (!new) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			/* This copy & check loop copies the NULL stopper too */
+			for (i = 0, j = 0; j < n && i < n + 1; i++)
+				if (old[i] != file)
+					new[j++] = old[i];
+		}
+
+		rcu_assign_pointer(tp->files, new);
+
+		/* Make sure the probe is done with old files */
+		synchronize_sched();
+		kfree(old);
+	} else
+		tp->flags &= ~TP_FLAG_PROFILE;
+
 	if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
 		if (trace_probe_is_return(tp))
 			disable_kretprobe(&tp->rp);
 		else
 			disable_kprobe(&tp->rp.kp);
 	}
+
+ out_unlock:
+	mutex_unlock(&probe_enable_lock);
+
+	return ret;
 }
 
 /* Internal register function - just handle k*probes and flags */
@@ -723,9 +845,10 @@ static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
 }
 
 /* Kprobe handler */
-static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+static __kprobes void
+__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
+		    struct ftrace_event_file *ftrace_file)
 {
-	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct kprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
@@ -733,7 +856,10 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 	unsigned long irq_flags;
 	struct ftrace_event_call *call = &tp->call;
 
-	tp->nhit++;
+	WARN_ON(call != ftrace_file->event_call);
+
+	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
+		return;
 
 	local_save_flags(irq_flags);
 	pc = preempt_count();
@@ -741,13 +867,14 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 	dsize = __get_data_size(tp, regs);
 	size = sizeof(*entry) + tp->size + dsize;
 
-	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-						  size, irq_flags, pc);
+	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
+						call->event.type,
+						size, irq_flags, pc);
 	if (!event)
 		return;
 
 	entry = ring_buffer_event_data(event);
-	entry->ip = (unsigned long)kp->addr;
+	entry->ip = (unsigned long)tp->rp.kp.addr;
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
@@ -755,11 +882,31 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 						irq_flags, pc, regs);
 }
 
+static __kprobes void
+kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
+{
+	/*
+	 * Note: preempt is already disabled around the kprobe handler.
+	 * However, we still need an smp_read_barrier_depends() corresponding
+	 * to smp_wmb() in rcu_assign_pointer() to access the pointer.
+	 */
+	struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
+
+	if (unlikely(!file))
+		return;
+
+	while (*file) {
+		__kprobe_trace_func(tp, regs, *file);
+		file++;
+	}
+}
+
 /* Kretprobe handler */
-static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
-					  struct pt_regs *regs)
+static __kprobes void
+__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
+		       struct pt_regs *regs,
+		       struct ftrace_event_file *ftrace_file)
 {
-	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct kretprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
@@ -767,14 +914,20 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
 	unsigned long irq_flags;
 	struct ftrace_event_call *call = &tp->call;
 
+	WARN_ON(call != ftrace_file->event_call);
+
+	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
+		return;
+
 	local_save_flags(irq_flags);
 	pc = preempt_count();
 
 	dsize = __get_data_size(tp, regs);
 	size = sizeof(*entry) + tp->size + dsize;
 
-	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-						  size, irq_flags, pc);
+	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
+						call->event.type,
+						size, irq_flags, pc);
 	if (!event)
 		return;
 
@@ -788,8 +941,28 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
 						irq_flags, pc, regs);
 }
 
+static __kprobes void
+kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
+		     struct pt_regs *regs)
+{
+	/*
+	 * Note: preempt is already disabled around the kprobe handler.
+	 * However, we still need an smp_read_barrier_depends() corresponding
+	 * to smp_wmb() in rcu_assign_pointer() to access the pointer.
+	 */
+	struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
+
+	if (unlikely(!file))
+		return;
+
+	while (*file) {
+		__kretprobe_trace_func(tp, ri, regs, *file);
+		file++;
+	}
+}
+
 /* Event entry printers */
-enum print_line_t
+static enum print_line_t
 print_kprobe_event(struct trace_iterator *iter, int flags,
 		   struct trace_event *event)
 {
@@ -825,7 +998,7 @@ partial:
 	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-enum print_line_t
+static enum print_line_t
 print_kretprobe_event(struct trace_iterator *iter, int flags,
 		      struct trace_event *event)
 {
@@ -975,10 +1148,9 @@ static int set_print_fmt(struct trace_probe *tp)
 #ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
-static __kprobes void kprobe_perf_func(struct kprobe *kp,
-					 struct pt_regs *regs)
+static __kprobes void
+kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
 {
-	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kprobe_trace_entry_head *entry;
 	struct hlist_head *head;
@@ -997,7 +1169,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
 	if (!entry)
 		return;
 
-	entry->ip = (unsigned long)kp->addr;
+	entry->ip = (unsigned long)tp->rp.kp.addr;
 	memset(&entry[1], 0, dsize);
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
@@ -1007,10 +1179,10 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
 }
 
 /* Kretprobe profile handler */
-static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
-					    struct pt_regs *regs)
+static __kprobes void
+kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
+		    struct pt_regs *regs)
 {
-	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kretprobe_trace_entry_head *entry;
 	struct hlist_head *head;
@@ -1044,20 +1216,19 @@ int kprobe_register(struct ftrace_event_call *event,
 		    enum trace_reg type, void *data)
 {
 	struct trace_probe *tp = (struct trace_probe *)event->data;
+	struct ftrace_event_file *file = data;
 
 	switch (type) {
 	case TRACE_REG_REGISTER:
-		return enable_trace_probe(tp, TP_FLAG_TRACE);
+		return enable_trace_probe(tp, file);
 	case TRACE_REG_UNREGISTER:
-		disable_trace_probe(tp, TP_FLAG_TRACE);
-		return 0;
+		return disable_trace_probe(tp, file);
 
 #ifdef CONFIG_PERF_EVENTS
 	case TRACE_REG_PERF_REGISTER:
-		return enable_trace_probe(tp, TP_FLAG_PROFILE);
+		return enable_trace_probe(tp, NULL);
 	case TRACE_REG_PERF_UNREGISTER:
-		disable_trace_probe(tp, TP_FLAG_PROFILE);
-		return 0;
+		return disable_trace_probe(tp, NULL);
 	case TRACE_REG_PERF_OPEN:
 	case TRACE_REG_PERF_CLOSE:
 	case TRACE_REG_PERF_ADD:
@@ -1073,11 +1244,13 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 
+	tp->nhit++;
+
 	if (tp->flags & TP_FLAG_TRACE)
-		kprobe_trace_func(kp, regs);
+		kprobe_trace_func(tp, regs);
 #ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
-		kprobe_perf_func(kp, regs);
+		kprobe_perf_func(tp, regs);
 #endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
@@ -1087,11 +1260,13 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 
+	tp->nhit++;
+
 	if (tp->flags & TP_FLAG_TRACE)
-		kretprobe_trace_func(ri, regs);
+		kretprobe_trace_func(tp, ri, regs);
 #ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
-		kretprobe_perf_func(ri, regs);
+		kretprobe_perf_func(tp, ri, regs);
 #endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
@@ -1189,11 +1364,24 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
 	return a1 + a2 + a3 + a4 + a5 + a6;
 }
 
+static struct ftrace_event_file *
+find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
+{
+	struct ftrace_event_file *file;
+
+	list_for_each_entry(file, &tr->events, list)
+		if (file->event_call == &tp->call)
+			return file;
+
+	return NULL;
+}
+
 static __init int kprobe_trace_self_tests_init(void)
 {
 	int ret, warn = 0;
 	int (*target)(int, int, int, int, int, int);
 	struct trace_probe *tp;
+	struct ftrace_event_file *file;
 
 	target = kprobe_trace_selftest_target;
 
@@ -1203,31 +1391,43 @@ static __init int kprobe_trace_self_tests_init(void)
 				  "$stack $stack0 +0($stack)",
 				  create_trace_probe);
 	if (WARN_ON_ONCE(ret)) {
-		pr_warning("error on probing function entry.\n");
+		pr_warn("error on probing function entry.\n");
 		warn++;
 	} else {
 		/* Enable trace point */
 		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
 		if (WARN_ON_ONCE(tp == NULL)) {
-			pr_warning("error on getting new probe.\n");
+			pr_warn("error on getting new probe.\n");
 			warn++;
-		} else
-			enable_trace_probe(tp, TP_FLAG_TRACE);
+		} else {
+			file = find_trace_probe_file(tp, top_trace_array());
+			if (WARN_ON_ONCE(file == NULL)) {
+				pr_warn("error on getting probe file.\n");
+				warn++;
+			} else
+				enable_trace_probe(tp, file);
+		}
 	}
 
 	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
 				  "$retval", create_trace_probe);
 	if (WARN_ON_ONCE(ret)) {
-		pr_warning("error on probing function return.\n");
+		pr_warn("error on probing function return.\n");
 		warn++;
 	} else {
 		/* Enable trace point */
 		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
 		if (WARN_ON_ONCE(tp == NULL)) {
-			pr_warning("error on getting new probe.\n");
+			pr_warn("error on getting 2nd new probe.\n");
 			warn++;
-		} else
-			enable_trace_probe(tp, TP_FLAG_TRACE);
+		} else {
+			file = find_trace_probe_file(tp, top_trace_array());
+			if (WARN_ON_ONCE(file == NULL)) {
+				pr_warn("error on getting probe file.\n");
+				warn++;
+			} else
+				enable_trace_probe(tp, file);
+		}
 	}
 
 	if (warn)
@@ -1238,27 +1438,39 @@ static __init int kprobe_trace_self_tests_init(void)
 	/* Disable trace points before removing it */
 	tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
 	if (WARN_ON_ONCE(tp == NULL)) {
-		pr_warning("error on getting test probe.\n");
+		pr_warn("error on getting test probe.\n");
 		warn++;
-	} else
-		disable_trace_probe(tp, TP_FLAG_TRACE);
+	} else {
+		file = find_trace_probe_file(tp, top_trace_array());
+		if (WARN_ON_ONCE(file == NULL)) {
+			pr_warn("error on getting probe file.\n");
+			warn++;
+		} else
+			disable_trace_probe(tp, file);
+	}
 
 	tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
 	if (WARN_ON_ONCE(tp == NULL)) {
-		pr_warning("error on getting 2nd test probe.\n");
+		pr_warn("error on getting 2nd test probe.\n");
 		warn++;
-	} else
-		disable_trace_probe(tp, TP_FLAG_TRACE);
+	} else {
+		file = find_trace_probe_file(tp, top_trace_array());
+		if (WARN_ON_ONCE(file == NULL)) {
+			pr_warn("error on getting probe file.\n");
+			warn++;
+		} else
+			disable_trace_probe(tp, file);
+	}
 
 	ret = traceprobe_command("-:testprobe", create_trace_probe);
 	if (WARN_ON_ONCE(ret)) {
-		pr_warning("error on deleting a probe.\n");
+		pr_warn("error on deleting a probe.\n");
 		warn++;
 	}
 
 	ret = traceprobe_command("-:testprobe2", create_trace_probe);
 	if (WARN_ON_ONCE(ret)) {
-		pr_warning("error on deleting a probe.\n");
+		pr_warn("error on deleting a probe.\n");
 		warn++;
 	}
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4aa9f5bc6b2d..ee8e29a2320c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -296,7 +296,7 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
 static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
 
 struct workqueue_struct *system_wq __read_mostly;
-EXPORT_SYMBOL_GPL(system_wq);
+EXPORT_SYMBOL(system_wq);
 struct workqueue_struct *system_highpri_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_highpri_wq);
 struct workqueue_struct *system_long_wq __read_mostly;
@@ -1411,7 +1411,7 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
 	local_irq_restore(flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(queue_work_on);
+EXPORT_SYMBOL(queue_work_on);
 
 void delayed_work_timer_fn(unsigned long __data)
 {
@@ -1485,7 +1485,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	local_irq_restore(flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(queue_delayed_work_on);
+EXPORT_SYMBOL(queue_delayed_work_on);
 
 /**
  * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
@@ -2059,6 +2059,7 @@ static bool manage_workers(struct worker *worker)
 	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
 		spin_unlock_irq(&pool->lock);
 		mutex_lock(&pool->manager_mutex);
+		spin_lock_irq(&pool->lock);
 		ret = true;
 	}
 
@@ -4311,6 +4312,12 @@ bool current_is_workqueue_rescuer(void)
  * no synchronization around this function and the test result is
  * unreliable and only useful as advisory hints or for debugging.
  *
+ * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
+ * Note that both per-cpu and unbound workqueues may be associated with
+ * multiple pool_workqueues which have separate congested states.  A
+ * workqueue being congested on one CPU doesn't mean the workqueue is also
+ * congested on other CPUs / NUMA nodes.
+ *
  * RETURNS:
  * %true if congested, %false otherwise.
  */
@@ -4321,6 +4328,9 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 
 	rcu_read_lock_sched();
 
+	if (cpu == WORK_CPU_UNBOUND)
+		cpu = smp_processor_id();
+
 	if (!(wq->flags & WQ_UNBOUND))
 		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
 	else
@@ -4895,7 +4905,8 @@ static void __init wq_numa_init(void)
 	BUG_ON(!tbl);
 
 	for_each_node(node)
-		BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, node));
+		BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+				node_online(node) ? node : NUMA_NO_NODE));
 
 	for_each_possible_cpu(cpu) {
 		node = cpu_to_node(cpu);