From 537f7ccb396804c6d0057b93ba8eb104ba44f851 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 8 Aug 2016 14:37:37 -0500
Subject: mntns: Add a limit on the number of mount namespaces.

v2: Fixed the very obvious lack of setting ucounts
    on struct mnt_ns reported by Andrei Vagin, and the kbuild
    test report.

Reported-by: Andrei Vagin <avagin@openvz.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..491b8f3e4c9a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2719,9 +2719,20 @@ dput_out:
 	return retval;
 }
 
+static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
+}
+
+static void dec_mnt_namespaces(struct ucounts *ucounts)
+{
+	dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
+}
+
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
 	ns_free_inum(&ns->ns);
+	dec_mnt_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
@@ -2738,14 +2749,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 {
 	struct mnt_namespace *new_ns;
+	struct ucounts *ucounts;
 	int ret;
 
+	ucounts = inc_mnt_namespaces(user_ns);
+	if (!ucounts)
+		return ERR_PTR(-ENFILE);
+
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns)
+	if (!new_ns) {
+		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(-ENOMEM);
+	}
 	ret = ns_alloc_inum(&new_ns->ns);
 	if (ret) {
 		kfree(new_ns);
+		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(ret);
 	}
 	new_ns->ns.ops = &mntns_operations;
@@ -2756,6 +2775,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
 	new_ns->user_ns = get_user_ns(user_ns);
+	new_ns->ucounts = ucounts;
 	return new_ns;
 }
 
-- 
cgit v1.2.3


From c568d68341be7030f5647def68851e469b21ca11 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 16 Sep 2016 12:44:20 +0200
Subject: locks: fix file locking on overlayfs

This patch allows flock, posix locks, ofd locks and leases to work
correctly on overlayfs.

Instead of using the underlying inode for storing lock context use the
overlay inode.  This allows locks to be persistent across copy-up.

This is done by introducing locks_inode() helper and using it instead of
file_inode() to get the inode in locking code.  For non-overlayfs the two
are equivalent, except for an extra pointer dereference in locks_inode().

Since lock operations are in "struct file_operations" we must also make
sure not to call underlying filesystem's lock operations.  Introduce a
super block flag MS_NOREMOTELOCK to this effect.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Acked-by: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c              | 50 +++++++++++++++++++++++++++----------------------
 fs/namespace.c          |  2 +-
 fs/open.c               |  2 +-
 fs/overlayfs/super.c    |  2 +-
 include/linux/fs.h      | 16 ++++++++++++++--
 include/uapi/linux/fs.h |  1 +
 6 files changed, 46 insertions(+), 27 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f6fc13..c1656cff53ee 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,6 +139,11 @@
 #define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
 #define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)
 
+static inline bool is_remote_lock(struct file *filp)
+{
+	return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK));
+}
+
 static bool lease_breaking(struct file_lock *fl)
 {
 	return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -791,7 +796,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 {
 	struct file_lock *cfl;
 	struct file_lock_context *ctx;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (!ctx || list_empty_careful(&ctx->flc_posix)) {
@@ -1192,7 +1197,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 int posix_lock_file(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return posix_lock_inode(file_inode(filp), fl, conflock);
+	return posix_lock_inode(locks_inode(filp), fl, conflock);
 }
 EXPORT_SYMBOL(posix_lock_file);
 
@@ -1232,7 +1237,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
 int locks_mandatory_locked(struct file *file)
 {
 	int ret;
-	struct inode *inode = file_inode(file);
+	struct inode *inode = locks_inode(file);
 	struct file_lock_context *ctx;
 	struct file_lock *fl;
 
@@ -1572,7 +1577,7 @@ EXPORT_SYMBOL(lease_get_mtime);
 int fcntl_getlease(struct file *filp)
 {
 	struct file_lock *fl;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	struct file_lock_context *ctx;
 	int type = F_UNLCK;
 	LIST_HEAD(dispose);
@@ -1580,7 +1585,7 @@ int fcntl_getlease(struct file *filp)
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
 		spin_lock(&ctx->flc_lock);
-		time_out_leases(file_inode(filp), &dispose);
+		time_out_leases(inode, &dispose);
 		list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
 			if (fl->fl_file != filp)
 				continue;
@@ -1628,7 +1633,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 {
 	struct file_lock *fl, *my_fl = NULL, *lease;
 	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = dentry->d_inode;
 	struct file_lock_context *ctx;
 	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
 	int error;
@@ -1742,7 +1747,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 {
 	int error = -EAGAIN;
 	struct file_lock *fl, *victim = NULL;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	struct file_lock_context *ctx;
 	LIST_HEAD(dispose);
 
@@ -1782,7 +1787,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 			void **priv)
 {
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	int error;
 
 	if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
@@ -1830,7 +1835,7 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
-	if (filp->f_op->setlease)
+	if (filp->f_op->setlease && is_remote_lock(filp))
 		return filp->f_op->setlease(filp, arg, lease, priv);
 	else
 		return generic_setlease(filp, arg, lease, priv);
@@ -1979,7 +1984,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	if (error)
 		goto out_free;
 
-	if (f.file->f_op->flock)
+	if (f.file->f_op->flock && is_remote_lock(f.file))
 		error = f.file->f_op->flock(f.file,
 					  (can_sleep) ? F_SETLKW : F_SETLK,
 					  lock);
@@ -2005,7 +2010,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
  */
 int vfs_test_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock)
+	if (filp->f_op->lock && is_remote_lock(filp))
 		return filp->f_op->lock(filp, F_GETLK, fl);
 	posix_test_lock(filp, fl);
 	return 0;
@@ -2129,7 +2134,7 @@ out:
  */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
 {
-	if (filp->f_op->lock)
+	if (filp->f_op->lock && is_remote_lock(filp))
 		return filp->f_op->lock(filp, cmd, fl);
 	else
 		return posix_lock_file(filp, fl, conf);
@@ -2191,7 +2196,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (file_lock == NULL)
 		return -ENOLCK;
 
-	inode = file_inode(filp);
+	inode = locks_inode(filp);
 
 	/*
 	 * This might block, so we do it before checking the inode.
@@ -2343,7 +2348,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = file_inode(filp);
+	inode = locks_inode(filp);
 
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
@@ -2426,6 +2431,7 @@ out:
 void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
 	int error;
+	struct inode *inode = locks_inode(filp);
 	struct file_lock lock;
 	struct file_lock_context *ctx;
 
@@ -2434,7 +2440,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	 * posix_lock_file().  Another process could be setting a lock on this
 	 * file at the same time, but we wouldn't remove that lock anyway.
 	 */
-	ctx =  smp_load_acquire(&file_inode(filp)->i_flctx);
+	ctx =  smp_load_acquire(&inode->i_flctx);
 	if (!ctx || list_empty(&ctx->flc_posix))
 		return;
 
@@ -2452,7 +2458,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
-	trace_locks_remove_posix(file_inode(filp), &lock, error);
+	trace_locks_remove_posix(inode, &lock, error);
 }
 
 EXPORT_SYMBOL(locks_remove_posix);
@@ -2469,12 +2475,12 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 		.fl_type = F_UNLCK,
 		.fl_end = OFFSET_MAX,
 	};
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 
 	if (list_empty(&flctx->flc_flock))
 		return;
 
-	if (filp->f_op->flock)
+	if (filp->f_op->flock && is_remote_lock(filp))
 		filp->f_op->flock(filp, F_SETLKW, &fl);
 	else
 		flock_lock_inode(inode, &fl);
@@ -2508,7 +2514,7 @@ void locks_remove_file(struct file *filp)
 {
 	struct file_lock_context *ctx;
 
-	ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+	ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
 	if (!ctx)
 		return;
 
@@ -2552,7 +2558,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
  */
 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock)
+	if (filp->f_op->lock && is_remote_lock(filp))
 		return filp->f_op->lock(filp, F_CANCELLK, fl);
 	return 0;
 }
@@ -2580,7 +2586,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 		fl_pid = fl->fl_pid;
 
 	if (fl->fl_file != NULL)
-		inode = file_inode(fl->fl_file);
+		inode = locks_inode(fl->fl_file);
 
 	seq_printf(f, "%lld:%s ", id, pfx);
 	if (IS_POSIX(fl)) {
@@ -2682,7 +2688,7 @@ static void __show_fd_locks(struct seq_file *f,
 void show_fd_locks(struct seq_file *f,
 		  struct file *filp, struct files_struct *files)
 {
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	struct file_lock_context *ctx;
 	int id = 0;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..dcd9afe21e62 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2700,7 +2700,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
 
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
 		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
-		   MS_STRICTATIME);
+		   MS_STRICTATIME | MS_NOREMOTELOCK);
 
 	if (flags & MS_REMOUNT)
 		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
diff --git a/fs/open.c b/fs/open.c
index 4fd6e256f4f4..648fb9d3e97a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -726,7 +726,7 @@ static int do_dentry_open(struct file *f,
 	if (error)
 		goto cleanup_all;
 
-	error = break_lease(inode, f->f_flags);
+	error = break_lease(locks_inode(f), f->f_flags);
 	if (error)
 		goto cleanup_all;
 
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index e2a94a26767b..3d0b9dee2b76 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1320,7 +1320,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_root = root_dentry;
 	sb->s_fs_info = ufs;
-	sb->s_flags |= MS_POSIXACL;
+	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
 
 	return 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7db097d673a8..8ee0f011547f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1065,6 +1065,18 @@ struct file_lock_context {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
+/*
+ * Return the inode to use for locking
+ *
+ * For overlayfs this should be the overlay inode, not the real inode returned
+ * by file_inode().  For any other fs file_inode(filp) and locks_inode(filp) are
+ * equal.
+ */
+static inline struct inode *locks_inode(const struct file *f)
+{
+	return f->f_path.dentry->d_inode;
+}
+
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
@@ -1252,7 +1264,7 @@ static inline struct dentry *file_dentry(const struct file *file)
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
 {
-	return locks_lock_inode_wait(file_inode(filp), fl);
+	return locks_lock_inode_wait(locks_inode(filp), fl);
 }
 
 struct fasync_struct {
@@ -2155,7 +2167,7 @@ static inline int mandatory_lock(struct inode *ino)
 
 static inline int locks_verify_locked(struct file *file)
 {
-	if (mandatory_lock(file_inode(file)))
+	if (mandatory_lock(locks_inode(file)))
 		return locks_mandatory_locked(file);
 	return 0;
 }
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 3b00f7c8943f..2473272169f2 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -132,6 +132,7 @@ struct inodes_stat_t {
 #define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
+#define MS_NOREMOTELOCK	(1<<27)
 #define MS_NOSEC	(1<<28)
 #define MS_BORN		(1<<29)
 #define MS_ACTIVE	(1<<30)
-- 
cgit v1.2.3


From df75e7748bae1c7098bfa358485389b897f71305 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 22 Sep 2016 13:08:36 -0500
Subject: userns: When the per user per user namespace limit is reached return
 ENOSPC

The current error codes returned when the per user per user
namespace limit is hit (EINVAL, EUSERS, and ENFILE) are wrong.  I
asked for advice on linux-api and it was made clear that those were
the wrong error codes, but a correct error code was not suggested.

The best general error code I have found for hitting a resource limit
is ENOSPC.  It is not perfect but as it is unambiguous it will serve
until someone comes up with a better error code.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c           | 2 +-
 ipc/namespace.c          | 2 +-
 kernel/cgroup.c          | 2 +-
 kernel/pid_namespace.c   | 2 +-
 kernel/user_namespace.c  | 2 +-
 kernel/utsname.c         | 2 +-
 net/core/net_namespace.c | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 491b8f3e4c9a..cf2cc234c8b4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2754,7 +2754,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 
 	ucounts = inc_mnt_namespaces(user_ns);
 	if (!ucounts)
-		return ERR_PTR(-ENFILE);
+		return ERR_PTR(-ENOSPC);
 
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns) {
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 730914214135..fab727d9fe09 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -33,7 +33,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	struct ucounts *ucounts;
 	int err;
 
-	err = -ENFILE;
+	err = -ENOSPC;
 	ucounts = inc_ipc_namespaces(user_ns);
 	if (!ucounts)
 		goto fail;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9e4427fec46..f1dd4b076210 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6354,7 +6354,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 
 	ucounts = inc_cgroup_namespaces(user_ns);
 	if (!ucounts)
-		return ERR_PTR(-ENFILE);
+		return ERR_PTR(-ENOSPC);
 
 	/* It is not safe to take cgroup_mutex here */
 	spin_lock_irq(&css_set_lock);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 30a7f3351932..7542b28cc929 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -98,7 +98,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	int i;
 	int err;
 
-	err = -EINVAL;
+	err = -ENOSPC;
 	if (level > MAX_PID_NS_LEVEL)
 		goto out;
 	ucounts = inc_pid_namespaces(user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0edafe305861..f2c5ba5505f1 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -76,7 +76,7 @@ int create_user_ns(struct cred *new)
 	struct ucounts *ucounts;
 	int ret, i;
 
-	ret = -EUSERS;
+	ret = -ENOSPC;
 	if (parent_ns->level > 32)
 		goto fail;
 
diff --git a/kernel/utsname.c b/kernel/utsname.c
index f3b0bb4ac3ba..35587b76faa3 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -49,7 +49,7 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	struct ucounts *ucounts;
 	int err;
 
-	err = -ENFILE;
+	err = -ENOSPC;
 	ucounts = inc_uts_namespaces(user_ns);
 	if (!ucounts)
 		goto fail;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3e2812aeceb7..06af5d6a883c 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -370,7 +370,7 @@ struct net *copy_net_ns(unsigned long flags,
 
 	ucounts = inc_net_namespaces(user_ns);
 	if (!ucounts)
-		return ERR_PTR(-ENFILE);
+		return ERR_PTR(-ENOSPC);
 
 	net = net_alloc();
 	if (!net) {
-- 
cgit v1.2.3


From bcac25a58bfc6bd79191ac5d7afb49bea96da8c9 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Tue, 6 Sep 2016 00:47:13 -0700
Subject: kernel: add a helper to get an owning user namespace for a namespace

Return -EPERM if an owning user namespace is outside of a process's
current user namespace.

v2: In the first version ns_get_owner returned ENOENT for init_user_ns.
    This special case was removed from this version. There is nothing
    outside of init_user_ns, so we can return EPERM.
v3: rename ns->get_owner() to ns->owner(). get_* usually means that it
grabs a reference.

Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/namespace.c                 |  6 ++++++
 include/linux/proc_ns.h        |  1 +
 include/linux/user_namespace.h |  7 +++++++
 ipc/namespace.c                |  6 ++++++
 kernel/cgroup.c                |  6 ++++++
 kernel/pid_namespace.c         |  6 ++++++
 kernel/user_namespace.c        | 24 ++++++++++++++++++++++++
 kernel/utsname.c               |  6 ++++++
 net/core/net_namespace.c       |  6 ++++++
 9 files changed, 68 insertions(+)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..fea56f310547 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3348,10 +3348,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return 0;
 }
 
+static struct user_namespace *mntns_owner(struct ns_common *ns)
+{
+	return to_mnt_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations mntns_operations = {
 	.name		= "mnt",
 	.type		= CLONE_NEWNS,
 	.get		= mntns_get,
 	.put		= mntns_put,
 	.install	= mntns_install,
+	.owner		= mntns_owner,
 };
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index de0e7719d4c5..ca85a4348ffc 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -18,6 +18,7 @@ struct proc_ns_operations {
 	struct ns_common *(*get)(struct task_struct *task);
 	void (*put)(struct ns_common *ns);
 	int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
+	struct user_namespace *(*owner)(struct ns_common *ns);
 };
 
 extern const struct proc_ns_operations netns_operations;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9217169c64cb..190cf0760815 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -73,6 +73,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
 extern int proc_setgroups_show(struct seq_file *m, void *v);
 extern bool userns_may_setgroups(const struct user_namespace *ns);
 extern bool current_in_userns(const struct user_namespace *target_ns);
+
+struct ns_common *ns_get_owner(struct ns_common *ns);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -106,6 +108,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
 {
 	return true;
 }
+
+static inline struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+	return ERR_PTR(-EPERM);
+}
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index d87e6baa1323..578d93be619d 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -165,10 +165,16 @@ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
 	return 0;
 }
 
+static struct user_namespace *ipcns_owner(struct ns_common *ns)
+{
+	return to_ipc_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations ipcns_operations = {
 	.name		= "ipc",
 	.type		= CLONE_NEWIPC,
 	.get		= ipcns_get,
 	.put		= ipcns_put,
 	.install	= ipcns_install,
+	.owner		= ipcns_owner,
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..86b0e8b16426 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6403,12 +6403,18 @@ static void cgroupns_put(struct ns_common *ns)
 	put_cgroup_ns(to_cg_ns(ns));
 }
 
+static struct user_namespace *cgroupns_owner(struct ns_common *ns)
+{
+	return to_cg_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations cgroupns_operations = {
 	.name		= "cgroup",
 	.type		= CLONE_NEWCGROUP,
 	.get		= cgroupns_get,
 	.put		= cgroupns_put,
 	.install	= cgroupns_install,
+	.owner		= cgroupns_owner,
 };
 
 static __init int cgroup_namespaces_init(void)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..c02d744225e1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -388,12 +388,18 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return 0;
 }
 
+static struct user_namespace *pidns_owner(struct ns_common *ns)
+{
+	return to_pid_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations pidns_operations = {
 	.name		= "pid",
 	.type		= CLONE_NEWPID,
 	.get		= pidns_get,
 	.put		= pidns_put,
 	.install	= pidns_install,
+	.owner		= pidns_owner,
 };
 
 static __init int pid_namespaces_init(void)
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 68f594212759..0ef683a03c20 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1004,12 +1004,36 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return commit_creds(cred);
 }
 
+struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+	struct user_namespace *my_user_ns = current_user_ns();
+	struct user_namespace *owner, *p;
+
+	/* See if the owner is in the current user namespace */
+	owner = p = ns->ops->owner(ns);
+	for (;;) {
+		if (!p)
+			return ERR_PTR(-EPERM);
+		if (p == my_user_ns)
+			break;
+		p = p->parent;
+	}
+
+	return &get_user_ns(owner)->ns;
+}
+
+static struct user_namespace *userns_owner(struct ns_common *ns)
+{
+	return to_user_ns(ns)->parent;
+}
+
 const struct proc_ns_operations userns_operations = {
 	.name		= "user",
 	.type		= CLONE_NEWUSER,
 	.get		= userns_get,
 	.put		= userns_put,
 	.install	= userns_install,
+	.owner		= userns_owner,
 };
 
 static __init int user_namespaces_init(void)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 831ea7108232..e1211a8a5c18 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -130,10 +130,16 @@ static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
 	return 0;
 }
 
+static struct user_namespace *utsns_owner(struct ns_common *ns)
+{
+	return to_uts_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations utsns_operations = {
 	.name		= "uts",
 	.type		= CLONE_NEWUTS,
 	.get		= utsns_get,
 	.put		= utsns_put,
 	.install	= utsns_install,
+	.owner		= utsns_owner,
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..861efa34f08c 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -996,11 +996,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return 0;
 }
 
+static struct user_namespace *netns_owner(struct ns_common *ns)
+{
+	return to_net_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name		= "net",
 	.type		= CLONE_NEWNET,
 	.get		= netns_get,
 	.put		= netns_put,
 	.install	= netns_install,
+	.owner		= netns_owner,
 };
 #endif
-- 
cgit v1.2.3


From d29216842a85c7970c536108e093963f02714498 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 28 Sep 2016 00:27:17 -0500
Subject: mnt: Add a per mount namespace limit on the number of mounts

CAI Qian <caiqian@redhat.com> pointed out that the semantics
of shared subtrees make it possible to create an exponentially
increasing number of mounts in a mount namespace.

    mkdir /tmp/1 /tmp/2
    mount --make-rshared /
    for i in $(seq 1 20) ; do mount --bind /tmp/1 /tmp/2 ; done

Will create 2^20 or 1048576 mounts, which is a practical problem
as some people have managed to hit this by accident.

As such CVE-2016-6213 was assigned.

Ian Kent <raven@themaw.net> described the situation for autofs users
as follows:

> The number of mounts for direct mount maps is usually not very large because of
> the way they are implemented, large direct mount maps can have performance
> problems. There can be anywhere from a few (likely case a few hundred) to less
> than 10000, plus mounts that have been triggered and not yet expired.
>
> Indirect mounts have one autofs mount at the root plus the number of mounts that
> have been triggered and not yet expired.
>
> The number of autofs indirect map entries can range from a few to the common
> case of several thousand and in rare cases up to between 30000 and 50000. I've
> not heard of people with maps larger than 50000 entries.
>
> The larger the number of map entries the greater the possibility for a large
> number of active mounts so it's not hard to expect cases of a 1000 or somewhat
> more active mounts.

So I am setting the default number of mounts allowed per mount
namespace at 100,000.  This is more than enough for any use case I
know of, but small enough to quickly stop an exponential increase
in mounts.  Which should be perfect to catch misconfigurations and
malfunctioning programs.

For anyone who needs a higher limit this can be changed by writing
to the new /proc/sys/fs/mount-max sysctl.

Tested-by: CAI Qian <caiqian@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 Documentation/sysctl/fs.txt |  7 +++++++
 fs/mount.h                  |  2 ++
 fs/namespace.c              | 49 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/pnode.c                  |  2 +-
 fs/pnode.h                  |  1 +
 include/linux/mount.h       |  2 ++
 kernel/sysctl.c             |  9 +++++++++
 7 files changed, 70 insertions(+), 2 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 302b5ed616a6..35e17f748ca7 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -265,6 +265,13 @@ aio-nr can grow to.
 
 ==============================================================
 
+mount-max:
+
+This denotes the maximum number of mounts that may exist
+in a mount namespace.
+
+==============================================================
+
 
 2. /proc/sys/fs/binfmt_misc
 ----------------------------------------------------------
diff --git a/fs/mount.h b/fs/mount.h
index e037981d8351..d2e25d7b64b3 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -14,6 +14,8 @@ struct mnt_namespace {
 	u64			seq;	/* Sequence number to prevent loops */
 	wait_queue_head_t poll;
 	u64 event;
+	unsigned int		mounts; /* # of mounts in the namespace */
+	unsigned int		pending_mounts;
 };
 
 struct mnt_pcp {
diff --git a/fs/namespace.c b/fs/namespace.c
index 8a0e90eb81d3..db1b5a38864e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
 #include "pnode.h"
 #include "internal.h"
 
+/* Maximum number of mounts in a mount namespace */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
 static unsigned int m_hash_mask __read_mostly;
 static unsigned int m_hash_shift __read_mostly;
 static unsigned int mp_hash_mask __read_mostly;
@@ -899,6 +902,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
 
 	list_splice(&head, n->list.prev);
 
+	n->mounts += n->pending_mounts;
+	n->pending_mounts = 0;
+
 	attach_shadowed(mnt, parent, shadows);
 	touch_mnt_namespace(n);
 }
@@ -1419,11 +1425,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 		propagate_umount(&tmp_list);
 
 	while (!list_empty(&tmp_list)) {
+		struct mnt_namespace *ns;
 		bool disconnect;
 		p = list_first_entry(&tmp_list, struct mount, mnt_list);
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt_list);
-		__touch_mnt_namespace(p->mnt_ns);
+		ns = p->mnt_ns;
+		if (ns) {
+			ns->mounts--;
+			__touch_mnt_namespace(ns);
+		}
 		p->mnt_ns = NULL;
 		if (how & UMOUNT_SYNC)
 			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1840,6 +1851,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
 	return 0;
 }
 
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+	unsigned int max = READ_ONCE(sysctl_mount_max);
+	unsigned int mounts = 0, old, pending, sum;
+	struct mount *p;
+
+	for (p = mnt; p; p = next_mnt(p, mnt))
+		mounts++;
+
+	old = ns->mounts;
+	pending = ns->pending_mounts;
+	sum = old + pending;
+	if ((old > sum) ||
+	    (pending > sum) ||
+	    (max < sum) ||
+	    (mounts > (max - sum)))
+		return -ENOSPC;
+
+	ns->pending_mounts = pending + mounts;
+	return 0;
+}
+
 /*
  *  @source_mnt : mount tree to be attached
  *  @nd         : place the mount tree @source_mnt is attached
@@ -1909,10 +1942,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 			struct path *parent_path)
 {
 	HLIST_HEAD(tree_list);
+	struct mnt_namespace *ns = dest_mnt->mnt_ns;
 	struct mount *child, *p;
 	struct hlist_node *n;
 	int err;
 
+	/* Is there space to add these mounts to the mount namespace? */
+	if (!parent_path) {
+		err = count_mounts(ns, source_mnt);
+		if (err)
+			goto out;
+	}
+
 	if (IS_MNT_SHARED(dest_mnt)) {
 		err = invent_group_ids(source_mnt, true);
 		if (err)
@@ -1949,11 +1990,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
  out_cleanup_ids:
 	while (!hlist_empty(&tree_list)) {
 		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+		child->mnt_parent->mnt_ns->pending_mounts = 0;
 		umount_tree(child, UMOUNT_SYNC);
 	}
 	unlock_mount_hash();
 	cleanup_group_ids(source_mnt, NULL);
  out:
+	ns->pending_mounts = 0;
 	return err;
 }
 
@@ -2776,6 +2819,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	new_ns->event = 0;
 	new_ns->user_ns = get_user_ns(user_ns);
 	new_ns->ucounts = ucounts;
+	new_ns->mounts = 0;
+	new_ns->pending_mounts = 0;
 	return new_ns;
 }
 
@@ -2825,6 +2870,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	q = new;
 	while (p) {
 		q->mnt_ns = new_ns;
+		new_ns->mounts++;
 		if (new_fs) {
 			if (&p->mnt == new_fs->root.mnt) {
 				new_fs->root.mnt = mntget(&q->mnt);
@@ -2863,6 +2909,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 		struct mount *mnt = real_mount(m);
 		mnt->mnt_ns = new_ns;
 		new_ns->root = mnt;
+		new_ns->mounts++;
 		list_add(&mnt->mnt_list, &new_ns->list);
 	} else {
 		mntput(m);
diff --git a/fs/pnode.c b/fs/pnode.c
index 99899705b105..234a9ac49958 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -259,7 +259,7 @@ static int propagate_one(struct mount *m)
 		read_sequnlock_excl(&mount_lock);
 	}
 	hlist_add_head(&child->mnt_hash, list);
-	return 0;
+	return count_mounts(m->mnt_ns, child);
 }
 
 /*
diff --git a/fs/pnode.h b/fs/pnode.h
index 0fcdbe7ca648..550f5a8b4fcf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
 struct mount *copy_tree(struct mount *, struct dentry *, int);
 bool is_path_reachable(struct mount *, struct dentry *,
 			 const struct path *root);
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
 #endif /* _LINUX_PNODE_H */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 54a594d49733..1172cce949a4 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts);
 
 extern dev_t name_to_dev_t(const char *name);
 
+extern unsigned int sysctl_mount_max;
+
 #endif /* _LINUX_MOUNT_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b43d0b27c1fe..03f18cc15697 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -65,6 +65,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/kexec.h>
 #include <linux/bpf.h>
+#include <linux/mount.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -1838,6 +1839,14 @@ static struct ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "mount-max",
+		.data		= &sysctl_mount_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
 	{ }
 };
 
-- 
cgit v1.2.3


From 0766f788eb727e2e330d55d30545db65bcf2623f Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Mon, 20 Jun 2016 20:42:34 +0200
Subject: latent_entropy: Mark functions with __latent_entropy

The __latent_entropy gcc attribute can be used only on functions and
variables.  If it is on a function then the plugin will instrument it for
gathering control-flow entropy. If the attribute is on a variable then
the plugin will initialize it with random contents.  The variable must
be an integer, an integer array type or a structure with integer fields.

These specific functions have been selected because they are init
functions (to help gather boot-time entropy), are called at unpredictable
times, or they have variable loops, each of which provides some level of
latent entropy.

Signed-off-by: Emese Revfy <re.emese@gmail.com>
[kees: expanded commit message]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 block/blk-softirq.c          | 2 +-
 drivers/char/random.c        | 4 ++--
 fs/namespace.c               | 1 +
 include/linux/compiler-gcc.h | 7 +++++++
 include/linux/compiler.h     | 4 ++++
 include/linux/fdtable.h      | 2 +-
 include/linux/genhd.h        | 2 +-
 include/linux/init.h         | 5 +++--
 include/linux/random.h       | 4 ++--
 kernel/fork.c                | 6 ++++--
 kernel/rcu/tiny.c            | 2 +-
 kernel/rcu/tree.c            | 2 +-
 kernel/sched/fair.c          | 2 +-
 kernel/softirq.c             | 4 ++--
 kernel/time/timer.c          | 2 +-
 lib/irq_poll.c               | 2 +-
 lib/random32.c               | 2 +-
 mm/page_alloc.c              | 2 +-
 net/core/dev.c               | 4 ++--
 19 files changed, 37 insertions(+), 22 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 53b1737e978d..489eab825a8a 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
  * Softirq action handler - move entries to local list and loop over them
  * while passing them to the queue registered handler.
  */
-static void blk_done_softirq(struct softirq_action *h)
+static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 {
 	struct list_head *cpu_list, local_list;
 
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 3efb3bf0ab83..7274ae89ddb3 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -479,8 +479,8 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
 
 static void crng_reseed(struct crng_state *crng, struct entropy_store *r);
 static void push_to_pool(struct work_struct *work);
-static __u32 input_pool_data[INPUT_POOL_WORDS];
-static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
+static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy;
+static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy;
 
 static struct entropy_store input_pool = {
 	.poolinfo = &poolinfo_table[0],
diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..4a9568b81138 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2759,6 +2759,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	return new_ns;
 }
 
+__latent_entropy
 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 573c5a18908f..432f5c97e18f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -188,6 +188,13 @@
 #endif /* GCC_VERSION >= 40300 */
 
 #if GCC_VERSION >= 40500
+
+#ifndef __CHECKER__
+#ifdef LATENT_ENTROPY_PLUGIN
+#define __latent_entropy __attribute__((latent_entropy))
+#endif
+#endif
+
 /*
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 668569844d37..ceaddaf76ff1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -406,6 +406,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __attribute_const__	/* unimplemented */
 #endif
 
+#ifndef __latent_entropy
+# define __latent_entropy
+#endif
+
 /*
  * Tell gcc if a function is cold. The compiler will assume any path
  * directly leading to the call is unlikely.
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 5295535b60c6..9852c7e33466 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -105,7 +105,7 @@ struct files_struct *get_files_struct(struct task_struct *);
 void put_files_struct(struct files_struct *fs);
 void reset_files_struct(struct files_struct *);
 int unshare_files(struct files_struct **);
-struct files_struct *dup_fd(struct files_struct *, int *);
+struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy;
 void do_close_on_exec(struct files_struct *);
 int iterate_fd(struct files_struct *, unsigned,
 		int (*)(const void *, struct file *, unsigned),
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 1dbf52f9c24b..e0341af6950e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -437,7 +437,7 @@ extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
 extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
 
 /* drivers/char/random.c */
-extern void add_disk_randomness(struct gendisk *disk);
+extern void add_disk_randomness(struct gendisk *disk) __latent_entropy;
 extern void rand_initialize_disk(struct gendisk *disk);
 
 static inline sector_t get_start_sect(struct block_device *bdev)
diff --git a/include/linux/init.h b/include/linux/init.h
index 6935d02474aa..1e5c131d5c9a 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -39,7 +39,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init		__section(.init.text) __cold notrace
+#define __init		__section(.init.text) __cold notrace __latent_entropy
 #define __initdata	__section(.init.data)
 #define __initconst	__constsection(.init.rodata)
 #define __exitdata	__section(.exit.data)
@@ -86,7 +86,8 @@
 #define __exit          __section(.exit.text) __exitused __cold notrace
 
 /* Used for MEMORY_HOTPLUG */
-#define __meminit        __section(.meminit.text) __cold notrace
+#define __meminit        __section(.meminit.text) __cold notrace \
+						  __latent_entropy
 #define __meminitdata    __section(.meminit.data)
 #define __meminitconst   __constsection(.meminit.rodata)
 #define __memexit        __section(.memexit.text) __exitused __cold notrace
diff --git a/include/linux/random.h b/include/linux/random.h
index a59c74cdb1eb..d80a4388a4fd 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -30,8 +30,8 @@ static inline void add_latent_entropy(void) {}
 #endif
 
 extern void add_input_randomness(unsigned int type, unsigned int code,
-				 unsigned int value);
-extern void add_interrupt_randomness(int irq, int irq_flags);
+				 unsigned int value) __latent_entropy;
+extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
 
 extern void get_random_bytes(void *buf, int nbytes);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
diff --git a/kernel/fork.c b/kernel/fork.c
index 001b18473a07..05393881ef39 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -404,7 +404,8 @@ free_tsk:
 }
 
 #ifdef CONFIG_MMU
-static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static __latent_entropy int dup_mmap(struct mm_struct *mm,
+					struct mm_struct *oldmm)
 {
 	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
 	struct rb_node **rb_link, *rb_parent;
@@ -1296,7 +1297,8 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
  * parts of the process environment (as per the clone
  * flags). The actual kick-off is left to the caller.
  */
-static struct task_struct *copy_process(unsigned long clone_flags,
+static __latent_entropy struct task_struct *copy_process(
+					unsigned long clone_flags,
 					unsigned long stack_start,
 					unsigned long stack_size,
 					int __user *child_tidptr,
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 944b1b491ed8..1898559e6b60 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -170,7 +170,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 				      false));
 }
 
-static void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
 {
 	__rcu_process_callbacks(&rcu_sched_ctrlblk);
 	__rcu_process_callbacks(&rcu_bh_ctrlblk);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5d80925e7fc8..e5164deb51e1 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3013,7 +3013,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 /*
  * Do RCU core processing for the current CPU.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
 {
 	struct rcu_state *rsp;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 039de34f1521..004996df2f10 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8283,7 +8283,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
  * run_rebalance_domains is triggered when needed from the scheduler tick.
  * Also triggered for nohz idle balancing (with nohz_balancing_kick set).
  */
-static void run_rebalance_domains(struct softirq_action *h)
+static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
 {
 	struct rq *this_rq = this_rq();
 	enum cpu_idle_type idle = this_rq->idle_balance ?
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 17caf4b63342..34033fd09c8c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -482,7 +482,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
 }
 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
 
-static void tasklet_action(struct softirq_action *a)
+static __latent_entropy void tasklet_action(struct softirq_action *a)
 {
 	struct tasklet_struct *list;
 
@@ -518,7 +518,7 @@ static void tasklet_action(struct softirq_action *a)
 	}
 }
 
-static void tasklet_hi_action(struct softirq_action *a)
+static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
 {
 	struct tasklet_struct *list;
 
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 32bf6f75a8fe..2d47980a1bc4 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1633,7 +1633,7 @@ static inline void __run_timers(struct timer_base *base)
 /*
  * This function runs timers and the timer-tq in bottom half context.
  */
-static void run_timer_softirq(struct softirq_action *h)
+static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 836f7db4e548..63be7495dbb2 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -74,7 +74,7 @@ void irq_poll_complete(struct irq_poll *iop)
 }
 EXPORT_SYMBOL(irq_poll_complete);
 
-static void irq_poll_softirq(struct softirq_action *h)
+static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
 {
 	struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
 	int rearm = 0, budget = irq_poll_budget;
diff --git a/lib/random32.c b/lib/random32.c
index 69ed593aab07..a30923573676 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -47,7 +47,7 @@ static inline void prandom_state_selftest(void)
 }
 #endif
 
-static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
+static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
 
 /**
  *	prandom_u32_state - seeded pseudo-random number generator.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 248851d1fc86..901121af4e54 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -92,7 +92,7 @@ int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
-volatile u64 latent_entropy;
+volatile u64 latent_entropy __latent_entropy;
 EXPORT_SYMBOL(latent_entropy);
 #endif
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ea6312057a71..ee076c2791f9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3855,7 +3855,7 @@ int netif_rx_ni(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_rx_ni);
 
-static void net_tx_action(struct softirq_action *h)
+static __latent_entropy void net_tx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 
@@ -5187,7 +5187,7 @@ out_unlock:
 	return work;
 }
 
-static void net_rx_action(struct softirq_action *h)
+static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 	unsigned long time_limit = jiffies + 2;
-- 
cgit v1.2.3