70 files changed, 884 insertions, 814 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 9774588da60e..bc821a86d965 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -58,6 +58,13 @@ config FS_DAX_PMD
 	depends on ZONE_DEVICE
 	depends on TRANSPARENT_HUGEPAGE
 
+# Selected by DAX drivers that do not expect filesystem DAX to support
+# get_user_pages() of DAX mappings. I.e. "limited" indicates no support
+# for fork() of processes with MAP_SHARED mappings or support for
+# direct-I/O to a DAX mapping.
+config FS_DAX_LIMITED
+	bool
+
 endif # BLOCK
 
 # Posix ACL utility routines
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index d180b46453cf..b2bf7016e1b3 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -81,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 	 * we can jump directly to where we left off.
 	 */
 	ino = (u32)(long)file->private_data;
-	if (ino && inode_cmp_iversion(inode, file->f_version) == 0) {
+	if (ino && inode_eq_iversion(inode, file->f_version)) {
 		pr_debug("readdir() left off=%d\n", ino);
 		goto inside;
 	}
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index a537368ba0db..fd9f28b8a933 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -332,11 +332,18 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac)
  */
 int afs_end_cursor(struct afs_addr_cursor *ac)
 {
-	if (ac->responded && ac->index != ac->start)
-		WRITE_ONCE(ac->alist->index, ac->index);
+	struct afs_addr_list *alist;
+
+	alist = ac->alist;
+	if (alist) {
+		if (ac->responded && ac->index != ac->start)
+			WRITE_ONCE(alist->index, ac->index);
+		afs_put_addrlist(alist);
+	}
 
-	afs_put_addrlist(ac->alist);
+	ac->addr = NULL;
 	ac->alist = NULL;
+	ac->begun = false;
 	return ac->error;
 }
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 23c7f395d718..ba2b458b36d1 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -17,10 +17,13 @@
 #include <linux/pagemap.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
+#include <linux/dns_resolver.h>
 #include "internal.h"
 
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 				 unsigned int flags);
+static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
+					 unsigned int flags);
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
@@ -64,6 +67,17 @@ const struct inode_operations afs_dir_inode_operations = {
 	.listxattr	= afs_listxattr,
 };
 
+const struct file_operations afs_dynroot_file_operations = {
+	.open		= dcache_dir_open,
+	.release	= dcache_dir_close,
+	.iterate_shared	= dcache_readdir,
+	.llseek		= dcache_dir_lseek,
+};
+
+const struct inode_operations afs_dynroot_inode_operations = {
+	.lookup		= afs_dynroot_lookup,
+};
+
 const struct dentry_operations afs_fs_dentry_operations = {
 	.d_revalidate	= afs_d_revalidate,
 	.d_delete	= afs_d_delete,
@@ -468,25 +482,58 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 /*
+ * Probe to see if a cell may exist.  This prevents positive dentries from
+ * being created unnecessarily.
+ */
+static int afs_probe_cell_name(struct dentry *dentry)
+{
+	struct afs_cell *cell;
+	const char *name = dentry->d_name.name;
+	size_t len = dentry->d_name.len;
+	int ret;
+
+	/* Names prefixed with a dot are R/W mounts. */
+	if (name[0] == '.') {
+		if (len == 1)
+			return -EINVAL;
+		name++;
+		len--;
+	}
+
+	cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len);
+	if (!IS_ERR(cell)) {
+		afs_put_cell(afs_d2net(dentry), cell);
+		return 0;
+	}
+
+	ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL);
+	if (ret == -ENODATA)
+		ret = -EDESTADDRREQ;
+	return ret;
+}
+
+/*
  * Try to auto mount the mountpoint with pseudo directory, if the autocell
  * operation is setted.
  */
-static struct inode *afs_try_auto_mntpt(
-	int ret, struct dentry *dentry, struct inode *dir, struct key *key,
-	struct afs_fid *fid)
+static struct inode *afs_try_auto_mntpt(struct dentry *dentry,
+					struct inode *dir, struct afs_fid *fid)
 {
-	const char *devname = dentry->d_name.name;
 	struct afs_vnode *vnode = AFS_FS_I(dir);
 	struct inode *inode;
+	int ret = -ENOENT;
 
-	_enter("%d, %p{%pd}, {%x:%u}, %p",
-	       ret, dentry, dentry, vnode->fid.vid, vnode->fid.vnode, key);
+	_enter("%p{%pd}, {%x:%u}",
+	       dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
+
+	if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+		goto out;
 
-	if (ret != -ENOENT ||
-	    !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+	ret = afs_probe_cell_name(dentry);
+	if (ret < 0)
 		goto out;
 
-	inode = afs_iget_autocell(dir, devname, strlen(devname), key);
+	inode = afs_iget_pseudo_dir(dir->i_sb, false);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
 		goto out;
@@ -545,13 +592,16 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 
 	ret = afs_do_lookup(dir, dentry, &fid, key);
 	if (ret < 0) {
-		inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid);
-		if (!IS_ERR(inode)) {
-			key_put(key);
-			goto success;
+		if (ret == -ENOENT) {
+			inode = afs_try_auto_mntpt(dentry, dir, &fid);
+			if (!IS_ERR(inode)) {
+				key_put(key);
+				goto success;
+			}
+
+			ret = PTR_ERR(inode);
 		}
 
-		ret = PTR_ERR(inode);
 		key_put(key);
 		if (ret == -ENOENT) {
 			d_add(dentry, NULL);
@@ -583,12 +633,53 @@ success:
 }
 
 /*
+ * Look up an entry in a dynroot directory.
+ */
+static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
+					 unsigned int flags)
+{
+	struct afs_vnode *vnode;
+	struct afs_fid fid;
+	struct inode *inode;
+	int ret;
+
+	vnode = AFS_FS_I(dir);
+
+	_enter("%pd", dentry);
+
+	ASSERTCMP(d_inode(dentry), ==, NULL);
+
+	if (dentry->d_name.len >= AFSNAMEMAX) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	inode = afs_try_auto_mntpt(dentry, dir, &fid);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		if (ret == -ENOENT) {
+			d_add(dentry, NULL);
+			_leave(" = NULL [negative]");
+			return NULL;
+		}
+		_leave(" = %d [do]", ret);
+		return ERR_PTR(ret);
+	}
+
+	d_add(dentry, inode);
+	_leave(" = 0 { ino=%lu v=%u }",
+	       d_inode(dentry)->i_ino, d_inode(dentry)->i_generation);
+	return NULL;
+}
+
+/*
  * check that a dentry lookup hit has found a valid entry
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
  *   inode
  */
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
+	struct afs_super_info *as = dentry->d_sb->s_fs_info;
 	struct afs_vnode *vnode, *dir;
 	struct afs_fid uninitialized_var(fid);
 	struct dentry *parent;
@@ -600,6 +691,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
+	if (as->dyn_root)
+		return 1;
+
 	if (d_really_is_positive(dentry)) {
 		vnode = AFS_FS_I(d_inode(dentry));
 		_enter("{v={%x:%u} n=%pd fl=%lx},",
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index c7f17c44c7ce..6b39d0255b72 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -147,7 +147,7 @@ int afs_iget5_test(struct inode *inode, void *opaque)
  *
  * These pseudo inodes don't match anything.
  */
-static int afs_iget5_autocell_test(struct inode *inode, void *opaque)
+static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque)
 {
 	return 0;
 }
@@ -169,31 +169,34 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
 }
 
 /*
- * inode retrieval for autocell
+ * Create an inode for a dynamic root directory or an autocell dynamic
+ * automount dir.
  */
-struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
-				int namesz, struct key *key)
+struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
 {
 	struct afs_iget_data data;
 	struct afs_super_info *as;
 	struct afs_vnode *vnode;
-	struct super_block *sb;
 	struct inode *inode;
 	static atomic_t afs_autocell_ino;
 
-	_enter("{%x:%u},%*.*s,",
-	       AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode,
-	       namesz, namesz, dev_name ?: "");
+	_enter("");
 
-	sb = dir->i_sb;
 	as = sb->s_fs_info;
-	data.volume = as->volume;
-	data.fid.vid = as->volume->vid;
-	data.fid.unique = 0;
-	data.fid.vnode = 0;
+	if (as->volume) {
+		data.volume = as->volume;
+		data.fid.vid = as->volume->vid;
+	}
+	if (root) {
+		data.fid.vnode = 1;
+		data.fid.unique = 1;
+	} else {
+		data.fid.vnode = atomic_inc_return(&afs_autocell_ino);
+		data.fid.unique = 0;
+	}
 
-	inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino),
-			     afs_iget5_autocell_test, afs_iget5_set,
+	inode = iget5_locked(sb, data.fid.vnode,
+			     afs_iget5_pseudo_dir_test, afs_iget5_set,
 			     &data);
 	if (!inode) {
 		_leave(" = -ENOMEM");
@@ -211,7 +214,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
 
 	inode->i_size		= 0;
 	inode->i_mode		= S_IFDIR | S_IRUGO | S_IXUGO;
-	inode->i_op		= &afs_autocell_inode_operations;
+	if (root) {
+		inode->i_op	= &afs_dynroot_inode_operations;
+		inode->i_fop	= &afs_dynroot_file_operations;
+	} else {
+		inode->i_op	= &afs_autocell_inode_operations;
+	}
 	set_nlink(inode, 2);
 	inode->i_uid		= GLOBAL_ROOT_UID;
 	inode->i_gid		= GLOBAL_ROOT_GID;
@@ -223,8 +231,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
 	inode->i_generation	= 0;
 
 	set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
-	set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-	inode->i_flags |= S_AUTOMOUNT | S_NOATIME;
+	if (!root) {
+		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+		inode->i_flags |= S_AUTOMOUNT;
+	}
+
+	inode->i_flags |= S_NOATIME;
 	unlock_new_inode(inode);
 	_leave(" = %p", inode);
 	return inode;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 804d1f905622..f38d6a561a84 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -36,6 +36,7 @@ struct afs_mount_params {
 	bool			rwpath;		/* T if the parent should be considered R/W */
 	bool			force;		/* T to force cell type */
 	bool			autocell;	/* T if set auto mount operation */
+	bool			dyn_root;	/* T if dynamic root */
 	afs_voltype_t		type;		/* type of volume requested */
 	int			volnamesz;	/* size of volume name */
 	const char		*volname;	/* name of volume to mount */
@@ -186,6 +187,7 @@ struct afs_super_info {
 	struct afs_net		*net;		/* Network namespace */
 	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
+	bool			dyn_root;	/* True if dynamic root */
 };
 
 static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
@@ -634,10 +636,13 @@ extern bool afs_cm_incoming_call(struct afs_call *);
 /*
  * dir.c
  */
-extern bool afs_dir_check_page(struct inode *, struct page *);
+extern const struct file_operations afs_dir_file_operations;
 extern const struct inode_operations afs_dir_inode_operations;
+extern const struct file_operations afs_dynroot_file_operations;
+extern const struct inode_operations afs_dynroot_inode_operations;
 extern const struct dentry_operations afs_fs_dentry_operations;
-extern const struct file_operations afs_dir_file_operations;
+
+extern bool afs_dir_check_page(struct inode *, struct page *);
 
 /*
  * file.c
@@ -695,8 +700,7 @@ extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
  */
 extern int afs_fetch_status(struct afs_vnode *, struct key *);
 extern int afs_iget5_test(struct inode *, void *);
-extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
-				       struct key *);
+extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
 extern struct inode *afs_iget(struct super_block *, struct key *,
 			      struct afs_fid *, struct afs_file_status *,
 			      struct afs_callback *,
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 690fea9d84c3..99fd13500a97 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -72,7 +72,7 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
  */
 static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 {
-	struct afs_super_info *super;
+	struct afs_super_info *as;
 	struct vfsmount *mnt;
 	struct afs_vnode *vnode;
 	struct page *page;
@@ -104,13 +104,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 			goto error_no_page;
 
 		if (mntpt->d_name.name[0] == '.') {
-			devname[0] = '#';
-			memcpy(devname + 1, mntpt->d_name.name, size - 1);
+			devname[0] = '%';
+			memcpy(devname + 1, mntpt->d_name.name + 1, size - 1);
 			memcpy(devname + size, afs_root_cell,
 			       sizeof(afs_root_cell));
 			rwpath = true;
 		} else {
-			devname[0] = '%';
+			devname[0] = '#';
 			memcpy(devname + 1, mntpt->d_name.name, size);
 			memcpy(devname + size + 1, afs_root_cell,
 			       sizeof(afs_root_cell));
@@ -142,11 +142,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	}
 
 	/* work out what options we want */
-	super = AFS_FS_S(mntpt->d_sb);
-	memcpy(options, "cell=", 5);
-	strcpy(options + 5, super->volume->cell->name);
-	if (super->volume->type == AFSVL_RWVOL || rwpath)
-		strcat(options, ",rwpath");
+	as = AFS_FS_S(mntpt->d_sb);
+	if (as->cell) {
+		memcpy(options, "cell=", 5);
+		strcpy(options + 5, as->cell->name);
+		if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath)
+			strcat(options, ",rwpath");
+	}
 
 	/* try and do the mount */
 	_debug("--- attempting mount %s -o %s ---", devname, options);
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index d04511fb3879..ad1328d85526 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -330,26 +330,6 @@ start:
 
 	if (!afs_start_fs_iteration(fc, vnode))
 		goto failed;
-	goto use_server;
-
-next_server:
-	_debug("next");
-	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
-	fc->cbi = NULL;
-	fc->index++;
-	if (fc->index >= fc->server_list->nr_servers)
-		fc->index = 0;
-	if (fc->index != fc->start)
-		goto use_server;
-
-	/* That's all the servers poked to no good effect.  Try again if some
-	 * of them were busy.
-	 */
-	if (fc->flags & AFS_FS_CURSOR_VBUSY)
-		goto restart_from_beginning;
-
-	fc->ac.error = -EDESTADDRREQ;
-	goto failed;
 
 use_server:
 	_debug("use");
@@ -383,6 +363,7 @@ use_server:
 	afs_get_addrlist(alist);
 	read_unlock(&server->fs_lock);
 
+	memset(&fc->ac, 0, sizeof(fc->ac));
 
 	/* Probe the current fileserver if we haven't done so yet. */
 	if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
@@ -397,12 +378,8 @@ use_server:
 	else
 		afs_put_addrlist(alist);
 
-	fc->ac.addr  = NULL;
 	fc->ac.start = READ_ONCE(alist->index);
 	fc->ac.index = fc->ac.start;
-	fc->ac.error = 0;
-	fc->ac.begun = false;
-	goto iterate_address;
 
 iterate_address:
 	ASSERT(fc->ac.alist);
@@ -410,16 +387,35 @@ iterate_address:
 	/* Iterate over the current server's address list to try and find an
 	 * address on which it will respond to us.
 	 */
-	if (afs_iterate_addresses(&fc->ac)) {
-		_leave(" = t");
-		return true;
-	}
+	if (!afs_iterate_addresses(&fc->ac))
+		goto next_server;
+
+	_leave(" = t");
+	return true;
 
+next_server:
+	_debug("next");
 	afs_end_cursor(&fc->ac);
-	goto next_server;
+	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+	fc->cbi = NULL;
+	fc->index++;
+	if (fc->index >= fc->server_list->nr_servers)
+		fc->index = 0;
+	if (fc->index != fc->start)
+		goto use_server;
+
+	/* That's all the servers poked to no good effect.  Try again if some
+	 * of them were busy.
+	 */
+	if (fc->flags & AFS_FS_CURSOR_VBUSY)
+		goto restart_from_beginning;
+
+	fc->ac.error = -EDESTADDRREQ;
+	goto failed;
 
 failed:
 	fc->flags |= AFS_FS_CURSOR_STOP;
+	afs_end_cursor(&fc->ac);
 	_leave(" = f [failed %d]", fc->ac.error);
 	return false;
 }
@@ -458,12 +454,10 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 			return false;
 		}
 
+		memset(&fc->ac, 0, sizeof(fc->ac));
 		fc->ac.alist = alist;
-		fc->ac.addr  = NULL;
 		fc->ac.start = READ_ONCE(alist->index);
 		fc->ac.index = fc->ac.start;
-		fc->ac.error = 0;
-		fc->ac.begun = false;
 		goto iterate_address;
 
 	case 0:
@@ -520,238 +514,3 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 
 	return fc->ac.error;
 }
-
-#if 0
-/*
- * Set a filesystem server cursor for using a specific FS server.
- */
-int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	afs_init_fs_cursor(fc, vnode);
-
-	read_seqlock_excl(&vnode->cb_lock);
-	if (vnode->cb_interest) {
-		if (vnode->cb_interest->server->fs_state == 0)
-			fc->server = afs_get_server(vnode->cb_interest->server);
-		else
-			fc->ac.error = vnode->cb_interest->server->fs_state;
-	} else {
-		fc->ac.error = -ESTALE;
-	}
-	read_sequnlock_excl(&vnode->cb_lock);
-
-	return fc->ac.error;
-}
-
-/*
- * pick a server to use to try accessing this volume
- * - returns with an elevated usage count on the server chosen
- */
-bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	struct afs_volume *volume = vnode->volume;
-	struct afs_server *server;
-	int ret, state, loop;
-
-	_enter("%s", volume->vlocation->vldb.name);
-
-	/* stick with the server we're already using if we can */
-	if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
-		fc->server = afs_get_server(vnode->cb_interest->server);
-		goto set_server;
-	}
-
-	down_read(&volume->server_sem);
-
-	/* handle the no-server case */
-	if (volume->nservers == 0) {
-		fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
-		up_read(&volume->server_sem);
-		_leave(" = f [no servers %d]", fc->ac.error);
-		return false;
-	}
-
-	/* basically, just search the list for the first live server and use
-	 * that */
-	ret = 0;
-	for (loop = 0; loop < volume->nservers; loop++) {
-		server = volume->servers[loop];
-		state = server->fs_state;
-
-		_debug("consider %d [%d]", loop, state);
-
-		switch (state) {
-		case 0:
-			goto picked_server;
-
-		case -ENETUNREACH:
-			if (ret == 0)
-				ret = state;
-			break;
-
-		case -EHOSTUNREACH:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH)
-				ret = state;
-			break;
-
-		case -ECONNREFUSED:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH)
-				ret = state;
-			break;
-
-		default:
-		case -EREMOTEIO:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH ||
-			    ret == -ECONNREFUSED)
-				ret = state;
-			break;
-		}
-	}
-
-error:
-	fc->ac.error = ret;
-
-	/* no available servers
-	 * - TODO: handle the no active servers case better
-	 */
-	up_read(&volume->server_sem);
-	_leave(" = f [%d]", fc->ac.error);
-	return false;
-
-picked_server:
-	/* Found an apparently healthy server.  We need to register an interest
-	 * in receiving callbacks before we talk to it.
-	 */
-	ret = afs_register_server_cb_interest(vnode,
-					      &volume->cb_interests[loop], server);
-	if (ret < 0)
-		goto error;
-
-	fc->server = afs_get_server(server);
-	up_read(&volume->server_sem);
-set_server:
-	fc->ac.alist = afs_get_addrlist(fc->server->addrs);
-	fc->ac.addr = &fc->ac.alist->addrs[0];
-	_debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
-	_leave(" = t (picked %pIS)", &fc->ac.addr->transport);
-	return true;
-}
-
-/*
- * release a server after use
- * - releases the ref on the server struct that was acquired by picking
- * - records result of using a particular server to access a volume
- * - return true to try again, false if okay or to issue error
- * - the caller must release the server struct if result was false
- */
-bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
-			   struct afs_vnode *vnode)
-{
-	struct afs_volume *volume = vnode->volume;
-	struct afs_server *server = fc->server;
-	unsigned loop;
-
-	_enter("%s,%pIS,%d",
-	       volume->vlocation->vldb.name, &fc->ac.addr->transport,
-	       fc->ac.error);
-
-	switch (fc->ac.error) {
-		/* success */
-	case 0:
-		server->fs_state = 0;
-		_leave(" = f");
-		return false;
-
-		/* the fileserver denied all knowledge of the volume */
-	case -ENOMEDIUM:
-		down_write(&volume->server_sem);
-
-		/* firstly, find where the server is in the active list (if it
-		 * is) */
-		for (loop = 0; loop < volume->nservers; loop++)
-			if (volume->servers[loop] == server)
-				goto present;
-
-		/* no longer there - may have been discarded by another op */
-		goto try_next_server_upw;
-
-	present:
-		volume->nservers--;
-		memmove(&volume->servers[loop],
-			&volume->servers[loop + 1],
-			sizeof(volume->servers[loop]) *
-			(volume->nservers - loop));
-		volume->servers[volume->nservers] = NULL;
-		afs_put_server(afs_v2net(vnode), server);
-		volume->rjservers++;
-
-		if (volume->nservers > 0)
-			/* another server might acknowledge its existence */
-			goto try_next_server_upw;
-
-		/* handle the case where all the fileservers have rejected the
-		 * volume
-		 * - TODO: try asking the fileservers for volume information
-		 * - TODO: contact the VL server again to see if the volume is
-		 *         no longer registered
-		 */
-		up_write(&volume->server_sem);
-		afs_put_server(afs_v2net(vnode), server);
-		fc->server = NULL;
-		_leave(" = f [completely rejected]");
-		return false;
-
-		/* problem reaching the server */
-	case -ENETUNREACH:
-	case -EHOSTUNREACH:
-	case -ECONNREFUSED:
-	case -ETIME:
-	case -ETIMEDOUT:
-	case -EREMOTEIO:
-		/* mark the server as dead
-		 * TODO: vary dead timeout depending on error
-		 */
-		spin_lock(&server->fs_lock);
-		if (!server->fs_state) {
-			server->fs_state = fc->ac.error;
-			printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
-		}
-		spin_unlock(&server->fs_lock);
-		goto try_next_server;
-
-		/* miscellaneous error */
-	default:
-	case -ENOMEM:
-	case -ENONET:
-		/* tell the caller to accept the result */
-		afs_put_server(afs_v2net(vnode), server);
-		fc->server = NULL;
-		_leave(" = f [local failure]");
-		return false;
-	}
-
-	/* tell the caller to loop around and try the next server */
-try_next_server_upw:
-	up_write(&volume->server_sem);
-try_next_server:
-	afs_put_server(afs_v2net(vnode), server);
-	_leave(" = t [try next server]");
-	return true;
-}
-
-/*
- * Clean up a fileserver cursor.
- */
-int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
-{
-	afs_end_cursor(&fc->ac);
-	afs_put_server(net, fc->server);
-	return fc->ac.error;
-}
-
-#endif
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 0ab3f8457839..0f8dc4c8f07c 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -58,7 +58,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
 		server = afs_lookup_server(cell, key, &vldb->fs_server[i]);
 		if (IS_ERR(server)) {
 			ret = PTR_ERR(server);
-			if (ret == -ENOENT)
+			if (ret == -ENOENT ||
+			    ret == -ENOMEDIUM)
 				continue;
 			goto error_2;
 		}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 1037dd41a622..3623c952b6ff 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -64,6 +64,7 @@ static atomic_t afs_count_active_inodes;
 enum {
 	afs_no_opt,
 	afs_opt_cell,
+	afs_opt_dyn,
 	afs_opt_rwpath,
 	afs_opt_vol,
 	afs_opt_autocell,
@@ -71,6 +72,7 @@ enum {
 
 static const match_table_t afs_options_list = {
 	{ afs_opt_cell,		"cell=%s"	},
+	{ afs_opt_dyn,		"dyn"		},
 	{ afs_opt_rwpath,	"rwpath"	},
 	{ afs_opt_vol,		"vol=%s"	},
 	{ afs_opt_autocell,	"autocell"	},
@@ -148,6 +150,11 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
 	const char *suf = "";
 	char pref = '%';
 
+	if (as->dyn_root) {
+		seq_puts(m, "none");
+		return 0;
+	}
+	
 	switch (volume->type) {
 	case AFSVL_RWVOL:
 		break;
@@ -171,8 +178,12 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
  */
 static int afs_show_options(struct seq_file *m, struct dentry *root)
 {
+	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+
+	if (as->dyn_root)
+		seq_puts(m, ",dyn");
 	if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
-		seq_puts(m, "autocell");
+		seq_puts(m, ",autocell");
 	return 0;
 }
 
@@ -212,7 +223,7 @@ static int afs_parse_options(struct afs_mount_params *params,
 			break;
 
 		case afs_opt_rwpath:
-			params->rwpath = 1;
+			params->rwpath = true;
 			break;
 
 		case afs_opt_vol:
@@ -220,7 +231,11 @@ static int afs_parse_options(struct afs_mount_params *params,
 			break;
 
 		case afs_opt_autocell:
-			params->autocell = 1;
+			params->autocell = true;
+			break;
+
+		case afs_opt_dyn:
+			params->dyn_root = true;
 			break;
 
 		default:
@@ -254,7 +269,7 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 	int cellnamesz;
 
 	_enter(",%s", name);
-
+	
 	if (!name) {
 		printk(KERN_ERR "kAFS: no volume name specified\n");
 		return -EINVAL;
@@ -336,7 +351,14 @@ static int afs_test_super(struct super_block *sb, void *data)
 	struct afs_super_info *as1 = data;
 	struct afs_super_info *as = AFS_FS_S(sb);
 
-	return as->net == as1->net && as->volume->vid == as1->volume->vid;
+	return (as->net == as1->net &&
+		as->volume &&
+		as->volume->vid == as1->volume->vid);
+}
+
+static int afs_dynroot_test_super(struct super_block *sb, void *data)
+{
+	return false;
 }
 
 static int afs_set_super(struct super_block *sb, void *data)
@@ -365,24 +387,30 @@ static int afs_fill_super(struct super_block *sb,
 	sb->s_blocksize_bits	= PAGE_SHIFT;
 	sb->s_magic		= AFS_FS_MAGIC;
 	sb->s_op		= &afs_super_ops;
-	sb->s_xattr		= afs_xattr_handlers;
+	if (!as->dyn_root)
+		sb->s_xattr	= afs_xattr_handlers;
 	ret = super_setup_bdi(sb);
 	if (ret)
 		return ret;
 	sb->s_bdi->ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
-	sprintf(sb->s_id, "%u", as->volume->vid);
-
-	afs_activate_volume(as->volume);
 
 	/* allocate the root inode and dentry */
-	fid.vid		= as->volume->vid;
-	fid.vnode	= 1;
-	fid.unique	= 1;
-	inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
+	if (as->dyn_root) {
+		inode = afs_iget_pseudo_dir(sb, true);
+		sb->s_flags	|= SB_RDONLY;
+	} else {
+		sprintf(sb->s_id, "%u", as->volume->vid);
+		afs_activate_volume(as->volume);
+		fid.vid		= as->volume->vid;
+		fid.vnode	= 1;
+		fid.unique	= 1;
+		inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
+	}
+
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	if (params->autocell)
+	if (params->autocell || params->dyn_root)
 		set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
 
 	ret = -ENOMEM;
@@ -407,7 +435,10 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params)
 	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
 	if (as) {
 		as->net = afs_get_net(params->net);
-		as->cell = afs_get_cell(params->cell);
+		if (params->dyn_root)
+			as->dyn_root = true;
+		else
+			as->cell = afs_get_cell(params->cell);
 	}
 	return as;
 }
@@ -451,18 +482,20 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 			goto error;
 	}
 
-	ret = afs_parse_device_name(&params, dev_name);
-	if (ret < 0)
-		goto error;
+	if (!params.dyn_root) {
+		ret = afs_parse_device_name(&params, dev_name);
+		if (ret < 0)
+			goto error;
 
-	/* try and do the mount securely */
-	key = afs_request_key(params.cell);
-	if (IS_ERR(key)) {
-		_leave(" = %ld [key]", PTR_ERR(key));
-		ret = PTR_ERR(key);
-		goto error;
+		/* try and do the mount securely */
+		key = afs_request_key(params.cell);
+		if (IS_ERR(key)) {
+			_leave(" = %ld [key]", PTR_ERR(key));
+			ret = PTR_ERR(key);
+			goto error;
+		}
+		params.key = key;
 	}
-	params.key = key;
 
 	/* allocate a superblock info record */
 	ret = -ENOMEM;
@@ -470,20 +503,25 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	if (!as)
 		goto error_key;
 
-	/* Assume we're going to need a volume record; at the very least we can
-	 * use it to update the volume record if we have one already.  This
-	 * checks that the volume exists within the cell.
-	 */
-	candidate = afs_create_volume(&params);
-	if (IS_ERR(candidate)) {
-		ret = PTR_ERR(candidate);
-		goto error_as;
-	}
+	if (!params.dyn_root) {
+		/* Assume we're going to need a volume record; at the very
+		 * least we can use it to update the volume record if we have
+		 * one already.  This checks that the volume exists within the
+		 * cell.
+		 */
+		candidate = afs_create_volume(&params);
+		if (IS_ERR(candidate)) {
+			ret = PTR_ERR(candidate);
+			goto error_as;
+		}
 
-	as->volume = candidate;
+		as->volume = candidate;
+	}
 
 	/* allocate a deviceless superblock */
-	sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
+	sb = sget(fs_type,
+		  as->dyn_root ? afs_dynroot_test_super : afs_test_super,
+		  afs_set_super, flags, as);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		goto error_as;
@@ -529,9 +567,11 @@ static void afs_kill_super(struct super_block *sb)
 	/* Clear the callback interests (which will do ilookup5) before
 	 * deactivating the superblock.
 	 */
-	afs_clear_callback_interests(as->net, as->volume->servers);
+	if (as->volume)
+		afs_clear_callback_interests(as->net, as->volume->servers);
 	kill_anon_super(sb);
-	afs_deactivate_volume(as->volume);
+	if (as->volume)
+		afs_deactivate_volume(as->volume);
 	afs_destroy_sbi(as);
 }
 
@@ -619,12 +659,24 @@ static void afs_destroy_inode(struct inode *inode)
  */
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct afs_super_info *as = AFS_FS_S(dentry->d_sb);
 	struct afs_fs_cursor fc;
 	struct afs_volume_status vs;
 	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 	struct key *key;
 	int ret;
 
+	buf->f_type	= dentry->d_sb->s_magic;
+	buf->f_bsize	= AFS_BLOCK_SIZE;
+	buf->f_namelen	= AFSNAMEMAX - 1;
+
+	if (as->dyn_root) {
+		buf->f_blocks	= 1;
+		buf->f_bavail	= 0;
+		buf->f_bfree	= 0;
+		return 0;
+	}
+	
 	key = afs_request_key(vnode->volume->cell);
 	if (IS_ERR(key))
 		return PTR_ERR(key);
@@ -645,10 +697,6 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	key_put(key);
 
 	if (ret == 0) {
-		buf->f_type	= dentry->d_sb->s_magic;
-		buf->f_bsize	= AFS_BLOCK_SIZE;
-		buf->f_namelen	= AFSNAMEMAX - 1;
-
 		if (vs.max_quota == 0)
 			buf->f_blocks = vs.part_max_blocks;
 		else
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index e372f89fd36a..5d8562f1ad4a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 	struct afs_uvldbentry__xdr *uvldb;
 	struct afs_vldb_entry *entry;
 	bool new_only = false;
-	u32 tmp;
+	u32 tmp, nr_servers;
 	int i, ret;
 
 	_enter("");
@@ -36,6 +36,10 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 	uvldb = call->buffer;
 	entry = call->reply[0];
 
+	nr_servers = ntohl(uvldb->nServers);
+	if (nr_servers > AFS_NMAXNSERVERS)
+		nr_servers = AFS_NMAXNSERVERS;
+
 	for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++)
 		entry->name[i] = (u8)ntohl(uvldb->name[i]);
 	entry->name[i] = 0;
@@ -44,14 +48,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 	/* If there is a new replication site that we can use, ignore all the
 	 * sites that aren't marked as new.
 	 */
-	for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+	for (i = 0; i < nr_servers; i++) {
 		tmp = ntohl(uvldb->serverFlags[i]);
 		if (!(tmp & AFS_VLSF_DONTUSE) &&
 		    (tmp & AFS_VLSF_NEWREPSITE))
 			new_only = true;
 	}
 
-	for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+	for (i = 0; i < nr_servers; i++) {
 		struct afs_uuid__xdr *xdr;
 		struct afs_uuid *uuid;
 		int j;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 684c48293353..b517a588781f 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -26,9 +26,8 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
 					   unsigned long type_mask)
 {
 	struct afs_server_list *slist;
-	struct afs_server *server;
 	struct afs_volume *volume;
-	int ret = -ENOMEM, nr_servers = 0, i, j;
+	int ret = -ENOMEM, nr_servers = 0, i;
 
 	for (i = 0; i < vldb->nr_servers; i++)
 		if (vldb->fs_mask[i] & type_mask)
@@ -58,50 +57,10 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
 
 	refcount_set(&slist->usage, 1);
 	volume->servers = slist;
-
-	/* Make sure a records exists for each server this volume occupies. */
-	for (i = 0; i < nr_servers; i++) {
-		if (!(vldb->fs_mask[i] & type_mask))
-			continue;
-
-		server = afs_lookup_server(params->cell, params->key,
-					   &vldb->fs_server[i]);
-		if (IS_ERR(server)) {
-			ret = PTR_ERR(server);
-			if (ret == -ENOENT)
-				continue;
-			goto error_2;
-		}
-
-		/* Insertion-sort by server pointer */
-		for (j = 0; j < slist->nr_servers; j++)
-			if (slist->servers[j].server >= server)
-				break;
-		if (j < slist->nr_servers) {
-			if (slist->servers[j].server == server) {
-				afs_put_server(params->net, server);
-				continue;
-			}
-
-			memmove(slist->servers + j + 1,
-				slist->servers + j,
-				(slist->nr_servers - j) * sizeof(struct afs_server_entry));
-		}
-
-		slist->servers[j].server = server;
-		slist->nr_servers++;
-	}
-
-	if (slist->nr_servers == 0) {
-		ret = -EDESTADDRREQ;
-		goto error_2;
-	}
-
 	return volume;
 
-error_2:
-	afs_put_serverlist(params->net, slist);
 error_1:
+	afs_put_cell(params->net, volume->cell);
 	kfree(volume);
 error_0:
 	return ERR_PTR(ret);
@@ -327,7 +286,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
 
 	/* See if the volume's server list got updated. */
 	new = afs_alloc_server_list(volume->cell, key,
-				      vldb, (1 << volume->type));
+				    vldb, (1 << volume->type));
 	if (IS_ERR(new)) {
 		ret = PTR_ERR(new);
 		goto error_vldb;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 83732fef510d..bdb201230bae 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1599,6 +1599,8 @@ static int fill_files_note(struct memelfnote *note)
 
 	/* *Estimated* file count and total data size needed */
 	count = current->mm->map_count;
+	if (count > UINT_MAX / 64)
+		return -EINVAL;
 	size = count * 64;
 
 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index dbf07051aacd..b4336b42ce3b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -299,7 +299,8 @@ unlock:
  * start an async read(ahead) operation.  return nr_pages we submitted
  * a read for on success, or negative error code.
  */
-static int start_read(struct inode *inode, struct list_head *page_list, int max)
+static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
+		      struct list_head *page_list, int max)
 {
 	struct ceph_osd_client *osdc =
 		&ceph_inode_to_client(inode)->client->osdc;
@@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 	int got = 0;
 	int ret = 0;
 
-	if (!current->journal_info) {
+	if (!rw_ctx) {
 		/* caller of readpages does not hold buffer and read caps
 		 * (fadvise, madvise and readahead cases) */
 		int want = CEPH_CAP_FILE_CACHE;
@@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_file_info *ci = file->private_data;
+	struct ceph_rw_context *rw_ctx;
 	int rc = 0;
 	int max = 0;
 
@@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 	if (rc == 0)
 		goto out;
 
+	rw_ctx = ceph_find_rw_context(ci);
 	max = fsc->mount_options->rsize >> PAGE_SHIFT;
-	dout("readpages %p file %p nr_pages %d max %d\n",
-	     inode, file, nr_pages, max);
+	dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
+	     inode, file, rw_ctx, nr_pages, max);
 	while (!list_empty(page_list)) {
-		rc = start_read(inode, page_list, max);
+		rc = start_read(inode, rw_ctx, page_list, max);
 		if (rc < 0)
 			goto out;
 	}
@@ -574,7 +578,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	struct ceph_fs_client *fsc;
 	struct ceph_snap_context *snapc, *oldest;
 	loff_t page_off = page_offset(page);
-	long writeback_stat;
 	int err, len = PAGE_SIZE;
 	struct ceph_writeback_ctl ceph_wbc;
 
@@ -615,8 +618,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n",
 	     inode, page, page->index, page_off, len, snapc, snapc->seq);
 
-	writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
-	if (writeback_stat >
+	if (atomic_long_inc_return(&fsc->writeback_count) >
 	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
 		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
 
@@ -651,6 +653,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	end_page_writeback(page);
 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
 	ceph_put_snap_context(snapc);  /* page's reference */
+
+	if (atomic_long_dec_return(&fsc->writeback_count) <
+	    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
+		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+
 	return err;
 }
 
@@ -1450,9 +1457,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
 
 	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
 	    ci->i_inline_version == CEPH_INLINE_NONE) {
-		current->journal_info = vma->vm_file;
+		CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
+		ceph_add_rw_context(fi, &rw_ctx);
 		ret = filemap_fault(vmf);
-		current->journal_info = NULL;
+		ceph_del_rw_context(fi, &rw_ctx);
 	} else
 		ret = -EAGAIN;
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a14b2c974c9e..6582c4507e6c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -154,13 +154,19 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+/*
+ * Called under mdsc->mutex.
+ */
+int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 		      struct ceph_cap_reservation *ctx, int need)
 {
-	int i;
+	int i, j;
 	struct ceph_cap *cap;
 	int have;
 	int alloc = 0;
+	int max_caps;
+	bool trimmed = false;
+	struct ceph_mds_session *s;
 	LIST_HEAD(newcaps);
 
 	dout("reserve caps ctx=%p need=%d\n", ctx, need);
@@ -179,16 +185,37 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc,
 	spin_unlock(&mdsc->caps_list_lock);
 
 	for (i = have; i < need; i++) {
+retry:
 		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-		if (!cap)
-			break;
+		if (!cap) {
+			if (!trimmed) {
+				for (j = 0; j < mdsc->max_sessions; j++) {
+					s = __ceph_lookup_mds_session(mdsc, j);
+					if (!s)
+						continue;
+					mutex_unlock(&mdsc->mutex);
+
+					mutex_lock(&s->s_mutex);
+					max_caps = s->s_nr_caps - (need - i);
+					ceph_trim_caps(mdsc, s, max_caps);
+					mutex_unlock(&s->s_mutex);
+
+					ceph_put_mds_session(s);
+					mutex_lock(&mdsc->mutex);
+				}
+				trimmed = true;
+				goto retry;
+			} else {
+				pr_warn("reserve caps ctx=%p ENOMEM "
+					"need=%d got=%d\n",
+					ctx, need, have + alloc);
+				goto out_nomem;
+			}
+		}
 		list_add(&cap->caps_item, &newcaps);
 		alloc++;
 	}
-	/* we didn't manage to reserve as much as we needed */
-	if (have + alloc != need)
-		pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
-			ctx, need, have + alloc);
+	BUG_ON(have + alloc != need);
 
 	spin_lock(&mdsc->caps_list_lock);
 	mdsc->caps_total_count += alloc;
@@ -204,6 +231,24 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc,
 	dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
 	     ctx, mdsc->caps_total_count, mdsc->caps_use_count,
 	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
+	return 0;
+
+out_nomem:
+	while (!list_empty(&newcaps)) {
+		cap = list_first_entry(&newcaps,
+				struct ceph_cap, caps_item);
+		list_del(&cap->caps_item);
+		kmem_cache_free(ceph_cap_cachep, cap);
+	}
+
+	spin_lock(&mdsc->caps_list_lock);
+	mdsc->caps_avail_count += have;
+	mdsc->caps_reserve_count -= have;
+	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+					 mdsc->caps_reserve_count +
+					 mdsc->caps_avail_count);
+	spin_unlock(&mdsc->caps_list_lock);
+	return -ENOMEM;
 }
 
 int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
@@ -498,7 +543,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
 	 */
 	if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) {
 		if (issued & CEPH_CAP_FILE_SHARED)
-			ci->i_shared_gen++;
+			atomic_inc(&ci->i_shared_gen);
 		if (S_ISDIR(ci->vfs_inode.i_mode)) {
 			dout(" marking %p NOT complete\n", &ci->vfs_inode);
 			__ceph_dir_clear_complete(ci);
@@ -577,18 +622,30 @@ void ceph_add_cap(struct inode *inode,
 		}
 	}
 
-	if (!ci->i_snap_realm) {
+	if (!ci->i_snap_realm ||
+	    ((flags & CEPH_CAP_FLAG_AUTH) &&
+	     realmino != (u64)-1 && ci->i_snap_realm->ino != realmino)) {
 		/*
 		 * add this inode to the appropriate snap realm
 		 */
 		struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
 							       realmino);
 		if (realm) {
+			struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
+			if (oldrealm) {
+				spin_lock(&oldrealm->inodes_with_caps_lock);
+				list_del_init(&ci->i_snap_realm_item);
+				spin_unlock(&oldrealm->inodes_with_caps_lock);
+			}
+
 			spin_lock(&realm->inodes_with_caps_lock);
 			ci->i_snap_realm = realm;
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
 			spin_unlock(&realm->inodes_with_caps_lock);
+
+			if (oldrealm)
+				ceph_put_snap_realm(mdsc, oldrealm);
 		} else {
 			pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
 			       realmino);
@@ -890,6 +947,11 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
 /*
  * called under i_ceph_lock
  */
+static int __ceph_is_single_caps(struct ceph_inode_info *ci)
+{
+	return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
+}
+
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
 	return !RB_EMPTY_ROOT(&ci->i_caps);
@@ -1703,21 +1765,24 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	int mds = -1;   /* keep track of how far we've gone through i_caps list
 			   to avoid an infinite loop on retry */
 	struct rb_node *p;
-	int delayed = 0, sent = 0, num;
-	bool is_delayed = flags & CHECK_CAPS_NODELAY;
+	int delayed = 0, sent = 0;
+	bool no_delay = flags & CHECK_CAPS_NODELAY;
 	bool queue_invalidate = false;
-	bool force_requeue = false;
 	bool tried_invalidate = false;
 
 	/* if we are unmounting, flush any unused caps immediately. */
 	if (mdsc->stopping)
-		is_delayed = true;
+		no_delay = true;
 
 	spin_lock(&ci->i_ceph_lock);
 
 	if (ci->i_ceph_flags & CEPH_I_FLUSH)
 		flags |= CHECK_CAPS_FLUSH;
 
+	if (!(flags & CHECK_CAPS_AUTHONLY) ||
+	    (ci->i_auth_cap && __ceph_is_single_caps(ci)))
+		__cap_delay_cancel(mdsc, ci);
+
 	goto retry_locked;
 retry:
 	spin_lock(&ci->i_ceph_lock);
@@ -1772,7 +1837,7 @@ retry_locked:
 	 * have cached pages, but don't want them, then try to invalidate.
 	 * If we fail, it's because pages are locked.... try again later.
 	 */
-	if ((!is_delayed || mdsc->stopping) &&
+	if ((!no_delay || mdsc->stopping) &&
 	    !S_ISDIR(inode->i_mode) &&		/* ignore readdir cache */
 	    !(ci->i_wb_ref || ci->i_wrbuffer_ref) &&   /* no dirty pages... */
 	    inode->i_data.nrpages &&		/* have cached pages */
@@ -1781,27 +1846,16 @@ retry_locked:
 	    !tried_invalidate) {
 		dout("check_caps trying to invalidate on %p\n", inode);
 		if (try_nonblocking_invalidate(inode) < 0) {
-			if (revoking & (CEPH_CAP_FILE_CACHE|
-					CEPH_CAP_FILE_LAZYIO)) {
-				dout("check_caps queuing invalidate\n");
-				queue_invalidate = true;
-				ci->i_rdcache_revoking = ci->i_rdcache_gen;
-			} else {
-				dout("check_caps failed to invalidate pages\n");
-				/* we failed to invalidate pages.  check these
-				   caps again later. */
-				force_requeue = true;
-				__cap_set_timeouts(mdsc, ci);
-			}
+			dout("check_caps queuing invalidate\n");
+			queue_invalidate = true;
+			ci->i_rdcache_revoking = ci->i_rdcache_gen;
 		}
 		tried_invalidate = true;
 		goto retry_locked;
 	}
 
-	num = 0;
 	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
 		cap = rb_entry(p, struct ceph_cap, ci_node);
-		num++;
 
 		/* avoid looping forever */
 		if (mds >= cap->mds ||
@@ -1864,7 +1918,7 @@ retry_locked:
 		    cap->mds_wanted == want)
 			continue;     /* nope, all good */
 
-		if (is_delayed)
+		if (no_delay)
 			goto ack;
 
 		/* delay? */
@@ -1955,15 +2009,8 @@ ack:
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
-	/*
-	 * Reschedule delayed caps release if we delayed anything,
-	 * otherwise cancel.
-	 */
-	if (delayed && is_delayed)
-		force_requeue = true;   /* __send_cap delayed release; requeue */
-	if (!delayed && !is_delayed)
-		__cap_delay_cancel(mdsc, ci);
-	else if (!is_delayed || force_requeue)
+	/* Reschedule delayed caps release if we delayed anything */
+	if (delayed)
 		__cap_delay_requeue(mdsc, ci);
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -2160,7 +2207,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
 	u64 flush_tid;
 	int err = 0;
 	int dirty;
-	int wait = wbc->sync_mode == WB_SYNC_ALL;
+	int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync);
 
 	dout("write_inode %p wait=%d\n", inode, wait);
 	if (wait) {
@@ -3426,7 +3473,14 @@ retry:
 	 */
 
 	issued = cap->issued;
-	WARN_ON(issued != cap->implemented);
+	if (issued != cap->implemented)
+		pr_err_ratelimited("handle_cap_export: issued != implemented: "
+				"ino (%llx.%llx) mds%d seq %d mseq %d "
+				"issued %s implemented %s\n",
+				ceph_vinop(inode), mds, cap->seq, cap->mseq,
+				ceph_cap_string(issued),
+				ceph_cap_string(cap->implemented));
+
 
 	tcap = __get_cap_for_mds(ci, target);
 	if (tcap) {
@@ -3572,12 +3626,13 @@ retry:
 		if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
 		    (ocap->seq != le32_to_cpu(ph->seq) ||
 		     ocap->mseq != le32_to_cpu(ph->mseq))) {
-			pr_err("handle_cap_import: mismatched seq/mseq: "
-			       "ino (%llx.%llx) mds%d seq %d mseq %d "
-			       "importer mds%d has peer seq %d mseq %d\n",
-			       ceph_vinop(inode), peer, ocap->seq,
-			       ocap->mseq, mds, le32_to_cpu(ph->seq),
-			       le32_to_cpu(ph->mseq));
+			pr_err_ratelimited("handle_cap_import: "
+					"mismatched seq/mseq: ino (%llx.%llx) "
+					"mds%d seq %d mseq %d importer mds%d "
+					"has peer seq %d mseq %d\n",
+					ceph_vinop(inode), peer, ocap->seq,
+					ocap->mseq, mds, le32_to_cpu(ph->seq),
+					le32_to_cpu(ph->mseq));
 		}
 		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
@@ -3939,11 +3994,20 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 
 	cap = __get_cap_for_mds(ci, mds);
 	if (cap && __cap_is_valid(cap)) {
-		if (force ||
-		    ((cap->issued & drop) &&
-		     (cap->issued & unless) == 0)) {
-			if ((cap->issued & drop) &&
-			    (cap->issued & unless) == 0) {
+		unless &= cap->issued;
+		if (unless) {
+			if (unless & CEPH_CAP_AUTH_EXCL)
+				drop &= ~CEPH_CAP_AUTH_SHARED;
+			if (unless & CEPH_CAP_LINK_EXCL)
+				drop &= ~CEPH_CAP_LINK_SHARED;
+			if (unless & CEPH_CAP_XATTR_EXCL)
+				drop &= ~CEPH_CAP_XATTR_SHARED;
+			if (unless & CEPH_CAP_FILE_EXCL)
+				drop &= ~CEPH_CAP_FILE_SHARED;
+		}
+
+		if (force || (cap->issued & drop)) {
+			if (cap->issued & drop) {
 				int wanted = __ceph_caps_wanted(ci);
 				if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
 					wanted |= cap->mds_wanted;
@@ -3975,7 +4039,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 			*p += sizeof(*rel);
 			ret = 1;
 		} else {
-			dout("encode_inode_release %p cap %p %s\n",
+			dout("encode_inode_release %p cap %p %s (noop)\n",
 			     inode, cap, ceph_cap_string(cap->issued));
 		}
 	}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8a5266699b67..0c4346806e17 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -173,7 +173,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
  * the MDS if/when the directory is modified).
  */
 static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
-			    u32 shared_gen)
+			    int shared_gen)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct dentry *parent = file->f_path.dentry;
@@ -184,7 +184,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 	u64 idx = 0;
 	int err = 0;
 
-	dout("__dcache_readdir %p v%u at %llx\n", dir, shared_gen, ctx->pos);
+	dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos);
 
 	/* search start position */
 	if (ctx->pos > 2) {
@@ -231,11 +231,17 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 			goto out;
 		}
 
-		di = ceph_dentry(dentry);
 		spin_lock(&dentry->d_lock);
-		if (di->lease_shared_gen == shared_gen &&
-		    d_really_is_positive(dentry) &&
-		    fpos_cmp(ctx->pos, di->offset) <= 0) {
+		di = ceph_dentry(dentry);
+		if (d_unhashed(dentry) ||
+		    d_really_is_negative(dentry) ||
+		    di->lease_shared_gen != shared_gen) {
+			spin_unlock(&dentry->d_lock);
+			dput(dentry);
+			err = -EAGAIN;
+			goto out;
+		}
+		if (fpos_cmp(ctx->pos, di->offset) <= 0) {
 			emit_dentry = true;
 		}
 		spin_unlock(&dentry->d_lock);
@@ -333,7 +339,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    __ceph_dir_is_complete_ordered(ci) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
-		u32 shared_gen = ci->i_shared_gen;
+		int shared_gen = atomic_read(&ci->i_shared_gen);
 		spin_unlock(&ci->i_ceph_lock);
 		err = __dcache_readdir(file, ctx, shared_gen);
 		if (err != -EAGAIN)
@@ -381,6 +387,7 @@ more:
 		if (op == CEPH_MDS_OP_READDIR) {
 			req->r_direct_hash = ceph_frag_value(frag);
 			__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
+			req->r_inode_drop = CEPH_CAP_FILE_EXCL;
 		}
 		if (fi->last_name) {
 			req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
@@ -750,7 +757,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 			spin_unlock(&ci->i_ceph_lock);
 			dout(" dir %p complete, -ENOENT\n", dir);
 			d_add(dentry, NULL);
-			di->lease_shared_gen = ci->i_shared_gen;
+			di->lease_shared_gen = atomic_read(&ci->i_shared_gen);
 			return NULL;
 		}
 		spin_unlock(&ci->i_ceph_lock);
@@ -835,7 +842,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 	req->r_args.mknod.mode = cpu_to_le32(mode);
 	req->r_args.mknod.rdev = cpu_to_le32(rdev);
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	if (acls.pagelist) {
 		req->r_pagelist = acls.pagelist;
@@ -887,7 +894,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (!err && !req->r_reply_info.head->is_dentry)
@@ -936,7 +943,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	req->r_parent = dir;
 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 	req->r_args.mkdir.mode = cpu_to_le32(mode);
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	if (acls.pagelist) {
 		req->r_pagelist = acls.pagelist;
@@ -983,7 +990,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	/* release LINK_SHARED on source inode (mds will lock it) */
-	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (err) {
 		d_drop(dentry);
@@ -1096,7 +1103,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	/* release LINK_RDCACHE on source inode (mds will lock it) */
-	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
 	if (d_really_is_positive(new_dentry))
 		req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
 	err = ceph_mdsc_do_request(mdsc, old_dir, req);
@@ -1106,16 +1113,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * do_request, above).  If there is no trace, we need
 		 * to do it here.
 		 */
-
-		/* d_move screws up sibling dentries' offsets */
-		ceph_dir_clear_complete(old_dir);
-		ceph_dir_clear_complete(new_dir);
-
 		d_move(old_dentry, new_dentry);
-
-		/* ensure target dentry is invalidated, despite
-		   rehashing bug in vfs_rename_dir */
-		ceph_invalidate_dentry_lease(new_dentry);
 	}
 	ceph_mdsc_put_request(req);
 	return err;
@@ -1199,12 +1197,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
 	int valid = 0;
 
 	spin_lock(&ci->i_ceph_lock);
-	if (ci->i_shared_gen == di->lease_shared_gen)
+	if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen)
 		valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
 	spin_unlock(&ci->i_ceph_lock);
 	dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
-	     dir, (unsigned)ci->i_shared_gen, dentry,
-	     (unsigned)di->lease_shared_gen, valid);
+	     dir, (unsigned)atomic_read(&ci->i_shared_gen),
+	     dentry, (unsigned)di->lease_shared_gen, valid);
 	return valid;
 }
 
@@ -1332,24 +1330,37 @@ static void ceph_d_release(struct dentry *dentry)
  */
 static void ceph_d_prune(struct dentry *dentry)
 {
-	dout("ceph_d_prune %p\n", dentry);
+	struct ceph_inode_info *dir_ci;
+	struct ceph_dentry_info *di;
+
+	dout("ceph_d_prune %pd %p\n", dentry, dentry);
 
 	/* do we have a valid parent? */
 	if (IS_ROOT(dentry))
 		return;
 
-	/* if we are not hashed, we don't affect dir's completeness */
-	if (d_unhashed(dentry))
+	/* we hold d_lock, so d_parent is stable */
+	dir_ci = ceph_inode(d_inode(dentry->d_parent));
+	if (dir_ci->i_vino.snap == CEPH_SNAPDIR)
 		return;
 
-	if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR)
+	/* who calls d_delete() should also disable dcache readdir */
+	if (d_really_is_negative(dentry))
 		return;
 
-	/*
-	 * we hold d_lock, so d_parent is stable, and d_fsdata is never
-	 * cleared until d_release
-	 */
-	ceph_dir_clear_complete(d_inode(dentry->d_parent));
+	/* d_fsdata does not get cleared until d_release */
+	if (!d_unhashed(dentry)) {
+		__ceph_dir_clear_complete(dir_ci);
+		return;
+	}
+
+	/* Disable dcache readdir just in case that someone called d_drop()
+	 * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED
+	 * properly (dcache readdir is still enabled) */
+	di = ceph_dentry(dentry);
+	if (di->offset > 0 &&
+	    di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen))
+		__ceph_dir_clear_ordered(dir_ci);
 }
 
 /*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5c17125f45c7..6639926eed4e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 			return -ENOMEM;
 		}
 		cf->fmode = fmode;
+
+		spin_lock_init(&cf->rw_contexts_lock);
+		INIT_LIST_HEAD(&cf->rw_contexts);
+
 		cf->next_offset = 2;
 		cf->readdir_cache_idx = -1;
 		file->private_data = cf;
@@ -396,7 +400,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	if (flags & O_CREAT) {
-		req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+		req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 		req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 		if (acls.pagelist) {
 			req->r_pagelist = acls.pagelist;
@@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file)
 		ceph_mdsc_put_request(cf->last_readdir);
 	kfree(cf->last_name);
 	kfree(cf->dir_info);
+	WARN_ON(!list_empty(&cf->rw_contexts));
 	kmem_cache_free(ceph_file_cachep, cf);
 
 	/* wake up anyone waiting for caps on this inode */
@@ -1199,12 +1204,13 @@ again:
 			retry_op = READ_INLINE;
 		}
 	} else {
+		CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
 		dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
 		     ceph_cap_string(got));
-		current->journal_info = filp;
+		ceph_add_rw_context(fi, &rw_ctx);
 		ret = generic_file_read_iter(iocb, to);
-		current->journal_info = NULL;
+		ceph_del_rw_context(fi, &rw_ctx);
 	}
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ab81652198c4..c6ec5aa46100 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -494,7 +494,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_wrbuffer_ref = 0;
 	ci->i_wrbuffer_ref_head = 0;
 	atomic_set(&ci->i_filelock_ref, 0);
-	ci->i_shared_gen = 0;
+	atomic_set(&ci->i_shared_gen, 0);
 	ci->i_rdcache_gen = 0;
 	ci->i_rdcache_revoking = 0;
 
@@ -1041,7 +1041,7 @@ static void update_dentry_lease(struct dentry *dentry,
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		goto out_unlock;
 
-	di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
+	di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen);
 
 	if (duration == 0)
 		goto out_unlock;
@@ -1080,6 +1080,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
 
 	BUG_ON(d_inode(dn));
 
+	if (S_ISDIR(in->i_mode)) {
+		/* If inode is directory, d_splice_alias() below will remove
+		 * 'realdn' from its origin parent. We need to ensure that
+		 * origin parent's readdir cache will not reference 'realdn'
+		 */
+		realdn = d_find_any_alias(in);
+		if (realdn) {
+			struct ceph_dentry_info *di = ceph_dentry(realdn);
+			spin_lock(&realdn->d_lock);
+
+			realdn->d_op->d_prune(realdn);
+
+			di->time = jiffies;
+			di->lease_shared_gen = 0;
+			di->offset = 0;
+
+			spin_unlock(&realdn->d_lock);
+			dput(realdn);
+		}
+	}
+
 	/* dn must be unhashed */
 	if (!d_unhashed(dn))
 		d_drop(dn);
@@ -1295,8 +1316,8 @@ retry_lookup:
 		if (!rinfo->head->is_target) {
 			dout("fill_trace null dentry\n");
 			if (d_really_is_positive(dn)) {
-				ceph_dir_clear_ordered(dir);
 				dout("d_delete %p\n", dn);
+				ceph_dir_clear_ordered(dir);
 				d_delete(dn);
 			} else if (have_lease) {
 				if (d_unhashed(dn))
@@ -1323,7 +1344,6 @@ retry_lookup:
 			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
 			     dn, d_inode(dn), ceph_vinop(d_inode(dn)),
 			     ceph_vinop(in));
-			ceph_dir_clear_ordered(dir);
 			d_invalidate(dn);
 			have_lease = false;
 		}
@@ -1573,9 +1593,19 @@ retry_lookup:
 		} else if (d_really_is_positive(dn) &&
 			   (ceph_ino(d_inode(dn)) != tvino.ino ||
 			    ceph_snap(d_inode(dn)) != tvino.snap)) {
+			struct ceph_dentry_info *di = ceph_dentry(dn);
 			dout(" dn %p points to wrong inode %p\n",
 			     dn, d_inode(dn));
-			__ceph_dir_clear_ordered(ci);
+
+			spin_lock(&dn->d_lock);
+			if (di->offset > 0 &&
+			    di->lease_shared_gen ==
+			    atomic_read(&ci->i_shared_gen)) {
+				__ceph_dir_clear_ordered(ci);
+				di->offset = 0;
+			}
+			spin_unlock(&dn->d_lock);
+
 			d_delete(dn);
 			dput(dn);
 			goto retry_lookup;
@@ -1600,9 +1630,7 @@ retry_lookup:
 				 &req->r_caps_reservation);
 		if (ret < 0) {
 			pr_err("fill_inode badness on %p\n", in);
-			if (d_really_is_positive(dn))
-				__ceph_dir_clear_ordered(ci);
-			else
+			if (d_really_is_negative(dn))
 				iput(in);
 			d_drop(dn);
 			err = ret;
@@ -2000,8 +2028,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 			ceph_encode_timespec(&req->r_args.setattr.atime,
 					     &attr->ia_atime);
 			mask |= CEPH_SETATTR_ATIME;
-			release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
+			release |= CEPH_CAP_FILE_SHARED |
+				   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
 		}
 	}
 	if (ia_valid & ATTR_MTIME) {
@@ -2022,8 +2050,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 			ceph_encode_timespec(&req->r_args.setattr.mtime,
 					     &attr->ia_mtime);
 			mask |= CEPH_SETATTR_MTIME;
-			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
+			release |= CEPH_CAP_FILE_SHARED |
+				   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
 		}
 	}
 	if (ia_valid & ATTR_SIZE) {
@@ -2041,8 +2069,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 			req->r_args.setattr.old_size =
 				cpu_to_le64(inode->i_size);
 			mask |= CEPH_SETATTR_SIZE;
-			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
+			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+				   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
 		}
 	}
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1b468250e947..2e8f90f96540 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -604,10 +604,20 @@ static void __register_request(struct ceph_mds_client *mdsc,
 			       struct ceph_mds_request *req,
 			       struct inode *dir)
 {
+	int ret = 0;
+
 	req->r_tid = ++mdsc->last_tid;
-	if (req->r_num_caps)
-		ceph_reserve_caps(mdsc, &req->r_caps_reservation,
-				  req->r_num_caps);
+	if (req->r_num_caps) {
+		ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation,
+					req->r_num_caps);
+		if (ret < 0) {
+			pr_err("__register_request %p "
+			       "failed to reserve caps: %d\n", req, ret);
+			/* set req->r_err to fail early from __do_request */
+			req->r_err = ret;
+			return;
+		}
+	}
 	dout("__register_request %p tid %lld\n", req, req->r_tid);
 	ceph_mdsc_get_request(req);
 	insert_request(&mdsc->request_tree, req);
@@ -1545,9 +1555,9 @@ out:
 /*
  * Trim session cap count down to some max number.
  */
-static int trim_caps(struct ceph_mds_client *mdsc,
-		     struct ceph_mds_session *session,
-		     int max_caps)
+int ceph_trim_caps(struct ceph_mds_client *mdsc,
+		   struct ceph_mds_session *session,
+		   int max_caps)
 {
 	int trim_caps = session->s_nr_caps - max_caps;
 
@@ -2438,11 +2448,14 @@ out:
  */
 void ceph_invalidate_dir_request(struct ceph_mds_request *req)
 {
-	struct inode *inode = req->r_parent;
+	struct inode *dir = req->r_parent;
+	struct inode *old_dir = req->r_old_dentry_dir;
 
-	dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
+	dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir);
 
-	ceph_dir_clear_complete(inode);
+	ceph_dir_clear_complete(dir);
+	if (old_dir)
+		ceph_dir_clear_complete(old_dir);
 	if (req->r_dentry)
 		ceph_invalidate_dentry_lease(req->r_dentry);
 	if (req->r_old_dentry)
@@ -2773,7 +2786,7 @@ static void handle_session(struct ceph_mds_session *session,
 		break;
 
 	case CEPH_SESSION_RECALL_STATE:
-		trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
+		ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
 		break;
 
 	case CEPH_SESSION_FLUSHMSG:
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 837ac4b087a0..71e3b783ee6f 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -444,4 +444,7 @@ ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target);
 extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
 					  struct ceph_mds_session *session);
 
+extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
+			  struct ceph_mds_session *session,
+			  int max_caps);
 #endif
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 8a2ca41e4b97..07cf95e6413d 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -922,13 +922,17 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			/*
 			 * Move the inode to the new realm
 			 */
-			spin_lock(&realm->inodes_with_caps_lock);
+			oldrealm = ci->i_snap_realm;
+			spin_lock(&oldrealm->inodes_with_caps_lock);
 			list_del_init(&ci->i_snap_realm_item);
+			spin_unlock(&oldrealm->inodes_with_caps_lock);
+
+			spin_lock(&realm->inodes_with_caps_lock);
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
-			oldrealm = ci->i_snap_realm;
 			ci->i_snap_realm = realm;
 			spin_unlock(&realm->inodes_with_caps_lock);
+
 			spin_unlock(&ci->i_ceph_lock);
 
 			ceph_get_snap_realm(mdsc, realm);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2beeec07fa76..21b2e5b004eb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -256,7 +256,8 @@ struct ceph_inode_xattr {
  */
 struct ceph_dentry_info {
 	struct ceph_mds_session *lease_session;
-	u32 lease_gen, lease_shared_gen;
+	int lease_shared_gen;
+	u32 lease_gen;
 	u32 lease_seq;
 	unsigned long lease_renew_after, lease_renew_from;
 	struct list_head lru;
@@ -353,7 +354,7 @@ struct ceph_inode_info {
 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
 	atomic_t i_filelock_ref;
-	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
+	atomic_t i_shared_gen;       /* increment each time we get FILE_SHARED */
 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
 
@@ -648,7 +649,7 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
 extern void ceph_caps_init(struct ceph_mds_client *mdsc);
 extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
 extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
-extern void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 			     struct ceph_cap_reservation *ctx, int need);
 extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
 			       struct ceph_cap_reservation *ctx);
@@ -668,6 +669,9 @@ struct ceph_file_info {
 	short fmode;     /* initialized on open */
 	short flags;     /* CEPH_F_* */
 
+	spinlock_t rw_contexts_lock;
+	struct list_head rw_contexts;
+
 	/* readdir: position within the dir */
 	u32 frag;
 	struct ceph_mds_request *last_readdir;
@@ -684,6 +688,49 @@ struct ceph_file_info {
 	int dir_info_len;
 };
 
+struct ceph_rw_context {
+	struct list_head list;
+	struct task_struct *thread;
+	int caps;
+};
+
+#define CEPH_DEFINE_RW_CONTEXT(_name, _caps)	\
+	struct ceph_rw_context _name = {	\
+		.thread = current,		\
+		.caps = _caps,			\
+	}
+
+static inline void ceph_add_rw_context(struct ceph_file_info *cf,
+				       struct ceph_rw_context *ctx)
+{
+	spin_lock(&cf->rw_contexts_lock);
+	list_add(&ctx->list, &cf->rw_contexts);
+	spin_unlock(&cf->rw_contexts_lock);
+}
+
+static inline void ceph_del_rw_context(struct ceph_file_info *cf,
+				       struct ceph_rw_context *ctx)
+{
+	spin_lock(&cf->rw_contexts_lock);
+	list_del(&ctx->list);
+	spin_unlock(&cf->rw_contexts_lock);
+}
+
+static inline struct ceph_rw_context*
+ceph_find_rw_context(struct ceph_file_info *cf)
+{
+	struct ceph_rw_context *ctx, *found = NULL;
+	spin_lock(&cf->rw_contexts_lock);
+	list_for_each_entry(ctx, &cf->rw_contexts, list) {
+		if (ctx->thread == current) {
+			found = ctx;
+			break;
+		}
+	}
+	spin_unlock(&cf->rw_contexts_lock);
+	return found;
+}
+
 struct ceph_readdir_cache_control {
 	struct page  *page;
 	struct dentry **dentries;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5fc5dc660600..ef80085ed564 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1218,23 +1218,11 @@ COMPATIBLE_IOCTL(DMX_SET_PES_FILTER)
 COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE)
 COMPATIBLE_IOCTL(DMX_GET_PES_PIDS)
 COMPATIBLE_IOCTL(DMX_GET_STC)
-COMPATIBLE_IOCTL(FE_GET_INFO)
-COMPATIBLE_IOCTL(FE_DISEQC_RESET_OVERLOAD)
-COMPATIBLE_IOCTL(FE_DISEQC_SEND_MASTER_CMD)
-COMPATIBLE_IOCTL(FE_DISEQC_RECV_SLAVE_REPLY)
-COMPATIBLE_IOCTL(FE_DISEQC_SEND_BURST)
-COMPATIBLE_IOCTL(FE_SET_TONE)
-COMPATIBLE_IOCTL(FE_SET_VOLTAGE)
-COMPATIBLE_IOCTL(FE_ENABLE_HIGH_LNB_VOLTAGE)
-COMPATIBLE_IOCTL(FE_READ_STATUS)
-COMPATIBLE_IOCTL(FE_READ_BER)
-COMPATIBLE_IOCTL(FE_READ_SIGNAL_STRENGTH)
-COMPATIBLE_IOCTL(FE_READ_SNR)
-COMPATIBLE_IOCTL(FE_READ_UNCORRECTED_BLOCKS)
-COMPATIBLE_IOCTL(FE_SET_FRONTEND)
-COMPATIBLE_IOCTL(FE_GET_FRONTEND)
-COMPATIBLE_IOCTL(FE_GET_EVENT)
-COMPATIBLE_IOCTL(FE_DISHNETWORK_SEND_LEGACY_CMD)
+COMPATIBLE_IOCTL(DMX_REQBUFS)
+COMPATIBLE_IOCTL(DMX_QUERYBUF)
+COMPATIBLE_IOCTL(DMX_EXPBUF)
+COMPATIBLE_IOCTL(DMX_QBUF)
+COMPATIBLE_IOCTL(DMX_DQBUF)
 COMPATIBLE_IOCTL(VIDEO_STOP)
 COMPATIBLE_IOCTL(VIDEO_PLAY)
 COMPATIBLE_IOCTL(VIDEO_FREEZE)
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index 58e2fe40b2a0..5933f995309a 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -33,8 +33,7 @@ config CRAMFS_BLOCKDEV
 
 config CRAMFS_MTD
 	bool "Support CramFs image directly mapped in physical memory"
-	depends on CRAMFS && MTD
-	depends on CRAMFS=m || MTD=y
+	depends on CRAMFS && CRAMFS <= MTD
 	default y if !CRAMFS_BLOCKDEV
 	help
 	  This option allows the CramFs driver to load data directly from
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index c5a53fcc43ea..f0138674c1ed 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -242,7 +242,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx)
 	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
-	bool need_revalidate = inode_cmp_iversion(inode, file->f_version);
+	bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
 
 	if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1))
 		return 0;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 4111085a129f..3b8114def693 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -294,7 +294,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
 	unsigned char *types = NULL;
-	bool need_revalidate = inode_cmp_iversion(inode, file->f_version);
+	bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
 
 	if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
 		return 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index db5f9daa7780..7666c065b96f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -962,8 +962,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
 		err = bdev_dax_supported(sb, blocksize);
-		if (err)
-			goto failed_mount;
+		if (err) {
+			ext2_msg(sb, KERN_ERR,
+				"DAX unsupported by block device. Turning off DAX.");
+			sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+		}
 	}
 
 	/* If the blocksize doesn't match, re-read the thing.. */
@@ -1228,7 +1231,7 @@ static void ext2_clear_super_error(struct super_block *sb)
 		 * write and hope for the best.
 		 */
 		ext2_msg(sb, KERN_ERR,
-		       "previous I/O error to superblock detected\n");
+		       "previous I/O error to superblock detected");
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
 	}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index afda0a0499ce..da87cf757f7d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -209,7 +209,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 		 * readdir(2), then we might be pointing to an invalid
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
-		if (inode_cmp_iversion(inode, file->f_version)) {
+		if (!inode_eq_iversion(inode, file->f_version)) {
 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
 				de = (struct ext4_dir_entry_2 *)
 					(bh->b_data + i);
@@ -569,7 +569,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
 		 * cached entries.
 		 */
 		if ((!info->curr_node) ||
-		    inode_cmp_iversion(inode, file->f_version)) {
+		    !inode_eq_iversion(inode, file->f_version)) {
 			info->curr_node = NULL;
 			free_rb_tree_fname(&info->root);
 			file->f_version = inode_query_iversion(inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 7c4165b88505..70cf4c7b268a 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1487,7 +1487,7 @@ int ext4_read_inline_dir(struct file *file,
 	 * dirent right now.  Scan from the start of the inline
 	 * dir to make sure.
 	 */
-	if (inode_cmp_iversion(inode, file->f_version)) {
+	if (!inode_eq_iversion(inode, file->f_version)) {
 		for (i = 0; i < extra_size && i < offset;) {
 			/*
 			 * "." is with offset 0 and
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 421222ec3509..39bf464c35f1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3712,11 +3712,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		if (ext4_has_feature_inline_data(sb)) {
 			ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
 					" that may contain inline data");
-			goto failed_mount;
+			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
 		}
 		err = bdev_dax_supported(sb, blocksize);
-		if (err)
-			goto failed_mount;
+		if (err) {
+			ext4_msg(sb, KERN_ERR,
+				"DAX unsupported by block device. Turning off DAX.");
+			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
+		}
 	}
 
 	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index cefea792cde8..2649759c478a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -46,7 +46,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
 {
 	int ret = 1;
 	spin_lock(&dentry->d_lock);
-	if (inode_cmp_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry)))
+	if (!inode_eq_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry)))
 		ret = 0;
 	spin_unlock(&dentry->d_lock);
 	return ret;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e8120a282435..15e06fb552da 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -444,7 +444,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
 	int res = -ENOMEM;
 
 	mutex_lock(&sbi->vh_mutex);
-	inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO);
+	inode = hfsplus_new_inode(dir->i_sb, dir, S_IFLNK | S_IRWXUGO);
 	if (!inode)
 		goto out;
 
@@ -486,7 +486,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
 	int res = -ENOMEM;
 
 	mutex_lock(&sbi->vh_mutex);
-	inode = hfsplus_new_inode(dir->i_sb, mode);
+	inode = hfsplus_new_inode(dir->i_sb, dir, mode);
 	if (!inode)
 		goto out;
 
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a015044daa05..d9255abafb81 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -478,7 +478,8 @@ extern const struct address_space_operations hfsplus_aops;
 extern const struct address_space_operations hfsplus_btree_aops;
 extern const struct dentry_operations hfsplus_dentry_operations;
 
-struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode);
+struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
+				umode_t mode);
 void hfsplus_delete_inode(struct inode *inode);
 void hfsplus_inode_read_fork(struct inode *inode,
 			     struct hfsplus_fork_raw *fork);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 190c60efbc99..c0c8d433864f 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -354,7 +354,8 @@ static const struct file_operations hfsplus_file_operations = {
 	.unlocked_ioctl = hfsplus_ioctl,
 };
 
-struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
+struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
+				umode_t mode)
 {
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct inode *inode = new_inode(sb);
@@ -364,9 +365,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
 		return NULL;
 
 	inode->i_ino = sbi->next_cnid++;
-	inode->i_mode = mode;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = current_fsgid();
+	inode_init_owner(inode, dir, mode);
 	set_nlink(inode, 1);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 1d458b716957..513c357c734b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -549,7 +549,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 
 		if (!sbi->hidden_dir) {
 			mutex_lock(&sbi->vh_mutex);
-			sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
+			sbi->hidden_dir = hfsplus_new_inode(sb, root, S_IFDIR);
 			if (!sbi->hidden_dir) {
 				mutex_unlock(&sbi->vh_mutex);
 				err = -ENOMEM;
diff --git a/fs/inode.c b/fs/inode.c
index e2ca0f4b5151..ef362364d396 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -498,7 +498,6 @@ EXPORT_SYMBOL(__remove_inode_hash);
 
 void clear_inode(struct inode *inode)
 {
-	might_sleep();
 	/*
 	 * We have to cycle tree_lock here because reclaim can be still in the
 	 * process of removing the last page (in __delete_from_page_cache())
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ceeaf0fb6657..7d893543cf3b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1314,7 +1314,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
 
 	if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
 			&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
-			&& !inode_cmp_iversion_raw(inode, fattr->pre_change_attr)) {
+			&& inode_eq_iversion_raw(inode, fattr->pre_change_attr)) {
 		inode_set_iversion_raw(inode, fattr->change_attr);
 		if (S_ISDIR(inode->i_mode))
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
@@ -1373,7 +1373,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 
 	if (!nfs_file_has_buffered_writers(nfsi)) {
 		/* Verify a few of the more important attributes */
-		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode_cmp_iversion_raw(inode, fattr->change_attr))
+		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
 			invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE;
 
 		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
@@ -1803,7 +1803,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	/* More cache consistency checks */
 	if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
-		if (inode_cmp_iversion_raw(inode, fattr->change_attr)) {
+		if (!inode_eq_iversion_raw(inode, fattr->change_attr)) {
 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
 					inode->i_sb->s_id, inode->i_ino);
 			/* Could it be a race with writeback? */
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6c5009cc4e6f..68cb9e4740b4 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -130,7 +130,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
 }
 
 int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned int flags,
-		       time_t ctime, __u64 cno)
+		       time64_t ctime, __u64 cno)
 {
 	int err;
 
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 7bbccc099709..10e16935fff6 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -46,7 +46,7 @@ struct nilfs_segsum_info {
 	unsigned long		nfileblk;
 	u64			seg_seq;
 	__u64			cno;
-	time_t			ctime;
+	time64_t		ctime;
 	sector_t		next;
 };
 
@@ -120,7 +120,7 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
 			   struct nilfs_segment_buffer *prev);
 void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
 				  struct the_nilfs *);
-int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time_t,
+int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time64_t,
 		       __u64);
 int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
 int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 9f3ffba41533..0953635e7d48 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2040,7 +2040,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
 			goto out;
 
 		/* Update time stamp */
-		sci->sc_seg_ctime = get_seconds();
+		sci->sc_seg_ctime = ktime_get_real_seconds();
 
 		err = nilfs_segctor_collect(sci, nilfs, mode);
 		if (unlikely(err))
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 84084a4d9b3e..04634e3e3d58 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -157,7 +157,7 @@ struct nilfs_sc_info {
 	unsigned long		sc_blk_cnt;
 	unsigned long		sc_datablk_cnt;
 	unsigned long		sc_nblk_this_inc;
-	time_t			sc_seg_ctime;
+	time64_t		sc_seg_ctime;
 	__u64			sc_cno;
 	unsigned long		sc_flags;
 
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1341a41e7b43..c7fa139d50e8 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -526,7 +526,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
  * @modtime: modification time (option)
  */
 int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
-				   unsigned long nblocks, time_t modtime)
+				   unsigned long nblocks, time64_t modtime)
 {
 	struct buffer_head *bh;
 	struct nilfs_segment_usage *su;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 158a9190c8ec..673a891350f4 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -35,7 +35,7 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
 int nilfs_sufile_alloc(struct inode *, __u64 *);
 int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
 int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
-				   unsigned long nblocks, time_t modtime);
+				   unsigned long nblocks, time64_t modtime);
 int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
 ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned int,
 				size_t);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 3073b646e1ba..6ffeca84d7c3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -283,10 +283,10 @@ int nilfs_commit_super(struct super_block *sb, int flag)
 {
 	struct the_nilfs *nilfs = sb->s_fs_info;
 	struct nilfs_super_block **sbp = nilfs->ns_sbp;
-	time_t t;
+	time64_t t;
 
 	/* nilfs->ns_sem must be locked by the caller. */
-	t = get_seconds();
+	t = ktime_get_real_seconds();
 	nilfs->ns_sbwtime = t;
 	sbp[0]->s_wtime = cpu_to_le64(t);
 	sbp[0]->s_sum = 0;
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 490303e3d517..4b25837e7724 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -31,7 +31,7 @@ static struct kset *nilfs_kset;
 #define NILFS_SHOW_TIME(time_t_val, buf) ({ \
 		struct tm res; \
 		int count = 0; \
-		time_to_tm(time_t_val, 0, &res); \
+		time64_to_tm(time_t_val, 0, &res); \
 		res.tm_year += 1900; \
 		res.tm_mon += 1; \
 		count = scnprintf(buf, PAGE_SIZE, \
@@ -579,7 +579,7 @@ nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
 					struct the_nilfs *nilfs,
 					char *buf)
 {
-	time_t ctime;
+	time64_t ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	ctime = nilfs->ns_ctime;
@@ -593,13 +593,13 @@ nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
 					    struct the_nilfs *nilfs,
 					    char *buf)
 {
-	time_t ctime;
+	time64_t ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	ctime = nilfs->ns_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
+	return snprintf(buf, PAGE_SIZE, "%llu\n", ctime);
 }
 
 static ssize_t
@@ -607,7 +607,7 @@ nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
 					 struct the_nilfs *nilfs,
 					 char *buf)
 {
-	time_t nongc_ctime;
+	time64_t nongc_ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	nongc_ctime = nilfs->ns_nongc_ctime;
@@ -621,14 +621,13 @@ nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
 						struct the_nilfs *nilfs,
 						char *buf)
 {
-	time_t nongc_ctime;
+	time64_t nongc_ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	nongc_ctime = nilfs->ns_nongc_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-			(unsigned long long)nongc_ctime);
+	return snprintf(buf, PAGE_SIZE, "%llu\n", nongc_ctime);
 }
 
 static ssize_t
@@ -728,7 +727,7 @@ nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
 				     struct the_nilfs *nilfs,
 				     char *buf)
 {
-	time_t sbwtime;
+	time64_t sbwtime;
 
 	down_read(&nilfs->ns_sem);
 	sbwtime = nilfs->ns_sbwtime;
@@ -742,13 +741,13 @@ nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
 					 struct the_nilfs *nilfs,
 					 char *buf)
 {
-	time_t sbwtime;
+	time64_t sbwtime;
 
 	down_read(&nilfs->ns_sem);
 	sbwtime = nilfs->ns_sbwtime;
 	up_read(&nilfs->ns_sem);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
+	return snprintf(buf, PAGE_SIZE, "%llu\n", sbwtime);
 }
 
 static ssize_t
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 883d732b0259..36da1779f976 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -116,7 +116,7 @@ struct the_nilfs {
 	 */
 	struct buffer_head     *ns_sbh[2];
 	struct nilfs_super_block *ns_sbp[2];
-	time_t			ns_sbwtime;
+	time64_t		ns_sbwtime;
 	unsigned int		ns_sbwcount;
 	unsigned int		ns_sbsize;
 	unsigned int		ns_mount_state;
@@ -131,8 +131,8 @@ struct the_nilfs {
 	__u64			ns_nextnum;
 	unsigned long		ns_pseg_offset;
 	__u64			ns_cno;
-	time_t			ns_ctime;
-	time_t			ns_nongc_ctime;
+	time64_t		ns_ctime;
+	time64_t		ns_nongc_ctime;
 	atomic_t		ns_ndirtyblks;
 
 	/*
@@ -267,7 +267,7 @@ struct nilfs_root {
 
 static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
 {
-	u64 t = get_seconds();
+	u64 t = ktime_get_real_seconds();
 
 	return t < nilfs->ns_sbwtime ||
 		t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b7520e20a770..977763d4c27d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1776,7 +1776,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
 		 * readdir(2), then we might be pointing to an invalid
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
-		if (inode_cmp_iversion(inode, *f_version)) {
+		if (!inode_eq_iversion(inode, *f_version)) {
 			for (i = 0; i < i_size_read(inode) && i < offset; ) {
 				de = (struct ocfs2_dir_entry *)
 					(data->id_data + i);
@@ -1870,7 +1870,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
 		 * readdir(2), then we might be pointing to an invalid
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
-		if (inode_cmp_iversion(inode, *f_version)) {
+		if (!inode_eq_iversion(inode, *f_version)) {
 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
 				de = (struct ocfs2_dir_entry *) (bh->b_data + i);
 				/* It's too expensive to do a full
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index ae782df5c063..fe484cf93e5c 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -33,7 +33,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
 	new_op->upcall.req.lookup.parent_refn = parent->refn;
 	strncpy(new_op->upcall.req.lookup.d_name,
 		dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	gossip_debug(GOSSIP_DCACHE_DEBUG,
 		     "%s:%s:%d interrupt flag [%d]\n",
@@ -118,8 +118,12 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		return 0;
 
 	/* We do not need to continue with negative dentries. */
-	if (!dentry->d_inode)
-		goto out;
+	if (!dentry->d_inode) {
+		gossip_debug(GOSSIP_DCACHE_DEBUG,
+		    "%s: negative dentry or positive dentry and inode valid.\n",
+		    __func__);
+		return 1;
+	}
 
 	/* Now we must perform a getattr to validate the inode contents. */
 
@@ -129,14 +133,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		    __FILE__, __func__, __LINE__);
 		return 0;
 	}
-	if (ret == 0)
-		return 0;
-
-out:
-	gossip_debug(GOSSIP_DCACHE_DEBUG,
-	    "%s: negative dentry or positive dentry and inode valid.\n",
-	    __func__);
-	return 1;
+	return !ret;
 }
 
 const struct dentry_operations orangefs_dentry_operations = {
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index c98bba2dbc94..6e3134e6d98a 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -41,7 +41,7 @@ static int orangefs_create(struct inode *dir,
 			       ORANGEFS_TYPE_METAFILE, mode);
 
 	strncpy(new_op->upcall.req.create.d_name,
-		dentry->d_name.name, ORANGEFS_NAME_MAX);
+		dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
 
@@ -142,7 +142,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
 	new_op->upcall.req.lookup.parent_refn = parent->refn;
 
 	strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	gossip_debug(GOSSIP_NAME_DEBUG,
 		     "%s: doing lookup on %s under %pU,%d\n",
@@ -244,7 +244,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
 
 	new_op->upcall.req.remove.parent_refn = parent->refn;
 	strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, "orangefs_unlink",
 				get_interruptible_flag(inode));
@@ -300,8 +300,8 @@ static int orangefs_symlink(struct inode *dir,
 
 	strncpy(new_op->upcall.req.sym.entry_name,
 		dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
-	strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
+	strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
 
@@ -372,7 +372,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 			      ORANGEFS_TYPE_DIRECTORY, mode);
 
 	strncpy(new_op->upcall.req.mkdir.d_name,
-		dentry->d_name.name, ORANGEFS_NAME_MAX);
+		dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
 
@@ -453,10 +453,10 @@ static int orangefs_rename(struct inode *old_dir,
 
 	strncpy(new_op->upcall.req.rename.d_old_name,
 		old_dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 	strncpy(new_op->upcall.req.rename.d_new_name,
 		new_dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op,
 				"orangefs_rename",
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 1c59dff530de..6e35f2f3c897 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -328,7 +328,7 @@ static int help_show(struct seq_file *m, void *v)
 /*
  * initialize the client-debug file.
  */
-int orangefs_client_debug_init(void)
+static int orangefs_client_debug_init(void)
 {
 
 	int rc = -ENOMEM;
@@ -1056,7 +1056,7 @@ int orangefs_debugfs_new_debug(void __user *arg)
 			client_debug_string,
 			llu(mask_info.mask_value));
 	} else {
-		gossip_lerr("Invalid mask type....\n");
+		gossip_err("Invalid mask type....\n");
 		return -EINVAL;
 	}
 
diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h
index b5fd9cd4960f..51147f9ce3d6 100644
--- a/fs/orangefs/orangefs-debugfs.h
+++ b/fs/orangefs/orangefs-debugfs.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 int orangefs_debugfs_init(int);
 void orangefs_debugfs_cleanup(void);
-int orangefs_client_debug_init(void);
 int orangefs_prepare_debugfs_help_string(int);
 int orangefs_debugfs_new_client_mask(void __user *);
 int orangefs_debugfs_new_client_string(void __user *);
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 2595453fe737..eebbaece85ef 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -56,11 +56,7 @@
 
 #include "orangefs-dev-proto.h"
 
-#ifdef ORANGEFS_KERNEL_DEBUG
-#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       10
-#else
 #define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       20
-#endif
 
 #define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS   30
 
@@ -104,11 +100,11 @@ enum orangefs_vfs_op_states {
  * orangefs kernel memory related flags
  */
 
-#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB))
+#if (defined CONFIG_DEBUG_SLAB)
 #define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
 #else
 #define ORANGEFS_CACHE_CREATE_FLAGS 0
-#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */
+#endif
 
 extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
 extern const struct xattr_handler *orangefs_xattr_handlers[];
@@ -471,8 +467,6 @@ int orangefs_inode_check_changed(struct inode *inode);
 
 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
 
-void orangefs_make_bad_inode(struct inode *inode);
-
 int orangefs_unmount_sb(struct super_block *sb);
 
 bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index 97fe93129f38..ea6256d136d1 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -230,25 +230,42 @@ static int orangefs_inode_type(enum orangefs_ds_type objtype)
 		return -1;
 }
 
-static int orangefs_inode_is_stale(struct inode *inode, int new,
+static void orangefs_make_bad_inode(struct inode *inode)
+{
+	if (is_root_handle(inode)) {
+		/*
+		 * if this occurs, the pvfs2-client-core was killed but we
+		 * can't afford to lose the inode operations and such
+		 * associated with the root handle in any case.
+		 */
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** NOT making bad root inode %pU\n",
+			     get_khandle_from_ino(inode));
+	} else {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** making bad inode %pU\n",
+			     get_khandle_from_ino(inode));
+		make_bad_inode(inode);
+	}
+}
+
+static int orangefs_inode_is_stale(struct inode *inode,
     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
 {
 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 	int type = orangefs_inode_type(attrs->objtype);
-	if (!new) {
-		/*
-		 * If the inode type or symlink target have changed then this
-		 * inode is stale.
-		 */
-		if (type == -1 || !(inode->i_mode & type)) {
-			orangefs_make_bad_inode(inode);
-			return 1;
-		}
-		if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
-		    link_target, ORANGEFS_NAME_MAX)) {
-			orangefs_make_bad_inode(inode);
-			return 1;
-		}
+	/*
+	 * If the inode type or symlink target have changed then this
+	 * inode is stale.
+	 */
+	if (type == -1 || !(inode->i_mode & type)) {
+		orangefs_make_bad_inode(inode);
+		return 1;
+	}
+	if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
+	    link_target, ORANGEFS_NAME_MAX)) {
+		orangefs_make_bad_inode(inode);
+		return 1;
 	}
 	return 0;
 }
@@ -294,16 +311,18 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
 	if (ret != 0)
 		goto out;
 
-	type = orangefs_inode_type(new_op->
-	    downcall.resp.getattr.attributes.objtype);
-	ret = orangefs_inode_is_stale(inode, new,
-	    &new_op->downcall.resp.getattr.attributes,
-	    new_op->downcall.resp.getattr.link_target);
-	if (ret) {
-		ret = -ESTALE;
-		goto out;
+	if (!new) {
+		ret = orangefs_inode_is_stale(inode,
+		    &new_op->downcall.resp.getattr.attributes,
+		    new_op->downcall.resp.getattr.link_target);
+		if (ret) {
+			ret = -ESTALE;
+			goto out;
+		}
 	}
 
+	type = orangefs_inode_type(new_op->
+	    downcall.resp.getattr.attributes.objtype);
 	switch (type) {
 	case S_IFREG:
 		inode->i_flags = orangefs_inode_flags(&new_op->
@@ -348,6 +367,12 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
 			inode->i_link = orangefs_inode->link_target;
 		}
 		break;
+	/* i.e. -1 */
+	default:
+		/* XXX: ESTALE?  This is what is done if it is not new. */
+		orangefs_make_bad_inode(inode);
+		ret = -ESTALE;
+		goto out;
 	}
 
 	inode->i_uid = make_kuid(&init_user_ns, new_op->
@@ -401,7 +426,7 @@ int orangefs_inode_check_changed(struct inode *inode)
 	if (ret != 0)
 		goto out;
 
-	ret = orangefs_inode_is_stale(inode, 0,
+	ret = orangefs_inode_is_stale(inode,
 	    &new_op->downcall.resp.getattr.attributes,
 	    new_op->downcall.resp.getattr.link_target);
 out:
@@ -444,25 +469,6 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
 	return ret;
 }
 
-void orangefs_make_bad_inode(struct inode *inode)
-{
-	if (is_root_handle(inode)) {
-		/*
-		 * if this occurs, the pvfs2-client-core was killed but we
-		 * can't afford to lose the inode operations and such
-		 * associated with the root handle in any case.
-		 */
-		gossip_debug(GOSSIP_UTILS_DEBUG,
-			     "*** NOT making bad root inode %pU\n",
-			     get_khandle_from_ino(inode));
-	} else {
-		gossip_debug(GOSSIP_UTILS_DEBUG,
-			     "*** making bad inode %pU\n",
-			     get_khandle_from_ino(inode));
-		make_bad_inode(inode);
-	}
-}
-
 /*
  * The following is a very dirty hack that is now a permanent part of the
  * ORANGEFS protocol. See protocol.h for more error definitions.
@@ -537,6 +543,7 @@ int orangefs_normalize_to_errno(__s32 error_code)
 	 */
 	} else {
 		gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
+		error_code = -EINVAL;
 	}
 	return error_code;
 }
diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h
index e0bf5e4dce0d..dc6e3e6269c3 100644
--- a/fs/orangefs/protocol.h
+++ b/fs/orangefs/protocol.h
@@ -395,13 +395,6 @@ struct ORANGEFS_dev_map_desc {
 
 /* gossip.h *****************************************************************/
 
-#ifdef GOSSIP_DISABLE_DEBUG
-#define gossip_debug(mask, fmt, ...)					\
-do {									\
-	if (0)								\
-		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
-} while (0)
-#else
 extern __u64 orangefs_gossip_debug_mask;
 
 /* try to avoid function call overhead by checking masks in macro */
@@ -410,13 +403,5 @@ do {									\
 	if (orangefs_gossip_debug_mask & (mask))			\
 		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
 } while (0)
-#endif /* GOSSIP_DISABLE_DEBUG */
-
-/* do file and line number printouts w/ the GNU preprocessor */
-#define gossip_ldebug(mask, fmt, ...)					\
-	gossip_debug(mask, "%s: " fmt, __func__, ##__VA_ARGS__)
 
 #define gossip_err pr_err
-#define gossip_lerr(fmt, ...)						\
-	gossip_err("%s line %d: " fmt,					\
-		   __FILE__, __LINE__, ##__VA_ARGS__)
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 62d49e53061c..3ae5fdba0225 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -335,7 +335,7 @@ static int orangefs_encode_fh(struct inode *inode,
 	struct orangefs_object_kref refn;
 
 	if (*max_len < len) {
-		gossip_lerr("fh buffer is too small for encoding\n");
+		gossip_err("fh buffer is too small for encoding\n");
 		*max_len = len;
 		type = 255;
 		goto out;
@@ -383,7 +383,7 @@ static int orangefs_unmount(int id, __s32 fs_id, const char *devname)
 	op->upcall.req.fs_umount.id = id;
 	op->upcall.req.fs_umount.fs_id = fs_id;
 	strncpy(op->upcall.req.fs_umount.orangefs_config_server,
-	    devname, ORANGEFS_MAX_SERVER_ADDR_LEN);
+	    devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
 	r = service_operation(op, "orangefs_fs_umount", 0);
 	/* Not much to do about an error here. */
 	if (r)
@@ -478,7 +478,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 
 	strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
 		devname,
-		ORANGEFS_MAX_SERVER_ADDR_LEN);
+		ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
 
 	gossip_debug(GOSSIP_SUPER_DEBUG,
 		     "Attempting ORANGEFS Mount via host %s\n",
@@ -520,7 +520,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 	 */
 	strncpy(ORANGEFS_SB(sb)->devname,
 		devname,
-		ORANGEFS_MAX_SERVER_ADDR_LEN);
+		ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
 
 	/* mount_pending must be cleared */
 	ORANGEFS_SB(sb)->mount_pending = 0;
diff --git a/fs/pipe.c b/fs/pipe.c
index a449ca0ec0c6..0913aed7fd0d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -35,11 +35,6 @@
  */
 unsigned int pipe_max_size = 1048576;
 
-/*
- * Minimum pipe size, as required by POSIX
- */
-unsigned int pipe_min_size = PAGE_SIZE;
-
 /* Maximum allocatable pages per user. Hard limit is unset by default, soft
  * matches default values.
  */
@@ -610,12 +605,21 @@ static unsigned long account_pipe_buffers(struct user_struct *user,
 
 static bool too_many_pipe_buffers_soft(unsigned long user_bufs)
 {
-	return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft;
+	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
+
+	return soft_limit && user_bufs > soft_limit;
 }
 
 static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
 {
-	return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard;
+	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
+
+	return hard_limit && user_bufs > hard_limit;
+}
+
+static bool is_unprivileged_user(void)
+{
+	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
 }
 
 struct pipe_inode_info *alloc_pipe_info(void)
@@ -624,22 +628,23 @@ struct pipe_inode_info *alloc_pipe_info(void)
 	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
 	struct user_struct *user = get_current_user();
 	unsigned long user_bufs;
+	unsigned int max_size = READ_ONCE(pipe_max_size);
 
 	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
 	if (pipe == NULL)
 		goto out_free_uid;
 
-	if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE))
-		pipe_bufs = pipe_max_size >> PAGE_SHIFT;
+	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
+		pipe_bufs = max_size >> PAGE_SHIFT;
 
 	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
 
-	if (too_many_pipe_buffers_soft(user_bufs)) {
+	if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) {
 		user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
 		pipe_bufs = 1;
 	}
 
-	if (too_many_pipe_buffers_hard(user_bufs))
+	if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user())
 		goto out_revert_acct;
 
 	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
@@ -1020,18 +1025,16 @@ const struct file_operations pipefifo_fops = {
  * Currently we rely on the pipe array holding a power-of-2 number
  * of pages. Returns 0 on error.
  */
-unsigned int round_pipe_size(unsigned int size)
+unsigned int round_pipe_size(unsigned long size)
 {
-	unsigned long nr_pages;
-
-	if (size < pipe_min_size)
-		size = pipe_min_size;
-
-	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages == 0)
+	if (size > (1U << 31))
 		return 0;
 
-	return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+	/* Minimum pipe size, as required by POSIX */
+	if (size < PAGE_SIZE)
+		return PAGE_SIZE;
+
+	return roundup_pow_of_two(size);
 }
 
 /*
@@ -1046,8 +1049,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
 	long ret = 0;
 
 	size = round_pipe_size(arg);
-	if (size == 0)
-		return -EINVAL;
 	nr_pages = size >> PAGE_SHIFT;
 
 	if (!nr_pages)
@@ -1069,7 +1070,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
 	if (nr_pages > pipe->buffers &&
 			(too_many_pipe_buffers_hard(user_bufs) ||
 			 too_many_pipe_buffers_soft(user_bufs)) &&
-			!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+			is_unprivileged_user()) {
 		ret = -EPERM;
 		goto out_revert_acct;
 	}
@@ -1125,16 +1126,6 @@ out_revert_acct:
 }
 
 /*
- * This should work even if CONFIG_PROC_FS isn't set, as proc_dopipe_max_size
- * will return an error.
- */
-int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
-		 size_t *lenp, loff_t *ppos)
-{
-	return proc_dopipe_max_size(table, write, buf, lenp, ppos);
-}
-
-/*
  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
  * location, so checking ->i_pipe is not enough to verify that this is a
  * pipe.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d67a72dcb92c..598803576e4c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -736,16 +736,10 @@ static int children_seq_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-int children_seq_release(struct inode *inode, struct file *file)
-{
-	seq_release(inode, file);
-	return 0;
-}
-
 const struct file_operations proc_tid_children_operations = {
 	.open    = children_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = children_seq_release,
+	.release = seq_release,
 };
 #endif /* CONFIG_PROC_CHILDREN */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b52d659..9298324325ed 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -75,6 +75,7 @@
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
 #include <linux/printk.h>
+#include <linux/cache.h>
 #include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
@@ -100,6 +101,8 @@
 #include "internal.h"
 #include "fd.h"
 
+#include "../../lib/kstrtox.h"
+
 /* NOTE:
  *	Implementing inode permission operations in /proc is almost
  *	certainly an error.  Permission checks need to happen during
@@ -110,8 +113,8 @@
  *	in /proc for a task before it execs a suid executable.
  */
 
-static u8 nlink_tid;
-static u8 nlink_tgid;
+static u8 nlink_tid __ro_after_init;
+static u8 nlink_tgid __ro_after_init;
 
 struct pid_entry {
 	const char *name;
@@ -1370,7 +1373,7 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
-	WRITE_ONCE(task->fail_nth, n);
+	task->fail_nth = n;
 	put_task_struct(task);
 
 	return count;
@@ -1386,8 +1389,7 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
 	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
-	len = snprintf(numbuf, sizeof(numbuf), "%u\n",
-			READ_ONCE(task->fail_nth));
+	len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
 	len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
 	put_task_struct(task);
 
@@ -1907,8 +1909,33 @@ end_instantiate:
 static int dname_to_vma_addr(struct dentry *dentry,
 			     unsigned long *start, unsigned long *end)
 {
-	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
+	const char *str = dentry->d_name.name;
+	unsigned long long sval, eval;
+	unsigned int len;
+
+	len = _parse_integer(str, 16, &sval);
+	if (len & KSTRTOX_OVERFLOW)
+		return -EINVAL;
+	if (sval != (unsigned long)sval)
 		return -EINVAL;
+	str += len;
+
+	if (*str != '-')
+		return -EINVAL;
+	str++;
+
+	len = _parse_integer(str, 16, &eval);
+	if (len & KSTRTOX_OVERFLOW)
+		return -EINVAL;
+	if (eval != (unsigned long)eval)
+		return -EINVAL;
+	str += len;
+
+	if (*str != '\0')
+		return -EINVAL;
+
+	*start = sval;
+	*end = eval;
 
 	return 0;
 }
@@ -2000,9 +2027,9 @@ out:
 }
 
 struct map_files_info {
+	unsigned long	start;
+	unsigned long	end;
 	fmode_t		mode;
-	unsigned int	len;
-	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
 };
 
 /*
@@ -2172,10 +2199,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 			if (++pos <= ctx->pos)
 				continue;
 
+			info.start = vma->vm_start;
+			info.end = vma->vm_end;
 			info.mode = vma->vm_file->f_mode;
-			info.len = snprintf(info.name,
-					sizeof(info.name), "%lx-%lx",
-					vma->vm_start, vma->vm_end);
 			if (flex_array_put(fa, i++, &info, GFP_KERNEL))
 				BUG();
 		}
@@ -2183,9 +2209,13 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 	up_read(&mm->mmap_sem);
 
 	for (i = 0; i < nr_files; i++) {
+		char buf[4 * sizeof(long) + 2];	/* max: %lx-%lx\0 */
+		unsigned int len;
+
 		p = flex_array_get(fa, i);
+		len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
 		if (!proc_fill_cache(file, ctx,
-				      p->name, p->len,
+				      buf, len,
 				      proc_map_files_instantiate,
 				      task,
 				      (void *)(unsigned long)p->mode))
@@ -3018,11 +3048,11 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 {
 	struct dentry *dentry, *leader, *dir;
-	char buf[PROC_NUMBUF];
+	char buf[10 + 1];
 	struct qstr name;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", pid);
+	name.len = snprintf(buf, sizeof(buf), "%u", pid);
 	/* no ->d_hash() rejects on procfs */
 	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (dentry) {
@@ -3034,7 +3064,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		return;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
+	name.len = snprintf(buf, sizeof(buf), "%u", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (!leader)
 		goto out;
@@ -3046,7 +3076,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		goto out_put_leader;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", pid);
+	name.len = snprintf(buf, sizeof(buf), "%u", pid);
 	dentry = d_hash_and_lookup(dir, &name);
 	if (dentry) {
 		d_invalidate(dentry);
@@ -3225,14 +3255,14 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 	for (iter = next_tgid(ns, iter);
 	     iter.task;
 	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
-		char name[PROC_NUMBUF];
+		char name[10 + 1];
 		int len;
 
 		cond_resched();
 		if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
 			continue;
 
-		len = snprintf(name, sizeof(name), "%d", iter.tgid);
+		len = snprintf(name, sizeof(name), "%u", iter.tgid);
 		ctx->pos = iter.tgid + TGID_OFFSET;
 		if (!proc_fill_cache(file, ctx, name, len,
 				     proc_pid_instantiate, iter.task, NULL)) {
@@ -3560,10 +3590,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
 	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
 	     task;
 	     task = next_tid(task), ctx->pos++) {
-		char name[PROC_NUMBUF];
+		char name[10 + 1];
 		int len;
 		tid = task_pid_nr_ns(task, ns);
-		len = snprintf(name, sizeof(name), "%d", tid);
+		len = snprintf(name, sizeof(name), "%u", tid);
 		if (!proc_fill_cache(file, ctx, name, len,
 				proc_task_instantiate, task, NULL)) {
 			/* returning this tgid failed, save it as the first
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index 290ba85cb900..a8ac48aebd59 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -55,8 +55,7 @@ static int show_console_dev(struct seq_file *m, void *v)
 	if (dev)
 		seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
 
-	seq_printf(m, "\n");
-
+	seq_putc(m, '\n');
 	return 0;
 }
 
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 96fc70225e54..6b80cd1e419a 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -236,7 +236,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 	for (fd = ctx->pos - 2;
 	     fd < files_fdtable(files)->max_fds;
 	     fd++, ctx->pos++) {
-		char name[PROC_NUMBUF];
+		char name[10 + 1];
 		int len;
 
 		if (!fcheck_files(files, fd))
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 793a67574668..5d709fa8f3a2 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -28,7 +28,7 @@
 
 static DEFINE_RWLOCK(proc_subdir_lock);
 
-static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
+static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len)
 {
 	if (len < de->namelen)
 		return -1;
@@ -60,7 +60,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
 		struct proc_dir_entry *de = rb_entry(node,
 						     struct proc_dir_entry,
 						     subdir_node);
-		int result = proc_match(len, name, de);
+		int result = proc_match(name, de, len);
 
 		if (result < 0)
 			node = node->rb_left;
@@ -84,7 +84,7 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
 		struct proc_dir_entry *this = rb_entry(*new,
 						       struct proc_dir_entry,
 						       subdir_node);
-		int result = proc_match(de->namelen, de->name, this);
+		int result = proc_match(de->name, this, de->namelen);
 
 		parent = *new;
 		if (result < 0)
@@ -211,8 +211,8 @@ void proc_free_inum(unsigned int inum)
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
  */
-struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
-		struct dentry *dentry)
+struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
+			      struct proc_dir_entry *de)
 {
 	struct inode *inode;
 
@@ -235,7 +235,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
 		unsigned int flags)
 {
-	return proc_lookup_de(PDE(dir), dir, dentry);
+	return proc_lookup_de(dir, dentry, PDE(dir));
 }
 
 /*
@@ -247,8 +247,8 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
  * value of the readdir() call, as long as it's non-negative
  * for success..
  */
-int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
-		    struct dir_context *ctx)
+int proc_readdir_de(struct file *file, struct dir_context *ctx,
+		    struct proc_dir_entry *de)
 {
 	int i;
 
@@ -292,7 +292,7 @@ int proc_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
 
-	return proc_readdir_de(PDE(inode), file, ctx);
+	return proc_readdir_de(file, ctx, PDE(inode));
 }
 
 /*
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8dacaabb9f37..6e8724958116 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -5,6 +5,7 @@
  *  Copyright (C) 1991, 1992  Linus Torvalds
  */
 
+#include <linux/cache.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
@@ -52,7 +53,7 @@ static void proc_evict_inode(struct inode *inode)
 	}
 }
 
-static struct kmem_cache * proc_inode_cachep;
+static struct kmem_cache *proc_inode_cachep __ro_after_init;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
@@ -128,12 +129,12 @@ enum {BIAS = -1U<<31};
 
 static inline int use_pde(struct proc_dir_entry *pde)
 {
-	return atomic_inc_unless_negative(&pde->in_use);
+	return likely(atomic_inc_unless_negative(&pde->in_use));
 }
 
 static void unuse_pde(struct proc_dir_entry *pde)
 {
-	if (atomic_dec_return(&pde->in_use) == BIAS)
+	if (unlikely(atomic_dec_return(&pde->in_use) == BIAS))
 		complete(pde->pde_unload_completion);
 }
 
@@ -166,7 +167,7 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 		spin_lock(&pde->pde_unload_lock);
 		/* After ->release. */
 		list_del(&pdeo->lh);
-		if (pdeo->c)
+		if (unlikely(pdeo->c))
 			complete(pdeo->c);
 		kfree(pdeo);
 	}
@@ -420,7 +421,7 @@ static const char *proc_get_link(struct dentry *dentry,
 				 struct delayed_call *done)
 {
 	struct proc_dir_entry *pde = PDE(inode);
-	if (unlikely(!use_pde(pde)))
+	if (!use_pde(pde))
 		return ERR_PTR(-EINVAL);
 	set_delayed_call(done, proc_put_link, pde);
 	return pde->data;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 4a67188c8d74..d697c8ab0a14 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -31,24 +31,28 @@ struct mempolicy;
  * subdir_node is used to build the rb tree "subdir" of the parent.
  */
 struct proc_dir_entry {
+	/*
+	 * number of callers into module in progress;
+	 * negative -> it's going away RSN
+	 */
+	atomic_t in_use;
+	atomic_t count;		/* use count */
+	struct list_head pde_openers;	/* who did ->open, but not ->release */
+	/* protects ->pde_openers and all struct pde_opener instances */
+	spinlock_t pde_unload_lock;
+	struct completion *pde_unload_completion;
+	const struct inode_operations *proc_iops;
+	const struct file_operations *proc_fops;
+	void *data;
 	unsigned int low_ino;
-	umode_t mode;
 	nlink_t nlink;
 	kuid_t uid;
 	kgid_t gid;
 	loff_t size;
-	const struct inode_operations *proc_iops;
-	const struct file_operations *proc_fops;
 	struct proc_dir_entry *parent;
 	struct rb_root_cached subdir;
 	struct rb_node subdir_node;
-	void *data;
-	atomic_t count;		/* use count */
-	atomic_t in_use;	/* number of callers into module in progress; */
-			/* negative -> it's going away RSN */
-	struct completion *pde_unload_completion;
-	struct list_head pde_openers;	/* who did ->open, but not ->release */
-	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+	umode_t mode;
 	u8 namelen;
 	char name[];
 } __randomize_layout;
@@ -149,10 +153,9 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
  * generic.c
  */
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
-extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
-				     struct dentry *);
+struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
 extern int proc_readdir(struct file *, struct dir_context *);
-extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *);
+int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *);
 
 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
 {
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 4bc85cb8be6a..e8a93bc8285d 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -512,23 +512,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				return -EFAULT;
 		} else {
 			if (kern_addr_valid(start)) {
-				unsigned long n;
-
 				/*
 				 * Using bounce buffer to bypass the
 				 * hardened user copy kernel text checks.
 				 */
-				memcpy(buf, (char *) start, tsz);
-				n = copy_to_user(buffer, buf, tsz);
-				/*
-				 * We cannot distinguish between fault on source
-				 * and fault on destination. When this happens
-				 * we clear too and hope it will trigger the
-				 * EFAULT again.
-				 */
-				if (n) { 
-					if (clear_user(buffer + tsz - n,
-								n))
+				if (probe_kernel_read(buf, (void *) start, tsz)) {
+					if (clear_user(buffer, tsz))
+						return -EFAULT;
+				} else {
+					if (copy_to_user(buffer, buf, tsz))
 						return -EFAULT;
 				}
 			} else {
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index a2bf369c923d..68c06ae7888c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -135,7 +135,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
 	de = ERR_PTR(-ENOENT);
 	net = get_proc_task_net(dir);
 	if (net != NULL) {
-		de = proc_lookup_de(net->proc_net, dir, dentry);
+		de = proc_lookup_de(dir, dentry, net->proc_net);
 		put_net(net);
 	}
 	return de;
@@ -172,7 +172,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)
 	ret = -EINVAL;
 	net = get_proc_task_net(file_inode(file));
 	if (net != NULL) {
-		ret = proc_readdir_de(net->proc_net, file, ctx);
+		ret = proc_readdir_de(file, ctx, net->proc_net);
 		put_net(net);
 	}
 	return ret;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 31326bb23b8b..4d7d061696b3 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/cache.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/pid_namespace.h>
@@ -17,11 +18,11 @@ static const char *proc_self_get_link(struct dentry *dentry,
 
 	if (!tgid)
 		return ERR_PTR(-ENOENT);
-	/* 11 for max length of signed int in decimal + NULL term */
-	name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC);
+	/* max length of unsigned int in decimal + NULL term */
+	name = kmalloc(10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
 	if (unlikely(!name))
 		return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
-	sprintf(name, "%d", tgid);
+	sprintf(name, "%u", tgid);
 	set_delayed_call(done, kfree_link, name);
 	return name;
 }
@@ -30,7 +31,7 @@ static const struct inode_operations proc_self_inode_operations = {
 	.get_link	= proc_self_get_link,
 };
 
-static unsigned self_inum;
+static unsigned self_inum __ro_after_init;
 
 int proc_setup_self(struct super_block *s)
 {
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index b813e3b529f2..9d2efaca499f 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/cache.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/pid_namespace.h>
@@ -18,11 +19,10 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
 
 	if (!pid)
 		return ERR_PTR(-ENOENT);
-	name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF,
-				dentry ? GFP_KERNEL : GFP_ATOMIC);
+	name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
 	if (unlikely(!name))
 		return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
-	sprintf(name, "%d/task/%d", tgid, pid);
+	sprintf(name, "%u/task/%u", tgid, pid);
 	set_delayed_call(done, kfree_link, name);
 	return name;
 }
@@ -31,7 +31,7 @@ static const struct inode_operations proc_thread_self_inode_operations = {
 	.get_link	= proc_thread_self_get_link,
 };
 
-static unsigned thread_self_inum;
+static unsigned thread_self_inum __ro_after_init;
 
 int proc_setup_thread_self(struct super_block *s)
 {
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 885d445afa0d..a45f0af22a60 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1178,18 +1178,16 @@ fs_initcall(vmcore_init);
 /* Cleanup function for vmcore module. */
 void vmcore_cleanup(void)
 {
-	struct list_head *pos, *next;
-
 	if (proc_vmcore) {
 		proc_remove(proc_vmcore);
 		proc_vmcore = NULL;
 	}
 
 	/* clear the vmcore list. */
-	list_for_each_safe(pos, next, &vmcore_list) {
+	while (!list_empty(&vmcore_list)) {
 		struct vmcore *m;
 
-		m = list_entry(pos, struct vmcore, list);
+		m = list_first_entry(&vmcore_list, struct vmcore, list);
 		list_del(&m->list);
 		kfree(m);
 	}
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index 14626b34d13e..0927a4b2ecaf 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -62,6 +62,11 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 	dest->tv_sec -= offset * 60;
 	dest->tv_nsec = 1000 * (src.centiseconds * 10000 +
 			src.hundredsOfMicroseconds * 100 + src.microseconds);
+	/*
+	 * Sanitize nanosecond field since reportedly some filesystems are
+	 * recorded with bogus sub-second values.
+	 */
+	dest->tv_nsec %= NSEC_PER_SEC;
 	return dest;
 }
 
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 50dfce000864..b721d0bda5e5 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -429,7 +429,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
 	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
-	bool need_revalidate = inode_cmp_iversion(inode, file->f_version);
+	bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
 	unsigned flags = UFS_SB(sb)->s_flags;
 
 	UFSD("BEGIN\n");