diff options
Diffstat (limited to 'fs')
204 files changed, 1879 insertions, 1059 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8b75463cb211..af03c2a901eb 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -94,13 +94,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,  	if (v9ses->cache)  		sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; -	sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; +	sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME;  	if (!v9ses->cache) -		sb->s_flags |= MS_SYNCHRONOUS; +		sb->s_flags |= SB_SYNCHRONOUS;  #ifdef CONFIG_9P_FS_POSIX_ACL  	if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL) -		sb->s_flags |= MS_POSIXACL; +		sb->s_flags |= SB_POSIXACL;  #endif  	return 0; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index c9fdfb112933..cfda2c7caedc 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -213,7 +213,7 @@ static int parse_options(struct super_block *sb, char *options)  static int adfs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_NODIRATIME; +	*flags |= SB_NODIRATIME;  	return parse_options(sb, data);  } @@ -372,7 +372,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)  	struct inode *root;  	int ret = -EINVAL; -	sb->s_flags |= MS_NODIRATIME; +	sb->s_flags |= SB_NODIRATIME;  	asb = kzalloc(sizeof(*asb), GFP_KERNEL);  	if (!asb) diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 185d5ab7e986..0f0e6925e97d 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -453,7 +453,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...)  	
pr_crit("error (device %s): %s(): %pV\n", sb->s_id, function, &vaf);  	if (!sb_rdonly(sb))  		pr_warn("Remounting filesystem read-only\n"); -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  	va_end(args);  } diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 2b1399611d9e..5ba9ef2742f6 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -250,12 +250,12 @@ int affs_init_bitmap(struct super_block *sb, int *flags)  	int i, res = 0;  	struct affs_sb_info *sbi = AFFS_SB(sb); -	if (*flags & MS_RDONLY) +	if (*flags & SB_RDONLY)  		return 0;  	if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) {  		pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id); -		*flags |= MS_RDONLY; +		*flags |= SB_RDONLY;  		return 0;  	} @@ -288,7 +288,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)  		if (affs_checksum_block(sb, bh)) {  			pr_warn("Bitmap %u invalid - mounting %s read only.\n",  				bm->bm_key, sb->s_id); -			*flags |= MS_RDONLY; +			*flags |= SB_RDONLY;  			goto out;  		}  		pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key); diff --git a/fs/affs/super.c b/fs/affs/super.c index 884bedab7266..1117e36134cc 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -356,7 +356,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_magic             = AFFS_SUPER_MAGIC;  	sb->s_op                = &affs_sops; -	sb->s_flags |= MS_NODIRATIME; +	sb->s_flags |= SB_NODIRATIME;  	sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL);  	if (!sbi) @@ -466,7 +466,7 @@ got_root:  	if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS  	     || chksum == MUFS_DCOFS) && !sb_rdonly(sb)) {  		pr_notice("Dircache FS - mounting %s read only\n", sb->s_id); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	switch (chksum) {  	case MUFS_FS: @@ -488,7 +488,7 @@ got_root:  		/* fall thru */  	case FS_OFS:  		affs_set_opt(sbi->s_flags, SF_OFS); -		sb->s_flags |= MS_NOEXEC; +		sb->s_flags |= SB_NOEXEC;  	
	break;  	case MUFS_DCOFS:  	case MUFS_INTLOFS: @@ -497,7 +497,7 @@ got_root:  	case FS_INTLOFS:  		affs_set_opt(sbi->s_flags, SF_INTL);  		affs_set_opt(sbi->s_flags, SF_OFS); -		sb->s_flags |= MS_NOEXEC; +		sb->s_flags |= SB_NOEXEC;  		break;  	default:  		pr_err("Unknown filesystem on device %s: %08X\n", @@ -513,7 +513,7 @@ got_root:  			sig, sig[3] + '0', blocksize);  	} -	sb->s_flags |= MS_NODEV | MS_NOSUID; +	sb->s_flags |= SB_NODEV | SB_NOSUID;  	sbi->s_data_blksize = sb->s_blocksize;  	if (affs_test_opt(sbi->s_flags, SF_OFS)) @@ -570,7 +570,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)  	pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);  	sync_filesystem(sb); -	*flags |= MS_NODIRATIME; +	*flags |= SB_NODIRATIME;  	memcpy(volume, sbi->s_volume, 32);  	if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, @@ -596,10 +596,10 @@ affs_remount(struct super_block *sb, int *flags, char *data)  	memcpy(sbi->s_volume, volume, 32);  	spin_unlock(&sbi->symlink_lock); -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		return 0; -	if (*flags & MS_RDONLY) +	if (*flags & SB_RDONLY)  		affs_free_bitmap(sb);  	else  		res = affs_init_bitmap(sb, flags); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ff8d5bf4354f..23c7f395d718 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -895,20 +895,38 @@ error:   * However, if we didn't have a callback promise outstanding, or it was   * outstanding on a different server, then it won't break it either...   */ -static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +static int afs_dir_remove_link(struct dentry *dentry, struct key *key, +			       unsigned long d_version_before, +			       unsigned long d_version_after)  { +	bool dir_valid;  	int ret = 0; +	/* There were no intervening changes on the server if the version +	 * number we got back was incremented by exactly 1. 
+	 */ +	dir_valid = (d_version_after == d_version_before + 1); +  	if (d_really_is_positive(dentry)) {  		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); -		if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) -			kdebug("AFS_VNODE_DELETED"); -		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - -		ret = afs_validate(vnode, key); -		if (ret == -ESTALE) +		if (dir_valid) { +			drop_nlink(&vnode->vfs_inode); +			if (vnode->vfs_inode.i_nlink == 0) { +				set_bit(AFS_VNODE_DELETED, &vnode->flags); +				clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); +			}  			ret = 0; +		} else { +			clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + +			if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) +				kdebug("AFS_VNODE_DELETED"); + +			ret = afs_validate(vnode, key); +			if (ret == -ESTALE) +				ret = 0; +		}  		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);  	} @@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)  	struct afs_fs_cursor fc;  	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;  	struct key *key; +	unsigned long d_version = (unsigned long)dentry->d_fsdata;  	int ret;  	_enter("{%x:%u},{%pd}", @@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)  		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);  		ret = afs_end_vnode_operation(&fc);  		if (ret == 0) -			ret = afs_dir_remove_link(dentry, key); +			ret = afs_dir_remove_link( +				dentry, key, d_version, +				(unsigned long)dvnode->status.data_version);  	}  error_key: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 3415eb7484f6..1e81864ef0b2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)  	}  	read_sequnlock_excl(&vnode->cb_lock); + +	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) +		clear_nlink(&vnode->vfs_inode); +  	if (valid)  		goto valid; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e03910cebdd4..804d1f905622 100644 --- a/fs/afs/internal.h 
+++ b/fs/afs/internal.h @@ -441,7 +441,10 @@ enum afs_lock_state {  };  /* - * AFS inode private data + * AFS inode private data. + * + * Note that afs_alloc_inode() *must* reset anything that could incorrectly + * leak from one inode to another.   */  struct afs_vnode {  	struct inode		vfs_inode;	/* the VFS's inode record */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ea1460b9b71a..e1126659f043 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,  {  	struct afs_net *net = call->net;  	enum afs_call_state state; -	u32 remote_abort; +	u32 remote_abort = 0;  	int ret;  	_enter("{%s,%zu},,%zu,%d", diff --git a/fs/afs/security.c b/fs/afs/security.c index 2b00097101b3..b88b7d45fdaa 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -120,7 +120,7 @@ static void afs_hash_permits(struct afs_permits *permits)  void afs_cache_permit(struct afs_vnode *vnode, struct key *key,  		      unsigned int cb_break)  { -	struct afs_permits *permits, *xpermits, *replacement, *new = NULL; +	struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;  	afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);  	size_t size = 0;  	bool changed = false; @@ -204,7 +204,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,  	new = kzalloc(sizeof(struct afs_permits) +  		      sizeof(struct afs_permit) * size, GFP_NOFS);  	if (!new) -		return; +		goto out_put;  	refcount_set(&new->usage, 1);  	new->nr_permits = size; @@ -229,8 +229,6 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,  	afs_hash_permits(new); -	afs_put_permits(permits); -  	/* Now see if the permit list we want is actually already available */  	spin_lock(&afs_permits_lock); @@ -262,11 +260,15 @@ found:  	kfree(new);  	spin_lock(&vnode->lock); -	if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) || -	    permits != 
rcu_access_pointer(vnode->permit_cache)) -		goto someone_else_changed_it_unlock; -	rcu_assign_pointer(vnode->permit_cache, replacement); +	zap = rcu_access_pointer(vnode->permit_cache); +	if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) && +	    zap == permits) +		rcu_assign_pointer(vnode->permit_cache, replacement); +	else +		zap = replacement;  	spin_unlock(&vnode->lock); +	afs_put_permits(zap); +out_put:  	afs_put_permits(permits);  	return; diff --git a/fs/afs/super.c b/fs/afs/super.c index 875b5eb02242..1037dd41a622 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -496,10 +496,10 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,  		if (ret < 0)  			goto error_sb;  		as = NULL; -		sb->s_flags |= MS_ACTIVE; +		sb->s_flags |= SB_ACTIVE;  	} else {  		_debug("reuse"); -		ASSERTCMP(sb->s_flags, &, MS_ACTIVE); +		ASSERTCMP(sb->s_flags, &, SB_ACTIVE);  		afs_destroy_sbi(as);  		as = NULL;  	} @@ -536,7 +536,9 @@ static void afs_kill_super(struct super_block *sb)  }  /* - * initialise an inode cache slab element prior to any use + * Initialise an inode cache slab element prior to any use.  Note that + * afs_alloc_inode() *must* reset anything that could incorrectly leak from one + * inode to another.   */  static void afs_i_init_once(void *_vnode)  { @@ -568,11 +570,21 @@ static struct inode *afs_alloc_inode(struct super_block *sb)  	atomic_inc(&afs_count_active_inodes); +	/* Reset anything that shouldn't leak from one inode to the next. 
*/  	memset(&vnode->fid, 0, sizeof(vnode->fid));  	memset(&vnode->status, 0, sizeof(vnode->status));  	vnode->volume		= NULL; +	vnode->lock_key		= NULL; +	vnode->permit_cache	= NULL; +	vnode->cb_interest	= NULL; +#ifdef CONFIG_AFS_FSCACHE +	vnode->cache		= NULL; +#endif +  	vnode->flags		= 1 << AFS_VNODE_UNSET; +	vnode->cb_type		= 0; +	vnode->lock_state	= AFS_VNODE_LOCK_NONE;  	_leave(" = %p", &vnode->vfs_inode);  	return &vnode->vfs_inode; diff --git a/fs/afs/write.c b/fs/afs/write.c index cb5f8a3df577..9370e2feb999 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,  			ret = afs_fill_page(vnode, key, pos + copied,  					    len - copied, page);  			if (ret < 0) -				return ret; +				goto out;  		}  		SetPageUptodate(page);  	} @@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping,  	set_page_dirty(page);  	if (PageDirty(page))  		_debug("dirtied"); +	ret = copied; + +out:  	unlock_page(page);  	put_page(page); - -	return copied; +	return ret;  }  /* diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d79ced925861..82e8f6edfb48 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)  		pr_debug("waiting for mount name=%pd\n", path->dentry);  		status = autofs4_wait(sbi, path, NFY_MOUNT);  		pr_debug("mount wait done status=%d\n", status); -		ino->last_used = jiffies;  	} +	ino->last_used = jiffies;  	return status;  } @@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)  	 */  	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {  		struct dentry *parent = dentry->d_parent; +		struct autofs_info *ino;  		struct dentry *new;  		new = d_lookup(parent, &dentry->d_name);  		if (!new)  			return NULL; -		if (new == dentry) -			dput(new); -		else { -			struct autofs_info *ino; - -			ino = autofs4_dentry_ino(new); -	
		ino->last_used = jiffies; -			dput(path->dentry); -			path->dentry = new; -		} +		ino = autofs4_dentry_ino(new); +		ino->last_used = jiffies; +		dput(path->dentry); +		path->dentry = new;  	}  	return path->dentry;  } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 8fc41705c7cd..961a12dc6dc8 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -170,7 +170,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,  	mutex_unlock(&sbi->wq_mutex); -	if (autofs4_write(sbi, pipe, &pkt, pktsz))  	switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {  	case 0:  		break; diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog index 75a461cfaca6..16f2dfe8c2f7 100644 --- a/fs/befs/ChangeLog +++ b/fs/befs/ChangeLog @@ -365,7 +365,7 @@ Version 0.4 (2001-10-28)  	(fs/befs/super.c)  * Tell the kernel to only mount befs read-only.  -	By setting the MS_RDONLY flag in befs_read_super(). +	By setting the SB_RDONLY flag in befs_read_super().  	Not that it was possible to write before. But now the kernel won't even try.  	(fs/befs/super.c) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index a92355cc453b..ee236231cafa 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -841,7 +841,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)  	if (!sb_rdonly(sb)) {  		befs_warning(sb,  			     "No write support. 
Marking filesystem read-only"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	/* @@ -948,7 +948,7 @@ static int  befs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	if (!(*flags & MS_RDONLY)) +	if (!(*flags & SB_RDONLY))  		return -EINVAL;  	return 0;  } diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b35ce16b3df3..5982c8a71f02 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -295,7 +295,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,  				 unsigned long len, u64 disk_start,  				 unsigned long compressed_len,  				 struct page **compressed_pages, -				 unsigned long nr_pages) +				 unsigned long nr_pages, +				 unsigned int write_flags)  {  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);  	struct bio *bio = NULL; @@ -327,7 +328,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,  	bdev = fs_info->fs_devices->latest_bdev;  	bio = btrfs_bio_alloc(bdev, first_byte); -	bio_set_op_attrs(bio, REQ_OP_WRITE, 0); +	bio->bi_opf = REQ_OP_WRITE | write_flags;  	bio->bi_private = cb;  	bio->bi_end_io = end_compressed_bio_write;  	refcount_set(&cb->pending_bios, 1); @@ -374,7 +375,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,  			bio_put(bio);  			bio = btrfs_bio_alloc(bdev, first_byte); -			bio_set_op_attrs(bio, REQ_OP_WRITE, 0); +			bio->bi_opf = REQ_OP_WRITE | write_flags;  			bio->bi_private = cb;  			bio->bi_end_io = end_compressed_bio_write;  			bio_add_page(bio, page, PAGE_SIZE, 0); @@ -1528,5 +1529,5 @@ unsigned int btrfs_compress_str2level(const char *str)  	if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)  		return str[5] - '0'; -	return 0; +	return BTRFS_ZLIB_DEFAULT_LEVEL;  } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index da20755ebf21..0868cc554f14 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -34,6 +34,8 @@  /* 
Maximum size of data before compression */  #define BTRFS_MAX_UNCOMPRESSED		(SZ_128K) +#define	BTRFS_ZLIB_DEFAULT_LEVEL		3 +  struct compressed_bio {  	/* number of bios pending for this compressed extent */  	refcount_t pending_bios; @@ -91,7 +93,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,  				  unsigned long len, u64 disk_start,  				  unsigned long compressed_len,  				  struct page **compressed_pages, -				  unsigned long nr_pages); +				  unsigned long nr_pages, +				  unsigned int write_flags);  blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,  				 int mirror_num, unsigned long bio_flags); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 531e0a8645b0..1e74cf826532 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1032,14 +1032,17 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,  		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&  		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {  			ret = btrfs_inc_ref(trans, root, buf, 1); -			BUG_ON(ret); /* -ENOMEM */ +			if (ret) +				return ret;  			if (root->root_key.objectid ==  			    BTRFS_TREE_RELOC_OBJECTID) {  				ret = btrfs_dec_ref(trans, root, buf, 0); -				BUG_ON(ret); /* -ENOMEM */ +				if (ret) +					return ret;  				ret = btrfs_inc_ref(trans, root, cow, 1); -				BUG_ON(ret); /* -ENOMEM */ +				if (ret) +					return ret;  			}  			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;  		} else { @@ -1049,7 +1052,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,  				ret = btrfs_inc_ref(trans, root, cow, 1);  			else  				ret = btrfs_inc_ref(trans, root, cow, 0); -			BUG_ON(ret); /* -ENOMEM */ +			if (ret) +				return ret;  		}  		if (new_flags != 0) {  			int level = btrfs_header_level(buf); @@ -1068,9 +1072,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,  				ret = btrfs_inc_ref(trans, root, cow, 1);  			else  				ret = 
btrfs_inc_ref(trans, root, cow, 0); -			BUG_ON(ret); /* -ENOMEM */ +			if (ret) +				return ret;  			ret = btrfs_dec_ref(trans, root, buf, 1); -			BUG_ON(ret); /* -ENOMEM */ +			if (ret) +				return ret;  		}  		clean_tree_block(fs_info, buf);  		*last_ref = 1; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f7df5536ab61..13c260b525a1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2957,7 +2957,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)   */  static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)  { -	return fs_info->sb->s_flags & MS_RDONLY || btrfs_fs_closing(fs_info); +	return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);  }  static inline void free_fs_info(struct btrfs_fs_info *fs_info) @@ -3180,6 +3180,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);  int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,  			       int nr);  int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, +			      unsigned int extra_bits,  			      struct extent_state **cached_state, int dedupe);  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,  			     struct btrfs_root *new_root, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 5d73f79ded8b..056276101c63 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(  	spin_lock(&root->inode_lock);  	node = radix_tree_lookup(&root->delayed_nodes_tree, ino); +  	if (node) {  		if (btrfs_inode->delayed_node) {  			refcount_inc(&node->refs);	/* can be accessed */ @@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(  			spin_unlock(&root->inode_lock);  			return node;  		} -		btrfs_inode->delayed_node = node; -		/* can be accessed and cached in the inode */ -		refcount_add(2, &node->refs); + +		/* +		 * It's possible that we're racing into the middle 
of removing +		 * this node from the radix tree.  In this case, the refcount +		 * was zero and it should never go back to one.  Just return +		 * NULL like it was never in the radix at all; our release +		 * function is in the process of removing it. +		 * +		 * Some implementations of refcount_inc refuse to bump the +		 * refcount once it has hit zero.  If we don't do this dance +		 * here, refcount_inc() may decide to just WARN_ONCE() instead +		 * of actually bumping the refcount. +		 * +		 * If this node is properly in the radix, we want to bump the +		 * refcount twice, once for the inode and once for this get +		 * operation. +		 */ +		if (refcount_inc_not_zero(&node->refs)) { +			refcount_inc(&node->refs); +			btrfs_inode->delayed_node = node; +		} else { +			node = NULL; +		} +  		spin_unlock(&root->inode_lock);  		return node;  	} @@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node(  	mutex_unlock(&delayed_node->mutex);  	if (refcount_dec_and_test(&delayed_node->refs)) { -		bool free = false;  		struct btrfs_root *root = delayed_node->root; +  		spin_lock(&root->inode_lock); -		if (refcount_read(&delayed_node->refs) == 0) { -			radix_tree_delete(&root->delayed_nodes_tree, -					  delayed_node->inode_id); -			free = true; -		} +		/* +		 * Once our refcount goes to zero, nobody is allowed to bump it +		 * back up.  We can delete it now. +		 */ +		ASSERT(refcount_read(&delayed_node->refs) == 0); +		radix_tree_delete(&root->delayed_nodes_tree, +				  delayed_node->inode_id);  		spin_unlock(&root->inode_lock); -		if (free) -			kmem_cache_free(delayed_node_cache, delayed_node); +		kmem_cache_free(delayed_node_cache, delayed_node);  	}  } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index efce9a2fa9be..a8ecccfc36de 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -610,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,  	 * that we don't try and read the other copies of this block, just  	 * return -EIO.  	
 */ -	if (found_level == 0 && btrfs_check_leaf(root, eb)) { +	if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {  		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);  		ret = -EIO;  	} @@ -3231,6 +3231,7 @@ static int write_dev_supers(struct btrfs_device *device,  	int errors = 0;  	u32 crc;  	u64 bytenr; +	int op_flags;  	if (max_mirrors == 0)  		max_mirrors = BTRFS_SUPER_MIRROR_MAX; @@ -3273,13 +3274,10 @@ static int write_dev_supers(struct btrfs_device *device,  		 * we fua the first super.  The others we allow  		 * to go down lazy.  		 */ -		if (i == 0) { -			ret = btrfsic_submit_bh(REQ_OP_WRITE, -				REQ_SYNC | REQ_FUA | REQ_META | REQ_PRIO, bh); -		} else { -			ret = btrfsic_submit_bh(REQ_OP_WRITE, -				REQ_SYNC | REQ_META | REQ_PRIO, bh); -		} +		op_flags = REQ_SYNC | REQ_META | REQ_PRIO; +		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER)) +			op_flags |= REQ_FUA; +		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);  		if (ret)  			errors++;  	} @@ -3848,7 +3846,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)  					 buf->len,  					 fs_info->dirty_metadata_batch);  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY -	if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { +	/* +	 * Since btrfs_mark_buffer_dirty() can be called with item pointer set +	 * but item data not updated. +	 * So here we should only check item pointers, not item data. 
+	 */ +	if (btrfs_header_level(buf) == 0 && +	    btrfs_check_leaf_relaxed(root, buf)) {  		btrfs_print_leaf(buf);  		ASSERT(0);  	} diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7208ecef7088..2f4328511ac8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3502,13 +3502,6 @@ again:  		goto again;  	} -	/* We've already setup this transaction, go ahead and exit */ -	if (block_group->cache_generation == trans->transid && -	    i_size_read(inode)) { -		dcs = BTRFS_DC_SETUP; -		goto out_put; -	} -  	/*  	 * We want to set the generation to 0, that way if anything goes wrong  	 * from here on out we know not to trust this cache when we load up next @@ -3532,6 +3525,13 @@ again:  	}  	WARN_ON(ret); +	/* We've already setup this transaction, go ahead and exit */ +	if (block_group->cache_generation == trans->transid && +	    i_size_read(inode)) { +		dcs = BTRFS_DC_SETUP; +		goto out_put; +	} +  	if (i_size_read(inode) > 0) {  		ret = btrfs_check_trunc_cache_free_space(fs_info,  					&fs_info->global_block_rsv); @@ -9206,6 +9206,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,  	ret = btrfs_del_root(trans, fs_info, &root->root_key);  	if (ret) {  		btrfs_abort_transaction(trans, ret); +		err = ret;  		goto out_end_trans;  	} diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 16045ea86fc1..012d63870b99 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1984,7 +1984,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,  	struct btrfs_bio *bbio = NULL;  	int ret; -	ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); +	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));  	BUG_ON(!mirror_num);  	bio = btrfs_io_bio_alloc(1); @@ -3253,7 +3253,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,  					       delalloc_start,  					       delalloc_end,  					       &page_started, -					       nr_written); +					       nr_written, wbc);  		/* File system has been set read-only */  		
if (ret) {  			SetPageError(page); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4a8861379d3e..93dcae0c3183 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -116,7 +116,8 @@ struct extent_io_ops {  	 */  	int (*fill_delalloc)(void *private_data, struct page *locked_page,  			     u64 start, u64 end, int *page_started, -			     unsigned long *nr_written); +			     unsigned long *nr_written, +			     struct writeback_control *wbc);  	int (*writepage_start_hook)(struct page *page, u64 start, u64 end);  	void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, @@ -365,10 +366,11 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,  		       struct extent_state **cached_state);  static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start, -		u64 end, struct extent_state **cached_state) +				      u64 end, unsigned int extra_bits, +				      struct extent_state **cached_state)  {  	return set_extent_bit(tree, start, end, -			      EXTENT_DELALLOC | EXTENT_UPTODATE, +			      EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,  			      NULL, cached_state, GFP_NOFS);  } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f80254d82f40..eb1bac7c8553 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -477,6 +477,47 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)  	}  } +static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, +					 const u64 start, +					 const u64 len, +					 struct extent_state **cached_state) +{ +	u64 search_start = start; +	const u64 end = start + len - 1; + +	while (search_start < end) { +		const u64 search_len = end - search_start + 1; +		struct extent_map *em; +		u64 em_len; +		int ret = 0; + +		em = btrfs_get_extent(inode, NULL, 0, search_start, +				      search_len, 0); +		if (IS_ERR(em)) +			return PTR_ERR(em); + +		if (em->block_start != EXTENT_MAP_HOLE) +			goto next; + +		em_len = em->len; +		if (em->start < search_start) +			
em_len -= search_start - em->start; +		if (em_len > search_len) +			em_len = search_len; + +		ret = set_extent_bit(&inode->io_tree, search_start, +				     search_start + em_len - 1, +				     EXTENT_DELALLOC_NEW, +				     NULL, cached_state, GFP_NOFS); +next: +		search_start = extent_map_end(em); +		free_extent_map(em); +		if (ret) +			return ret; +	} +	return 0; +} +  /*   * after copy_from_user, pages need to be dirtied and we need to make   * sure holes are created between the current EOF and the start of @@ -497,14 +538,34 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,  	u64 end_of_last_block;  	u64 end_pos = pos + write_bytes;  	loff_t isize = i_size_read(inode); +	unsigned int extra_bits = 0;  	start_pos = pos & ~((u64) fs_info->sectorsize - 1);  	num_bytes = round_up(write_bytes + pos - start_pos,  			     fs_info->sectorsize);  	end_of_last_block = start_pos + num_bytes - 1; + +	if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { +		if (start_pos >= isize && +		    !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { +			/* +			 * There can't be any extents following eof in this case +			 * so just set the delalloc new bit for the range +			 * directly. 
+			 */ +			extra_bits |= EXTENT_DELALLOC_NEW; +		} else { +			err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode), +							    start_pos, +							    num_bytes, cached); +			if (err) +				return err; +		} +	} +  	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, -					cached, 0); +					extra_bits, cached, 0);  	if (err)  		return err; @@ -1404,47 +1465,6 @@ fail:  } -static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, -					 const u64 start, -					 const u64 len, -					 struct extent_state **cached_state) -{ -	u64 search_start = start; -	const u64 end = start + len - 1; - -	while (search_start < end) { -		const u64 search_len = end - search_start + 1; -		struct extent_map *em; -		u64 em_len; -		int ret = 0; - -		em = btrfs_get_extent(inode, NULL, 0, search_start, -				      search_len, 0); -		if (IS_ERR(em)) -			return PTR_ERR(em); - -		if (em->block_start != EXTENT_MAP_HOLE) -			goto next; - -		em_len = em->len; -		if (em->start < search_start) -			em_len -= search_start - em->start; -		if (em_len > search_len) -			em_len = search_len; - -		ret = set_extent_bit(&inode->io_tree, search_start, -				     search_start + em_len - 1, -				     EXTENT_DELALLOC_NEW, -				     NULL, cached_state, GFP_NOFS); -next: -		search_start = extent_map_end(em); -		free_extent_map(em); -		if (ret) -			return ret; -	} -	return 0; -} -  /*   * This function locks the extent and properly waits for data=ordered extents   * to finish before allowing the pages to be modified if need. 
@@ -1473,10 +1493,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,  		+ round_up(pos + write_bytes - start_pos,  			   fs_info->sectorsize) - 1; -	if (start_pos < inode->vfs_inode.i_size || -	    (inode->flags & BTRFS_INODE_PREALLOC)) { +	if (start_pos < inode->vfs_inode.i_size) {  		struct btrfs_ordered_extent *ordered; -		unsigned int clear_bits;  		lock_extent_bits(&inode->io_tree, start_pos, last_pos,  				cached_state); @@ -1498,19 +1516,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,  		}  		if (ordered)  			btrfs_put_ordered_extent(ordered); -		ret = btrfs_find_new_delalloc_bytes(inode, start_pos, -						    last_pos - start_pos + 1, -						    cached_state); -		clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC | -			EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG; -		if (ret) -			clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED; -		clear_extent_bit(&inode->io_tree, start_pos, -				 last_pos, clear_bits, -				 (clear_bits & EXTENT_LOCKED) ? 1 : 0, -				 0, cached_state, GFP_NOFS); -		if (ret) -			return ret; +		clear_extent_bit(&inode->io_tree, start_pos, last_pos, +				 EXTENT_DIRTY | EXTENT_DELALLOC | +				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, +				 0, 0, cached_state, GFP_NOFS);  		*lockstart = start_pos;  		*lockend = last_pos;  		ret = 1; @@ -2048,6 +2057,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	len = (u64)end - (u64)start + 1;  	trace_btrfs_sync_file(file, datasync); +	btrfs_init_log_ctx(&ctx, inode); +  	/*  	 * We write the dirty pages in the range and wait until they complete  	 * out of the ->i_mutex. If so, we can flush the dirty pages by @@ -2194,8 +2205,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	}  	trans->sync = true; -	btrfs_init_log_ctx(&ctx, inode); -  	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);  	if (ret < 0) {  		/* Fallthrough and commit/free transaction. 
*/ @@ -2253,6 +2262,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  		ret = btrfs_end_transaction(trans);  	}  out: +	ASSERT(list_empty(&ctx.list));  	err = file_check_and_advance_wb_err(file);  	if (!ret)  		ret = err; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cdc9f4015ec3..4426d1c73e50 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1264,7 +1264,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,  	/* Lock all pages first so we can lock the extent safely. */  	ret = io_ctl_prepare_pages(io_ctl, inode, 0);  	if (ret) -		goto out; +		goto out_unlock;  	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,  			 &cached_state); @@ -1358,6 +1358,7 @@ out_nospc_locked:  out_nospc:  	cleanup_write_cache_enospc(inode, io_ctl, &cached_state); +out_unlock:  	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))  		up_write(&block_group->data_rwsem); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b93fe05a39c7..e1a7f3cb5be9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -378,6 +378,7 @@ struct async_cow {  	struct page *locked_page;  	u64 start;  	u64 end; +	unsigned int write_flags;  	struct list_head extents;  	struct btrfs_work work;  }; @@ -857,7 +858,8 @@ retry:  				    async_extent->ram_size,  				    ins.objectid,  				    ins.offset, async_extent->pages, -				    async_extent->nr_pages)) { +				    async_extent->nr_pages, +				    async_cow->write_flags)) {  			struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;  			struct page *p = async_extent->pages[0];  			const u64 start = async_extent->start; @@ -1191,7 +1193,8 @@ static noinline void async_cow_free(struct btrfs_work *work)  static int cow_file_range_async(struct inode *inode, struct page *locked_page,  				u64 start, u64 end, int *page_started, -				unsigned long *nr_written) +				unsigned long *nr_written, +				unsigned int 
write_flags)  {  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);  	struct async_cow *async_cow; @@ -1208,6 +1211,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,  		async_cow->root = root;  		async_cow->locked_page = locked_page;  		async_cow->start = start; +		async_cow->write_flags = write_flags;  		if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&  		    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) @@ -1577,11 +1581,13 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end)   */  static int run_delalloc_range(void *private_data, struct page *locked_page,  			      u64 start, u64 end, int *page_started, -			      unsigned long *nr_written) +			      unsigned long *nr_written, +			      struct writeback_control *wbc)  {  	struct inode *inode = private_data;  	int ret;  	int force_cow = need_force_cow(inode, start, end); +	unsigned int write_flags = wbc_to_write_flags(wbc);  	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {  		ret = run_delalloc_nocow(inode, locked_page, start, end, @@ -1596,7 +1602,8 @@ static int run_delalloc_range(void *private_data, struct page *locked_page,  		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,  			&BTRFS_I(inode)->runtime_flags);  		ret = cow_file_range_async(inode, locked_page, start, end, -					   page_started, nr_written); +					   page_started, nr_written, +					   write_flags);  	}  	if (ret)  		btrfs_cleanup_ordered_extents(inode, start, end - start + 1); @@ -2025,11 +2032,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,  }  int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, +			      unsigned int extra_bits,  			      struct extent_state **cached_state, int dedupe)  {  	WARN_ON((end & (PAGE_SIZE - 1)) == 0);  	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, -				   cached_state); +				   extra_bits, cached_state);  }  /* see btrfs_writepage_start_hook for details on why this is required 
*/ @@ -2090,7 +2098,7 @@ again:  		goto out;  	 } -	btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, +	btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state,  				  0);  	ClearPageChecked(page);  	set_page_dirty(page); @@ -2997,6 +3005,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  		compress_type = ordered_extent->compress_type;  	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {  		BUG_ON(compress_type); +		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, +				       ordered_extent->len);  		ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),  						ordered_extent->file_offset,  						ordered_extent->file_offset + @@ -4790,7 +4800,7 @@ again:  			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,  			  0, 0, &cached_state, GFP_NOFS); -	ret = btrfs_set_extent_delalloc(inode, block_start, block_end, +	ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,  					&cached_state, 0);  	if (ret) {  		unlock_extent_cached(io_tree, block_start, block_end, @@ -5438,6 +5448,14 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,  		goto out_err;  	btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); +	if (location->type != BTRFS_INODE_ITEM_KEY && +	    location->type != BTRFS_ROOT_ITEM_KEY) { +		btrfs_warn(root->fs_info, +"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", +			   __func__, name, btrfs_ino(BTRFS_I(dir)), +			   location->objectid, location->type, location->offset); +		goto out_err; +	}  out:  	btrfs_free_path(path);  	return ret; @@ -5754,8 +5772,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)  		return inode;  	} -	BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); -  	index = srcu_read_lock(&fs_info->subvol_srcu);  	ret = fixup_tree_root_location(fs_info, dir, dentry,  				       &location, &sub_root); @@ -9150,7 +9166,7 @@ again:  			  
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,  			  0, 0, &cached_state, GFP_NOFS); -	ret = btrfs_set_extent_delalloc(inode, page_start, end, +	ret = btrfs_set_extent_delalloc(inode, page_start, end, 0,  					&cached_state, 0);  	if (ret) {  		unlock_extent_cached(io_tree, page_start, page_end, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd172a93d11a..2ef8acaac688 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1172,7 +1172,7 @@ again:  	if (!i_done || ret)  		goto out; -	if (!(inode->i_sb->s_flags & MS_ACTIVE)) +	if (!(inode->i_sb->s_flags & SB_ACTIVE))  		goto out;  	/* @@ -1333,7 +1333,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,  		 * make sure we stop running if someone unmounts  		 * the FS  		 */ -		if (!(inode->i_sb->s_flags & MS_ACTIVE)) +		if (!(inode->i_sb->s_flags & SB_ACTIVE))  			break;  		if (btrfs_defrag_cancelled(fs_info)) { @@ -2206,7 +2206,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,  	if (!path)  		return -ENOMEM; -	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; +	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];  	key.objectid = tree_id;  	key.type = BTRFS_ROOT_ITEM_KEY; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4cf2eb67eba6..f0c3f00e97cb 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3268,7 +3268,8 @@ static int relocate_file_extent_cluster(struct inode *inode,  			nr++;  		} -		btrfs_set_extent_delalloc(inode, page_start, page_end, NULL, 0); +		btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL, +					  0);  		set_page_dirty(page);  		unlock_extent(&BTRFS_I(inode)->io_tree, diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c10e4c70f02d..20d3300bd268 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3521,7 +3521,40 @@ out:  }  /* - * Check if ino ino1 is an ancestor of inode ino2 in the given root. + * Check if inode ino2, or any of its ancestors, is inode ino1. + * Return 1 if true, 0 if false and < 0 on error. 
+ */ +static int check_ino_in_path(struct btrfs_root *root, +			     const u64 ino1, +			     const u64 ino1_gen, +			     const u64 ino2, +			     const u64 ino2_gen, +			     struct fs_path *fs_path) +{ +	u64 ino = ino2; + +	if (ino1 == ino2) +		return ino1_gen == ino2_gen; + +	while (ino > BTRFS_FIRST_FREE_OBJECTID) { +		u64 parent; +		u64 parent_gen; +		int ret; + +		fs_path_reset(fs_path); +		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); +		if (ret < 0) +			return ret; +		if (parent == ino1) +			return parent_gen == ino1_gen; +		ino = parent; +	} +	return 0; +} + +/* + * Check if ino ino1 is an ancestor of inode ino2 in the given root for any + * possible path (in case ino2 is not a directory and has multiple hard links).   * Return 1 if true, 0 if false and < 0 on error.   */  static int is_ancestor(struct btrfs_root *root, @@ -3530,36 +3563,91 @@ static int is_ancestor(struct btrfs_root *root,  		       const u64 ino2,  		       struct fs_path *fs_path)  { -	u64 ino = ino2; -	bool free_path = false; +	bool free_fs_path = false;  	int ret = 0; +	struct btrfs_path *path = NULL; +	struct btrfs_key key;  	if (!fs_path) {  		fs_path = fs_path_alloc();  		if (!fs_path)  			return -ENOMEM; -		free_path = true; +		free_fs_path = true;  	} -	while (ino > BTRFS_FIRST_FREE_OBJECTID) { -		u64 parent; -		u64 parent_gen; +	path = alloc_path_for_send(); +	if (!path) { +		ret = -ENOMEM; +		goto out; +	} -		fs_path_reset(fs_path); -		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); -		if (ret < 0) { -			if (ret == -ENOENT && ino == ino2) -				ret = 0; -			goto out; +	key.objectid = ino2; +	key.type = BTRFS_INODE_REF_KEY; +	key.offset = 0; + +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +	if (ret < 0) +		goto out; + +	while (true) { +		struct extent_buffer *leaf = path->nodes[0]; +		int slot = path->slots[0]; +		u32 cur_offset = 0; +		u32 item_size; + +		if (slot >= btrfs_header_nritems(leaf)) { +			ret = btrfs_next_leaf(root, path); 
+			if (ret < 0) +				goto out; +			if (ret > 0) +				break; +			continue;  		} -		if (parent == ino1) { -			ret = parent_gen == ino1_gen ? 1 : 0; -			goto out; + +		btrfs_item_key_to_cpu(leaf, &key, slot); +		if (key.objectid != ino2) +			break; +		if (key.type != BTRFS_INODE_REF_KEY && +		    key.type != BTRFS_INODE_EXTREF_KEY) +			break; + +		item_size = btrfs_item_size_nr(leaf, slot); +		while (cur_offset < item_size) { +			u64 parent; +			u64 parent_gen; + +			if (key.type == BTRFS_INODE_EXTREF_KEY) { +				unsigned long ptr; +				struct btrfs_inode_extref *extref; + +				ptr = btrfs_item_ptr_offset(leaf, slot); +				extref = (struct btrfs_inode_extref *) +					(ptr + cur_offset); +				parent = btrfs_inode_extref_parent(leaf, +								   extref); +				cur_offset += sizeof(*extref); +				cur_offset += btrfs_inode_extref_name_len(leaf, +								  extref); +			} else { +				parent = key.offset; +				cur_offset = item_size; +			} + +			ret = get_inode_info(root, parent, NULL, &parent_gen, +					     NULL, NULL, NULL, NULL); +			if (ret < 0) +				goto out; +			ret = check_ino_in_path(root, ino1, ino1_gen, +						parent, parent_gen, fs_path); +			if (ret) +				goto out;  		} -		ino = parent; +		path->slots[0]++;  	} +	ret = 0;   out: -	if (free_path) +	btrfs_free_path(path); +	if (free_fs_path)  		fs_path_free(fs_path);  	return ret;  } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 65af029559b5..3a4dce153645 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -107,7 +107,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)  		return;  	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		btrfs_info(fs_info, "forced readonly");  		/*  		 * Note that a running device replace operation is not @@ -137,7 +137,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function  	/*  	 * Special case: if the error is EROFS, and we're already -	 * under MS_RDONLY, 
then it is safe here. +	 * under SB_RDONLY, then it is safe here.  	 */  	if (errno == -EROFS && sb_rdonly(sb))    		return; @@ -168,7 +168,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function  	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);  	/* Don't go through full error handling during mount */ -	if (sb->s_flags & MS_BORN) +	if (sb->s_flags & SB_BORN)  		btrfs_handle_error(fs_info);  } @@ -507,9 +507,18 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,  			    token == Opt_compress_force ||  			    strncmp(args[0].from, "zlib", 4) == 0) {  				compress_type = "zlib"; +  				info->compress_type = BTRFS_COMPRESS_ZLIB; -				info->compress_level = -					btrfs_compress_str2level(args[0].from); +				info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; +				/* +				 * args[0] contains uninitialized data since +				 * for these tokens we don't expect any +				 * parameter. +				 */ +				if (token != Opt_compress && +				    token != Opt_compress_force) +					info->compress_level = +					  btrfs_compress_str2level(args[0].from);  				btrfs_set_opt(info->mount_opt, COMPRESS);  				btrfs_clear_opt(info->mount_opt, NODATACOW);  				btrfs_clear_opt(info->mount_opt, NODATASUM); @@ -625,7 +634,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,  			break;  		case Opt_acl:  #ifdef CONFIG_BTRFS_FS_POSIX_ACL -			info->sb->s_flags |= MS_POSIXACL; +			info->sb->s_flags |= SB_POSIXACL;  			break;  #else  			btrfs_err(info, "support for ACL not compiled in!"); @@ -633,7 +642,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,  			goto out;  #endif  		case Opt_noacl: -			info->sb->s_flags &= ~MS_POSIXACL; +			info->sb->s_flags &= ~SB_POSIXACL;  			break;  		case Opt_notreelog:  			btrfs_set_and_info(info, NOTREELOG, @@ -851,7 +860,7 @@ check:  	/*  	 * Extra check for current option against current flag  	 */ -	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { +	if 
(btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {  		btrfs_err(info,  			  "nologreplay must be used with ro mount option");  		ret = -EINVAL; @@ -1147,7 +1156,7 @@ static int btrfs_fill_super(struct super_block *sb,  	sb->s_xattr = btrfs_xattr_handlers;  	sb->s_time_gran = 1;  #ifdef CONFIG_BTRFS_FS_POSIX_ACL -	sb->s_flags |= MS_POSIXACL; +	sb->s_flags |= SB_POSIXACL;  #endif  	sb->s_flags |= SB_I_VERSION;  	sb->s_iflags |= SB_I_CGROUPWB; @@ -1180,7 +1189,7 @@ static int btrfs_fill_super(struct super_block *sb,  	}  	cleancache_init_fs(sb); -	sb->s_flags |= MS_ACTIVE; +	sb->s_flags |= SB_ACTIVE;  	return 0;  fail_close: @@ -1277,7 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)  		seq_puts(seq, ",flushoncommit");  	if (btrfs_test_opt(info, DISCARD))  		seq_puts(seq, ",discard"); -	if (!(info->sb->s_flags & MS_POSIXACL)) +	if (!(info->sb->s_flags & SB_POSIXACL))  		seq_puts(seq, ",noacl");  	if (btrfs_test_opt(info, SPACE_CACHE))  		seq_puts(seq, ",space_cache"); @@ -1409,11 +1418,11 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,  	mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);  	if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { -		if (flags & MS_RDONLY) { -			mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, +		if (flags & SB_RDONLY) { +			mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,  					     device_name, newargs);  		} else { -			mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, +			mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,  					     device_name, newargs);  			if (IS_ERR(mnt)) {  				root = ERR_CAST(mnt); @@ -1565,7 +1574,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,  	u64 subvol_objectid = 0;  	int error = 0; -	if (!(flags & MS_RDONLY)) +	if (!(flags & SB_RDONLY))  		mode |= FMODE_WRITE;  	error = btrfs_parse_early_options(data, mode, fs_type, @@ -1619,13 +1628,13 @@ static struct 
dentry *btrfs_mount(struct file_system_type *fs_type, int flags,  	if (error)  		goto error_fs_info; -	if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { +	if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {  		error = -EACCES;  		goto error_close_devices;  	}  	bdev = fs_devices->latest_bdev; -	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC, +	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,  		 fs_info);  	if (IS_ERR(s)) {  		error = PTR_ERR(s); @@ -1635,7 +1644,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,  	if (s->s_root) {  		btrfs_close_devices(fs_devices);  		free_fs_info(fs_info); -		if ((flags ^ s->s_flags) & MS_RDONLY) +		if ((flags ^ s->s_flags) & SB_RDONLY)  			error = -EBUSY;  	} else {  		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); @@ -1702,11 +1711,11 @@ static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,  {  	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&  	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || -	     (flags & MS_RDONLY))) { +	     (flags & SB_RDONLY))) {  		/* wait for any defraggers to finish */  		wait_event(fs_info->transaction_wait,  			   (atomic_read(&fs_info->defrag_running) == 0)); -		if (flags & MS_RDONLY) +		if (flags & SB_RDONLY)  			sync_filesystem(fs_info->sb);  	}  } @@ -1766,10 +1775,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  	btrfs_resize_thread_pool(fs_info,  		fs_info->thread_pool_size, old_thread_pool_size); -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		goto out; -	if (*flags & MS_RDONLY) { +	if (*flags & SB_RDONLY) {  		/*  		 * this also happens on 'umount -rf' or on shutdown, when  		 * the filesystem is busy. @@ -1781,10 +1790,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  		/* avoid complains from lockdep et al. 
*/  		up(&fs_info->uuid_tree_rescan_sem); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		/* -		 * Setting MS_RDONLY will put the cleaner thread to +		 * Setting SB_RDONLY will put the cleaner thread to  		 * sleep at the next loop if it's already active.  		 * If it's already asleep, we'll leave unused block  		 * groups on disk until we're mounted read-write again @@ -1856,7 +1865,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  				goto restore;  			}  		} -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  		set_bit(BTRFS_FS_OPEN, &fs_info->flags);  	} @@ -1866,9 +1875,9 @@ out:  	return 0;  restore: -	/* We've hit an error - don't reset MS_RDONLY */ +	/* We've hit an error - don't reset SB_RDONLY */  	if (sb_rdonly(sb)) -		old_flags |= MS_RDONLY; +		old_flags |= SB_RDONLY;  	sb->s_flags = old_flags;  	fs_info->mount_opt = old_opts;  	fs_info->compress_type = old_compress_type; diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index d06b1c931d05..2e7f64a3b22b 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -114,7 +114,7 @@ static int test_find_delalloc(u32 sectorsize)  	 * |--- delalloc ---|  	 * |---  search  ---|  	 */ -	set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL); +	set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL);  	start = 0;  	end = 0;  	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -145,7 +145,7 @@ static int test_find_delalloc(u32 sectorsize)  		test_msg("Couldn't find the locked page\n");  		goto out_bits;  	} -	set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL); +	set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL);  	start = test_start;  	end = 0;  	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -200,7 +200,7 @@ static int test_find_delalloc(u32 sectorsize)  	 *  	 * We are re-using our test_start from above since it works out well.  	 
*/ -	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL); +	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL);  	start = test_start;  	end = 0;  	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index f797642c013d..30affb60da51 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -968,7 +968,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)  	btrfs_test_inode_set_ops(inode);  	/* [BTRFS_MAX_EXTENT_SIZE] */ -	ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, +	ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,  					NULL, 0);  	if (ret) {  		test_msg("btrfs_set_extent_delalloc returned %d\n", ret); @@ -984,7 +984,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)  	/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */  	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,  					BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, -					NULL, 0); +					0, NULL, 0);  	if (ret) {  		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);  		goto out; @@ -1018,7 +1018,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)  	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,  					(BTRFS_MAX_EXTENT_SIZE >> 1)  					+ sectorsize - 1, -					NULL, 0); +					0, NULL, 0);  	if (ret) {  		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);  		goto out; @@ -1036,7 +1036,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)  	ret = btrfs_set_extent_delalloc(inode,  			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,  			(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, -			NULL, 0); +			0, NULL, 0);  	if (ret) {  		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);  		goto out; @@ -1053,7 +1053,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)  	*/  	ret = btrfs_set_extent_delalloc(inode,  			
BTRFS_MAX_EXTENT_SIZE + sectorsize, -			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); +			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);  	if (ret) {  		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);  		goto out; @@ -1089,7 +1089,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)  	 */  	ret = btrfs_set_extent_delalloc(inode,  			BTRFS_MAX_EXTENT_SIZE + sectorsize, -			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); +			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);  	if (ret) {  		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);  		goto out; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 114fc5f0ecc5..ce4ed6ec8f39 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -242,7 +242,8 @@ static int check_leaf_item(struct btrfs_root *root,  	return ret;  } -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) +static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, +		      bool check_item_data)  {  	struct btrfs_fs_info *fs_info = root->fs_info;  	/* No valid key type is 0, so all key should be larger than this key */ @@ -361,10 +362,15 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)  			return -EUCLEAN;  		} -		/* Check if the item size and content meet other criteria */ -		ret = check_leaf_item(root, leaf, &key, slot); -		if (ret < 0) -			return ret; +		if (check_item_data) { +			/* +			 * Check if the item size and content meet other +			 * criteria +			 */ +			ret = check_leaf_item(root, leaf, &key, slot); +			if (ret < 0) +				return ret; +		}  		prev_key.objectid = key.objectid;  		prev_key.type = key.type; @@ -374,6 +380,17 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)  	return 0;  } +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) +{ +	return check_leaf(root, leaf, true); +} + +int btrfs_check_leaf_relaxed(struct btrfs_root 
*root, +			     struct extent_buffer *leaf) +{ +	return check_leaf(root, leaf, false); +} +  int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)  {  	unsigned long nr = btrfs_header_nritems(node); diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 96c486e95d70..3d53e8d6fda0 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -20,7 +20,19 @@  #include "ctree.h"  #include "extent_io.h" -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); +/* + * Comprehensive leaf checker. + * Will check not only the item pointers, but also every possible member + * in item data. + */ +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); + +/* + * Less strict leaf checker. + * Will only check item pointers, not reading item data. + */ +int btrfs_check_leaf_relaxed(struct btrfs_root *root, +			     struct extent_buffer *leaf);  int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);  #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index aa7c71cff575..7bf9b31561db 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4102,7 +4102,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,  	if (ordered_io_err) {  		ctx->io_err = -EIO; -		return 0; +		return ctx->io_err;  	}  	btrfs_init_map_token(&token); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f1ecb938ba4d..a25684287501 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -189,6 +189,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)  				    struct btrfs_device, dev_list);  		list_del(&device->dev_list);  		rcu_string_free(device->name); +		bio_put(device->flush_bio);  		kfree(device);  	}  	kfree(fs_devices); @@ -236,7 +237,6 @@ static struct btrfs_device *__alloc_device(void)  		kfree(dev);  		return ERR_PTR(-ENOMEM);  	} -	bio_get(dev->flush_bio);  	INIT_LIST_HEAD(&dev->dev_list);  	INIT_LIST_HEAD(&dev->dev_alloc_list); @@ -578,6 
+578,7 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev)  				fs_devs->num_devices--;  				list_del(&dev->dev_list);  				rcu_string_free(dev->name); +				bio_put(dev->flush_bio);  				kfree(dev);  			}  			break; @@ -630,6 +631,7 @@ static noinline int device_list_add(const char *path,  		name = rcu_string_strdup(path, GFP_NOFS);  		if (!name) { +			bio_put(device->flush_bio);  			kfree(device);  			return -ENOMEM;  		} @@ -742,6 +744,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)  			name = rcu_string_strdup(orig_dev->name->str,  					GFP_KERNEL);  			if (!name) { +				bio_put(device->flush_bio);  				kfree(device);  				goto error;  			} @@ -807,6 +810,7 @@ again:  		list_del_init(&device->dev_list);  		fs_devices->num_devices--;  		rcu_string_free(device->name); +		bio_put(device->flush_bio);  		kfree(device);  	} @@ -1750,20 +1754,24 @@ static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info,  	key.offset = device->devid;  	ret = btrfs_search_slot(trans, root, &key, path, -1, 1); -	if (ret < 0) -		goto out; - -	if (ret > 0) { -		ret = -ENOENT; +	if (ret) { +		if (ret > 0) +			ret = -ENOENT; +		btrfs_abort_transaction(trans, ret); +		btrfs_end_transaction(trans);  		goto out;  	}  	ret = btrfs_del_item(trans, root, path); -	if (ret) -		goto out; +	if (ret) { +		btrfs_abort_transaction(trans, ret); +		btrfs_end_transaction(trans); +	} +  out:  	btrfs_free_path(path); -	btrfs_commit_transaction(trans); +	if (!ret) +		ret = btrfs_commit_transaction(trans);  	return ret;  } @@ -1993,7 +2001,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,  	fs_devices = srcdev->fs_devices;  	list_del_rcu(&srcdev->dev_list); -	list_del_rcu(&srcdev->dev_alloc_list); +	list_del(&srcdev->dev_alloc_list);  	fs_devices->num_devices--;  	if (srcdev->missing)  		fs_devices->missing_devices--; @@ -2349,6 +2357,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path  	name = 
rcu_string_strdup(device_path, GFP_KERNEL);  	if (!name) { +		bio_put(device->flush_bio);  		kfree(device);  		ret = -ENOMEM;  		goto error; @@ -2358,6 +2367,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path  	trans = btrfs_start_transaction(root, 0);  	if (IS_ERR(trans)) {  		rcu_string_free(device->name); +		bio_put(device->flush_bio);  		kfree(device);  		ret = PTR_ERR(trans);  		goto error; @@ -2384,7 +2394,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path  	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);  	if (seeding_dev) { -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  		ret = btrfs_prepare_sprout(fs_info);  		if (ret) {  			btrfs_abort_transaction(trans, ret); @@ -2497,10 +2507,11 @@ error_sysfs:  	btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);  error_trans:  	if (seeding_dev) -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	if (trans)  		btrfs_end_transaction(trans);  	rcu_string_free(device->name); +	bio_put(device->flush_bio);  	kfree(device);  error:  	blkdev_put(bdev, FMODE_EXCL); @@ -2567,6 +2578,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,  	name = rcu_string_strdup(device_path, GFP_KERNEL);  	if (!name) { +		bio_put(device->flush_bio);  		kfree(device);  		ret = -ENOMEM;  		goto error; @@ -6284,6 +6296,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,  		ret = find_next_devid(fs_info, &tmp);  		if (ret) { +			bio_put(dev->flush_bio);  			kfree(dev);  			return ERR_PTR(ret);  		} diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ab69dcb70e8a..1b468250e947 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1440,6 +1440,29 @@ static int __close_session(struct ceph_mds_client *mdsc,  	return request_close_session(mdsc, session);  } +static bool drop_negative_children(struct dentry *dentry) +{ +	struct dentry *child; +	bool all_negative = true; + +	if 
(!d_is_dir(dentry)) +		goto out; + +	spin_lock(&dentry->d_lock); +	list_for_each_entry(child, &dentry->d_subdirs, d_child) { +		if (d_really_is_positive(child)) { +			all_negative = false; +			break; +		} +	} +	spin_unlock(&dentry->d_lock); + +	if (all_negative) +		shrink_dcache_parent(dentry); +out: +	return all_negative; +} +  /*   * Trim old(er) caps.   * @@ -1490,16 +1513,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)  	if ((used | wanted) & ~oissued & mine)  		goto out;   /* we need these caps */ -	session->s_trim_caps--;  	if (oissued) {  		/* we aren't the only cap.. just remove us */  		__ceph_remove_cap(cap, true); +		session->s_trim_caps--;  	} else { +		struct dentry *dentry;  		/* try dropping referring dentries */  		spin_unlock(&ci->i_ceph_lock); -		d_prune_aliases(inode); -		dout("trim_caps_cb %p cap %p  pruned, count now %d\n", -		     inode, cap, atomic_read(&inode->i_count)); +		dentry = d_find_any_alias(inode); +		if (dentry && drop_negative_children(dentry)) { +			int count; +			dput(dentry); +			d_prune_aliases(inode); +			count = atomic_read(&inode->i_count); +			if (count == 1) +				session->s_trim_caps--; +			dout("trim_caps_cb %p cap %p pruned, count now %d\n", +			     inode, cap, count); +		} else { +			dput(dentry); +		}  		return 0;  	} diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fe9fbb3f13f7..a62d2a9841dc 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -331,11 +331,11 @@ static int parse_fsopt_token(char *c, void *private)  		break;  #ifdef CONFIG_CEPH_FS_POSIX_ACL  	case Opt_acl: -		fsopt->sb_flags |= MS_POSIXACL; +		fsopt->sb_flags |= SB_POSIXACL;  		break;  #endif  	case Opt_noacl: -		fsopt->sb_flags &= ~MS_POSIXACL; +		fsopt->sb_flags &= ~SB_POSIXACL;  		break;  	default:  		BUG_ON(token); @@ -520,7 +520,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)  		seq_puts(m, ",nopoolperm");  #ifdef CONFIG_CEPH_FS_POSIX_ACL -	if (fsopt->sb_flags & MS_POSIXACL) +	
if (fsopt->sb_flags & SB_POSIXACL)  		seq_puts(m, ",acl");  	else  		seq_puts(m, ",noacl"); @@ -988,7 +988,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,  	dout("ceph_mount\n");  #ifdef CONFIG_CEPH_FS_POSIX_ACL -	flags |= MS_POSIXACL; +	flags |= SB_POSIXACL;  #endif  	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name);  	if (err < 0) { diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index cbd216b57239..350fa55a1bf7 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -42,7 +42,7 @@  #define CIFS_MOUNT_MULTIUSER	0x20000 /* multiuser mount */  #define CIFS_MOUNT_STRICT_IO	0x40000 /* strict cache mode */  #define CIFS_MOUNT_RWPIDFORWARD	0x80000 /* use pid forwarding for rw */ -#define CIFS_MOUNT_POSIXACL	0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ +#define CIFS_MOUNT_POSIXACL	0x100000 /* mirror of SB_POSIXACL in mnt_cifs_flags */  #define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */  #define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */  #define CIFS_MOUNT_MAP_SFM_CHR	0x800000 /* SFM/MAC mapping for illegal chars */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8c8b75d33f31..31b7565b1617 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -125,7 +125,7 @@ cifs_read_super(struct super_block *sb)  	tcon = cifs_sb_master_tcon(cifs_sb);  	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) -		sb->s_flags |= MS_POSIXACL; +		sb->s_flags |= SB_POSIXACL;  	if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)  		sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -497,7 +497,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)  		seq_puts(s, ",cifsacl");  	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)  		seq_puts(s, ",dynperm"); -	if (root->d_sb->s_flags & MS_POSIXACL) +	if (root->d_sb->s_flags & SB_POSIXACL)  		seq_puts(s, ",acl");  	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)  		seq_puts(s, ",mfsymlinks"); @@ -573,7 
+573,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)  static int cifs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_NODIRATIME; +	*flags |= SB_NODIRATIME;  	return 0;  } @@ -708,7 +708,7 @@ cifs_do_mount(struct file_system_type *fs_type,  	rc = cifs_mount(cifs_sb, volume_info);  	if (rc) { -		if (!(flags & MS_SILENT)) +		if (!(flags & SB_SILENT))  			cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",  				 rc);  		root = ERR_PTR(rc); @@ -720,7 +720,7 @@ cifs_do_mount(struct file_system_type *fs_type,  	mnt_data.flags = flags;  	/* BB should we make this contingent on mount parm? */ -	flags |= MS_NODIRATIME | MS_NOATIME; +	flags |= SB_NODIRATIME | SB_NOATIME;  	sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data);  	if (IS_ERR(sb)) { @@ -739,7 +739,7 @@ cifs_do_mount(struct file_system_type *fs_type,  			goto out_super;  		} -		sb->s_flags |= MS_ACTIVE; +		sb->s_flags |= SB_ACTIVE;  	}  	root = cifs_get_root(volume_info, sb); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e185b2853eab..b16583594d1a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -559,8 +559,8 @@ struct smb_vol {  			 CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \  			 CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID) -#define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \ -		      MS_NODEV | MS_SYNCHRONOUS) +#define CIFS_MS_MASK (SB_RDONLY | SB_MANDLOCK | SB_NOEXEC | SB_NOSUID | \ +		      SB_NODEV | SB_SYNCHRONOUS)  struct cifs_mnt_data {  	struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7c732cb44164..ecb99079363a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -985,7 +985,7 @@ retry_iget5_locked:  		}  		cifs_fattr_to_inode(inode, fattr); -		if (sb->s_flags & MS_NOATIME) +		if (sb->s_flags & SB_NOATIME)  			inode->i_flags |= S_NOATIME | S_NOCMTIME;  		if (inode->i_state & I_NEW) {  			inode->i_ino = hash; diff 
--git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e06740436b92..ed88ab8a4774 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1406,7 +1406,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,  	} while (rc == -EAGAIN);  	if (rc) { -		cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); +		if (rc != -ENOENT) +			cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);  		goto out;  	} diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5331631386a2..01346b8b6edb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2678,27 +2678,27 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,  	cifs_small_buf_release(req);  	rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; -	shdr = get_sync_hdr(rsp); -	if (shdr->Status == STATUS_END_OF_FILE) { +	if (rc) { +		if (rc != -ENODATA) { +			cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); +			cifs_dbg(VFS, "Send error in read = %d\n", rc); +		}  		free_rsp_buf(resp_buftype, rsp_iov.iov_base); -		return 0; +		return rc == -ENODATA ? 
0 : rc;  	} -	if (rc) { -		cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); -		cifs_dbg(VFS, "Send error in read = %d\n", rc); -	} else { -		*nbytes = le32_to_cpu(rsp->DataLength); -		if ((*nbytes > CIFS_MAX_MSGSIZE) || -		    (*nbytes > io_parms->length)) { -			cifs_dbg(FYI, "bad length %d for count %d\n", -				 *nbytes, io_parms->length); -			rc = -EIO; -			*nbytes = 0; -		} +	*nbytes = le32_to_cpu(rsp->DataLength); +	if ((*nbytes > CIFS_MAX_MSGSIZE) || +	    (*nbytes > io_parms->length)) { +		cifs_dbg(FYI, "bad length %d for count %d\n", +			 *nbytes, io_parms->length); +		rc = -EIO; +		*nbytes = 0;  	} +	shdr = get_sync_hdr(rsp); +  	if (*buf) {  		memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);  		free_rsp_buf(resp_buftype, rsp_iov.iov_base); diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 52f975d848a0..316af84674f1 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -117,7 +117,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler,  #ifdef CONFIG_CIFS_POSIX  		if (!value)  			goto out; -		if (sb->s_flags & MS_POSIXACL) +		if (sb->s_flags & SB_POSIXACL)  			rc = CIFSSMBSetPosixACL(xid, pTcon, full_path,  				value, (const int)size,  				ACL_TYPE_ACCESS, cifs_sb->local_nls, @@ -129,7 +129,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler,  #ifdef CONFIG_CIFS_POSIX  		if (!value)  			goto out; -		if (sb->s_flags & MS_POSIXACL) +		if (sb->s_flags & SB_POSIXACL)  			rc = CIFSSMBSetPosixACL(xid, pTcon, full_path,  				value, (const int)size,  				ACL_TYPE_DEFAULT, cifs_sb->local_nls, @@ -266,7 +266,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,  	case XATTR_ACL_ACCESS:  #ifdef CONFIG_CIFS_POSIX -		if (sb->s_flags & MS_POSIXACL) +		if (sb->s_flags & SB_POSIXACL)  			rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,  				value, size, ACL_TYPE_ACCESS,  				cifs_sb->local_nls, @@ -276,7 +276,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,  	case XATTR_ACL_DEFAULT:  #ifdef 
CONFIG_CIFS_POSIX -		if (sb->s_flags & MS_POSIXACL) +		if (sb->s_flags & SB_POSIXACL)  			rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,  				value, size, ACL_TYPE_DEFAULT,  				cifs_sb->local_nls, diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6f0a6a4d5faa..97424cf206c0 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -96,7 +96,7 @@ void coda_destroy_inodecache(void)  static int coda_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_NOATIME; +	*flags |= SB_NOATIME;  	return 0;  } @@ -188,7 +188,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)  	mutex_unlock(&vc->vc_mutex);  	sb->s_fs_info = vc; -	sb->s_flags |= MS_NOATIME; +	sb->s_flags |= SB_NOATIME;  	sb->s_blocksize = 4096;	/* XXXXX  what do we put here?? */  	sb->s_blocksize_bits = 12;  	sb->s_magic = CODA_SUPER_MAGIC; diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index f937082f3244..58e2fe40b2a0 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV  config CRAMFS_MTD  	bool "Support CramFs image directly mapped in physical memory"  	depends on CRAMFS && MTD +	depends on CRAMFS=m || MTD=y  	default y if !CRAMFS_BLOCKDEV  	help  	  This option allows the CramFs driver to load data directly from diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 9a2ab419ba62..017b0ab19bc4 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -505,7 +505,7 @@ static void cramfs_kill_sb(struct super_block *sb)  static int cramfs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  	return 0;  } @@ -592,7 +592,7 @@ static int cramfs_finalize_super(struct super_block *sb,  	struct inode *root;  	/* Set it all up.. 
*/ -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  	sb->s_op = &cramfs_ops;  	root = get_cramfs_inode(sb, cramfs_root, 0);  	if (IS_ERR(root)) diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index f2677c90d96e..025d66a705db 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -560,8 +560,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags  	 * Set the POSIX ACL flag based on whether they're enabled in the lower  	 * mount.  	 */ -	s->s_flags = flags & ~MS_POSIXACL; -	s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL; +	s->s_flags = flags & ~SB_POSIXACL; +	s->s_flags |= path.dentry->d_sb->s_flags & SB_POSIXACL;  	/**  	 * Force a read-only eCryptfs mount when: @@ -569,7 +569,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags  	 *   2) The ecryptfs_encrypted_view mount option is specified  	 */  	if (sb_rdonly(path.dentry->d_sb) || mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) -		s->s_flags |= MS_RDONLY; +		s->s_flags |= SB_RDONLY;  	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;  	s->s_blocksize = path.dentry->d_sb->s_blocksize; @@ -602,7 +602,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags  	ecryptfs_set_dentry_private(s->s_root, root_info);  	root_info->lower_path = path; -	s->s_flags |= MS_ACTIVE; +	s->s_flags |= SB_ACTIVE;  	return dget(s->s_root);  out_free: diff --git a/fs/efs/super.c b/fs/efs/super.c index 65b59009555b..6ffb7ba1547a 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -116,7 +116,7 @@ static void destroy_inodecache(void)  static int efs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  	return 0;  } @@ -311,7 +311,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)  #ifdef DEBUG  		pr_info("forcing read-only mode\n");  #endif -		s->s_flags |= MS_RDONLY; +		s->s_flags |= SB_RDONLY;  	}  	s->s_op   = 
&efs_superblock_operations;  	s->s_export_op = &efs_export_ops; diff --git a/fs/exec.c b/fs/exec.c index 1d6243d9f2b6..7eb8d21bcab9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1216,15 +1216,14 @@ killed:  	return -EAGAIN;  } -char *get_task_comm(char *buf, struct task_struct *tsk) +char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)  { -	/* buf must be at least sizeof(tsk->comm) in size */  	task_lock(tsk); -	strncpy(buf, tsk->comm, sizeof(tsk->comm)); +	strncpy(buf, tsk->comm, buf_size);  	task_unlock(tsk);  	return buf;  } -EXPORT_SYMBOL_GPL(get_task_comm); +EXPORT_SYMBOL_GPL(__get_task_comm);  /*   * These functions flushes out all traces of the currently running executable @@ -1350,9 +1349,14 @@ void setup_new_exec(struct linux_binprm * bprm)  	current->sas_ss_sp = current->sas_ss_size = 0; -	/* Figure out dumpability. */ +	/* +	 * Figure out dumpability. Note that this checking only of current +	 * is wrong, but userspace depends on it. This should be testing +	 * bprm->secureexec instead. 
+	 */  	if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || -	    bprm->secureexec) +	    !(uid_eq(current_euid(), current_uid()) && +	      gid_eq(current_egid(), current_gid())))  		set_dumpable(current->mm, suid_dumpable);  	else  		set_dumpable(current->mm, SUID_DUMP_USER); diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index e1b3724bebf2..33db13365c5e 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -548,7 +548,7 @@ do_more:  	}  	mark_buffer_dirty(bitmap_bh); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		sync_dirty_buffer(bitmap_bh);  	group_adjust_blocks(sb, block_group, desc, bh2, group_freed); @@ -1424,7 +1424,7 @@ allocated:  	percpu_counter_sub(&sbi->s_freeblocks_counter, num);  	mark_buffer_dirty(bitmap_bh); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		sync_dirty_buffer(bitmap_bh);  	*errp = 0; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index a1fc3dabca41..6484199b35d1 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -145,7 +145,7 @@ void ext2_free_inode (struct inode * inode)  	else  		ext2_release_inode(sb, block_group, is_directory);  	mark_buffer_dirty(bitmap_bh); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		sync_dirty_buffer(bitmap_bh);  	brelse(bitmap_bh); @@ -517,7 +517,7 @@ repeat_in_this_group:  	goto fail;  got:  	mark_buffer_dirty(bitmap_bh); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		sync_dirty_buffer(bitmap_bh);  	brelse(bitmap_bh); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e2b6be03e69b..7646818ab266 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -75,7 +75,7 @@ void ext2_error(struct super_block *sb, const char *function,  	if (test_opt(sb, ERRORS_RO)) {  		ext2_msg(sb, KERN_CRIT,  			     "error: remounting filesystem read-only"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  } @@ -656,7 +656,7 @@ static int ext2_setup_super (struct super_block * sb,  		
ext2_msg(sb, KERN_ERR,  			"error: revision level too high, "  			"forcing read-only mode"); -		res = MS_RDONLY; +		res = SB_RDONLY;  	}  	if (read_only)  		return res; @@ -924,9 +924,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)  	sbi->s_resuid = opts.s_resuid;  	sbi->s_resgid = opts.s_resgid; -	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | +	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |  		((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? -		 MS_POSIXACL : 0); +		 SB_POSIXACL : 0);  	sb->s_iflags |= SB_I_CGROUPWB;  	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && @@ -1178,7 +1178,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)  		ext2_msg(sb, KERN_WARNING,  			"warning: mounting ext3 filesystem as ext2");  	if (ext2_setup_super (sb, es, sb_rdonly(sb))) -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	ext2_write_super(sb);  	return 0; @@ -1341,9 +1341,9 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)  			 "dax flag with busy inodes while remounting");  		new_opts.s_mount_opt ^= EXT2_MOUNT_DAX;  	} -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		goto out_set; -	if (*flags & MS_RDONLY) { +	if (*flags & SB_RDONLY) {  		if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||  		    !(sbi->s_mount_state & EXT2_VALID_FS))  			goto out_set; @@ -1379,7 +1379,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)  		 */  		sbi->s_mount_state = le16_to_cpu(es->s_state);  		if (!ext2_setup_super (sb, es, 0)) -			sb->s_flags &= ~MS_RDONLY; +			sb->s_flags &= ~SB_RDONLY;  		spin_unlock(&sbi->s_lock);  		ext2_write_super(sb); @@ -1392,8 +1392,8 @@ out_set:  	sbi->s_mount_opt = new_opts.s_mount_opt;  	sbi->s_resuid = new_opts.s_resuid;  	sbi->s_resgid = new_opts.s_resgid; -	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -		((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); +	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | +		((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0);  	spin_unlock(&sbi->s_lock);  	return 0; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 07bca11749d4..c941251ac0c0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4722,6 +4722,7 @@ retry:  						    EXT4_INODE_EOFBLOCKS);  		}  		ext4_mark_inode_dirty(handle, inode); +		ext4_update_inode_fsync_trans(handle, inode, 1);  		ret2 = ext4_journal_stop(handle);  		if (ret2)  			break; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b4267d72f249..b32cf263750d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,  #ifdef CONFIG_EXT4_FS_POSIX_ACL  		struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); +		if (IS_ERR(p)) +			return ERR_CAST(p);  		if (p) {  			int acl_size = p->a_count * sizeof(ext4_acl_entry); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0992d76f7ab1..534a9130f625 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,   */  int ext4_inode_is_fast_symlink(struct inode *inode)  { +	if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { +		int ea_blocks = EXT4_I(inode)->i_file_acl ? +				EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; + +		if (ext4_has_inline_data(inode)) +			return 0; + +		return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); +	}  	return S_ISLNK(inode->i_mode) && inode->i_size &&  	       (inode->i_size < EXT4_N_BLOCKS * 4);  } @@ -2742,7 +2751,7 @@ static int ext4_writepages(struct address_space *mapping,  	 * If the filesystem has aborted, it is read-only, so return  	 * right away instead of dumping stack traces later on that  	 * will obscure the real source of the problem.  
We test -	 * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because +	 * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because  	 * the latter could be true if the filesystem is mounted  	 * read-only, and in that case, ext4_writepages should  	 * *never* be called, so if that ever happens, we would want @@ -5183,7 +5192,7 @@ static int ext4_do_update_inode(handle_t *handle,  	ext4_inode_csum_set(inode, raw_inode, ei);  	spin_unlock(&ei->i_raw_lock); -	if (inode->i_sb->s_flags & MS_LAZYTIME) +	if (inode->i_sb->s_flags & SB_LAZYTIME)  		ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,  					      bh->b_data); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 798b3ac680db..e750d68fbcb5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,  			       "falling back\n"));  	}  	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); +	if (!nblocks) { +		ret = NULL; +		goto cleanup_and_exit; +	}  	start = EXT4_I(dir)->i_dir_start_lookup;  	if (start >= nblocks)  		start = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0556cd036b69..7c46693a14d7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -422,7 +422,7 @@ static void ext4_handle_error(struct super_block *sb)  		 * before ->s_flags update  		 */  		smp_wmb(); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	if (test_opt(sb, ERRORS_PANIC)) {  		if (EXT4_SB(sb)->s_journal && @@ -635,7 +635,7 @@ void __ext4_abort(struct super_block *sb, const char *function,  		 * before ->s_flags update  		 */  		smp_wmb(); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		if (EXT4_SB(sb)->s_journal)  			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);  		save_error_info(sb, function, line); @@ -1682,10 +1682,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,  		sb->s_flags |= SB_I_VERSION;  		return 1;  	case Opt_lazytime: -		sb->s_flags |= MS_LAZYTIME; +		
sb->s_flags |= SB_LAZYTIME;  		return 1;  	case Opt_nolazytime: -		sb->s_flags &= ~MS_LAZYTIME; +		sb->s_flags &= ~SB_LAZYTIME;  		return 1;  	} @@ -2116,7 +2116,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,  	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {  		ext4_msg(sb, KERN_ERR, "revision level too high, "  			 "forcing read-only mode"); -		res = MS_RDONLY; +		res = SB_RDONLY;  	}  	if (read_only)  		goto done; @@ -2429,7 +2429,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,  	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {  		/* don't clear list on RO mount w/ errors */ -		if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { +		if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {  			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "  				  "clearing orphan list.\n");  			es->s_last_orphan = 0; @@ -2438,19 +2438,19 @@ static void ext4_orphan_cleanup(struct super_block *sb,  		return;  	} -	if (s_flags & MS_RDONLY) { +	if (s_flags & SB_RDONLY) {  		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  	}  #ifdef CONFIG_QUOTA  	/* Needed for iput() to work correctly and not trash data */ -	sb->s_flags |= MS_ACTIVE; +	sb->s_flags |= SB_ACTIVE;  	/*  	 * Turn on quotas which were not enabled for read-only mounts if  	 * filesystem has quota feature, so that they are updated correctly.  	 
*/ -	if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) { +	if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {  		int ret = ext4_enable_quotas(sb);  		if (!ret) @@ -2539,7 +2539,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,  		}  	}  #endif -	sb->s_flags = s_flags; /* Restore MS_RDONLY status */ +	sb->s_flags = s_flags; /* Restore SB_RDONLY status */  }  /* @@ -2741,7 +2741,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)  	if (ext4_has_feature_readonly(sb)) {  		ext4_msg(sb, KERN_INFO, "filesystem is read-only"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		return 1;  	} @@ -3623,8 +3623,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  		sb->s_iflags |= SB_I_CGROUPWB;  	} -	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); +	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | +		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);  	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&  	    (ext4_has_compat_features(sb) || @@ -4199,7 +4199,7 @@ no_journal:  	}  	if (ext4_setup_super(sb, es, sb_rdonly(sb))) -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	/* determine the minimum size of new large inodes, if present */  	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && @@ -4693,7 +4693,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)  	 * the clock is set in the future, and this will cause e2fsck  	 * to complain and force a full file system check.  	 */ -	if (!(sb->s_flags & MS_RDONLY)) +	if (!(sb->s_flags & SB_RDONLY))  		es->s_wtime = cpu_to_le32(get_seconds());  	if (sb->s_bdev->bd_part)  		es->s_kbytes_written = @@ -5047,8 +5047,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)  		ext4_abort(sb, "Abort forced by user"); -	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -		(test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); +	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | +		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);  	es = sbi->s_es; @@ -5057,16 +5057,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);  	} -	if (*flags & MS_LAZYTIME) -		sb->s_flags |= MS_LAZYTIME; +	if (*flags & SB_LAZYTIME) +		sb->s_flags |= SB_LAZYTIME; -	if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { +	if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {  		if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {  			err = -EROFS;  			goto restore_opts;  		} -		if (*flags & MS_RDONLY) { +		if (*flags & SB_RDONLY) {  			err = sync_filesystem(sb);  			if (err < 0)  				goto restore_opts; @@ -5078,7 +5078,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  			 * First of all, the unconditional stuff we have to do  			 * to disable replay of the journal when we next remount  			 */ -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			/*  			 * OK, test if we are remounting a valid rw partition @@ -5140,7 +5140,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  				ext4_clear_journal_err(sb, es);  			sbi->s_mount_state = le16_to_cpu(es->s_state);  			if (!ext4_setup_super(sb, es, 0)) -				sb->s_flags &= ~MS_RDONLY; +				sb->s_flags &= ~SB_RDONLY;  			if (ext4_has_feature_mmp(sb))  				if (ext4_multi_mount_protect(sb,  						le64_to_cpu(es->s_mmp_block))) { @@ -5164,7 +5164,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  	}  	ext4_setup_system_zone(sb); -	if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) +	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY))  		ext4_commit_super(sb, 1);  #ifdef CONFIG_QUOTA @@ -5182,7 +5182,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  	}  #endif -	*flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); +	*flags = (*flags & ~SB_LAZYTIME) | 
(sb->s_flags & SB_LAZYTIME);  	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);  	kfree(orig_data);  	return 0; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd2e73e10857..4aa69bc1c70a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -617,17 +617,17 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)  	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))  		return 0; -	if (s_flags & MS_RDONLY) { +	if (s_flags & SB_RDONLY) {  		f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); -		sbi->sb->s_flags &= ~MS_RDONLY; +		sbi->sb->s_flags &= ~SB_RDONLY;  	}  #ifdef CONFIG_QUOTA  	/* Needed for iput() to work correctly and not trash data */ -	sbi->sb->s_flags |= MS_ACTIVE; +	sbi->sb->s_flags |= SB_ACTIVE;  	/* Turn on quotas so that they are updated correctly */ -	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); +	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);  #endif  	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); @@ -658,7 +658,7 @@ out:  	if (quota_enabled)  		f2fs_quota_off_umount(sbi->sb);  #endif -	sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ +	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */  	return err;  } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f4e094e816c6..6abf26c31d01 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2378,7 +2378,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)  static inline int f2fs_readonly(struct super_block *sb)  { -	return sb->s_flags & MS_RDONLY; +	return sb->s_flags & SB_RDONLY;  }  static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5d5bba462f26..d844dcb80570 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1005,7 +1005,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,  	cpc.reason = __get_cp_reason(sbi);  gc_more: -	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { +	if (unlikely(!(sbi->sb->s_flags & 
SB_ACTIVE))) {  		ret = -EINVAL;  		goto stop;  	} diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 92c57ace1939..b3a14b0429f2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -598,16 +598,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)  	int quota_enabled;  #endif -	if (s_flags & MS_RDONLY) { +	if (s_flags & SB_RDONLY) {  		f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); -		sbi->sb->s_flags &= ~MS_RDONLY; +		sbi->sb->s_flags &= ~SB_RDONLY;  	}  #ifdef CONFIG_QUOTA  	/* Needed for iput() to work correctly and not trash data */ -	sbi->sb->s_flags |= MS_ACTIVE; +	sbi->sb->s_flags |= SB_ACTIVE;  	/* Turn on quotas so that they are updated correctly */ -	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); +	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);  #endif  	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", @@ -671,7 +671,7 @@ out:  	if (quota_enabled)  		f2fs_quota_off_umount(sbi->sb);  #endif -	sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ +	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */  	return ret ? 
ret: err;  } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6c5dd450002..708155d9c2e4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -534,10 +534,10 @@ static int parse_options(struct super_block *sb, char *options)  #endif  			break;  		case Opt_lazytime: -			sb->s_flags |= MS_LAZYTIME; +			sb->s_flags |= SB_LAZYTIME;  			break;  		case Opt_nolazytime: -			sb->s_flags &= ~MS_LAZYTIME; +			sb->s_flags &= ~SB_LAZYTIME;  			break;  #ifdef CONFIG_QUOTA  		case Opt_quota: @@ -1168,7 +1168,7 @@ static void default_options(struct f2fs_sb_info *sbi)  	set_opt(sbi, INLINE_DENTRY);  	set_opt(sbi, EXTENT_CACHE);  	set_opt(sbi, NOHEAP); -	sbi->sb->s_flags |= MS_LAZYTIME; +	sbi->sb->s_flags |= SB_LAZYTIME;  	set_opt(sbi, FLUSH_MERGE);  	if (f2fs_sb_mounted_blkzoned(sbi->sb)) {  		set_opt_mode(sbi, F2FS_MOUNT_LFS); @@ -1236,7 +1236,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  #endif  	/* recover superblocks we couldn't write due to previous RO mount */ -	if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { +	if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {  		err = f2fs_commit_super(sbi, false);  		f2fs_msg(sb, KERN_INFO,  			"Try to recover all the superblocks, ret: %d", err); @@ -1255,17 +1255,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  	 * Previous and new state of filesystem is RO,  	 * so skip checking GC and FLUSH_MERGE conditions.  	 
*/ -	if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) +	if (f2fs_readonly(sb) && (*flags & SB_RDONLY))  		goto skip;  #ifdef CONFIG_QUOTA -	if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { +	if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) {  		err = dquot_suspend(sb, -1);  		if (err < 0)  			goto restore_opts;  	} else {  		/* dquot_resume needs RW */ -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  		if (sb_any_quota_suspended(sb)) {  			dquot_resume(sb, -1);  		} else if (f2fs_sb_has_quota_ino(sb)) { @@ -1288,7 +1288,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  	 * or if background_gc = off is passed in mount  	 * option. Also sync the filesystem.  	 */ -	if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { +	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {  		if (sbi->gc_thread) {  			stop_gc_thread(sbi);  			need_restart_gc = true; @@ -1300,7 +1300,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  		need_stop_gc = true;  	} -	if (*flags & MS_RDONLY) { +	if (*flags & SB_RDONLY) {  		writeback_inodes_sb(sb, WB_REASON_SYNC);  		sync_inodes_sb(sb); @@ -1314,7 +1314,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)  	 * We stop issue flush thread if FS is mounted as RO  	 * or if flush_merge is not passed in mount option.  	 */ -	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { +	if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {  		clear_opt(sbi, FLUSH_MERGE);  		destroy_flush_cmd_control(sbi, false);  	} else { @@ -1329,8 +1329,8 @@ skip:  		kfree(s_qf_names[i]);  #endif  	/* Update the POSIXACL Flag */ -	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); +	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | +		(test_opt(sbi, POSIX_ACL) ? 
SB_POSIXACL : 0);  	return 0;  restore_gc: @@ -2472,8 +2472,8 @@ try_onemore:  	sb->s_export_op = &f2fs_export_ops;  	sb->s_magic = F2FS_SUPER_MAGIC;  	sb->s_time_gran = 1; -	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); +	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | +		(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);  	memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));  	/* init f2fs-specific super block info */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 48b2336692f9..bac10de678cc 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -392,7 +392,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,  			memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);  			set_buffer_uptodate(c_bh);  			mark_buffer_dirty_inode(c_bh, sbi->fat_inode); -			if (sb->s_flags & MS_SYNCHRONOUS) +			if (sb->s_flags & SB_SYNCHRONOUS)  				err = sync_dirty_buffer(c_bh);  			brelse(c_bh);  			if (err) @@ -597,7 +597,7 @@ int fat_free_clusters(struct inode *inode, int cluster)  		}  		if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { -			if (sb->s_flags & MS_SYNCHRONOUS) { +			if (sb->s_flags & SB_SYNCHRONOUS) {  				err = fat_sync_bhs(bhs, nr_bhs);  				if (err)  					goto error; @@ -612,7 +612,7 @@ int fat_free_clusters(struct inode *inode, int cluster)  		fat_collect_bhs(bhs, &nr_bhs, &fatent);  	} while (cluster != FAT_ENT_EOF); -	if (sb->s_flags & MS_SYNCHRONOUS) { +	if (sb->s_flags & SB_SYNCHRONOUS) {  		err = fat_sync_bhs(bhs, nr_bhs);  		if (err)  			goto error; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 30c52394a7ad..20a0a89eaca5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -779,14 +779,14 @@ static void __exit fat_destroy_inodecache(void)  static int fat_remount(struct super_block *sb, int *flags, char *data)  { -	int new_rdonly; +	bool new_rdonly;  	struct msdos_sb_info *sbi = MSDOS_SB(sb); -	*flags |= MS_NODIRATIME | (sbi->options.isvfat ? 
0 : MS_NOATIME); +	*flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);  	sync_filesystem(sb);  	/* make sure we update state on remount. */ -	new_rdonly = *flags & MS_RDONLY; +	new_rdonly = *flags & SB_RDONLY;  	if (new_rdonly != sb_rdonly(sb)) {  		if (new_rdonly)  			fat_set_state(sb, 0, 0); @@ -1352,7 +1352,7 @@ out:  	if (opts->unicode_xlate)  		opts->utf8 = 0;  	if (opts->nfs == FAT_NFS_NOSTALE_RO) { -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		sb->s_export_op = &fat_export_ops_nostale;  	} @@ -1608,7 +1608,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,  		return -ENOMEM;  	sb->s_fs_info = sbi; -	sb->s_flags |= MS_NODIRATIME; +	sb->s_flags |= SB_NODIRATIME;  	sb->s_magic = MSDOS_SUPER_MAGIC;  	sb->s_op = &fat_sops;  	sb->s_export_op = &fat_export_ops; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index acc3aa30ee54..f9bdc1e01c98 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -33,7 +33,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)  	
if (opts->errors == FAT_ERRORS_PANIC)  		panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);  	else if (opts->errors == FAT_ERRORS_RO && !sb_rdonly(sb)) { -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		fat_msg(sb, KERN_ERR, "Filesystem has been set read-only");  	}  } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 7d6a105d601b..d24d2758a363 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -646,7 +646,7 @@ static void setup(struct super_block *sb)  {  	MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;  	sb->s_d_op = &msdos_dentry_operations; -	sb->s_flags |= MS_NOATIME; +	sb->s_flags |= SB_NOATIME;  }  static int msdos_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 455ce5b77e9b..f989efa051a0 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -116,7 +116,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp)  static int vxfs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  	return 0;  } @@ -220,7 +220,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)  	int ret = -EINVAL;  	u32 j; -	sbp->s_flags |= MS_RDONLY; +	sbp->s_flags |= SB_RDONLY;  	infp = kzalloc(sizeof(*infp), GFP_KERNEL);  	if (!infp) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 08f5debd07d1..cea4836385b7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -490,7 +490,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)  	/* while holding I_WB_SWITCH, no one else can update the association */  	spin_lock(&inode->i_lock); -	if (!(inode->i_sb->s_flags & MS_ACTIVE) || +	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||  	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||  	    inode_to_wb(inode) == isw->new_wb) {  		spin_unlock(&inode->i_lock); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 
2f504d615d92..624f18bbfd2b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -130,7 +130,7 @@ static void fuse_evict_inode(struct inode *inode)  {  	truncate_inode_pages_final(&inode->i_data);  	clear_inode(inode); -	if (inode->i_sb->s_flags & MS_ACTIVE) { +	if (inode->i_sb->s_flags & SB_ACTIVE) {  		struct fuse_conn *fc = get_fuse_conn(inode);  		struct fuse_inode *fi = get_fuse_inode(inode);  		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); @@ -141,7 +141,7 @@ static void fuse_evict_inode(struct inode *inode)  static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	if (*flags & MS_MANDLOCK) +	if (*flags & SB_MANDLOCK)  		return -EINVAL;  	return 0; @@ -1056,10 +1056,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)  	int is_bdev = sb->s_bdev != NULL;  	err = -EINVAL; -	if (sb->s_flags & MS_MANDLOCK) +	if (sb->s_flags & SB_MANDLOCK)  		goto err; -	sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION); +	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);  	if (!parse_fuse_opt(data, &d, is_bdev))  		goto err; @@ -1109,9 +1109,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)  		goto err_dev_free;  	/* Handle umasking inside the fuse code */ -	if (sb->s_flags & MS_POSIXACL) +	if (sb->s_flags & SB_POSIXACL)  		fc->dont_mask = 1; -	sb->s_flags |= MS_POSIXACL; +	sb->s_flags |= SB_POSIXACL;  	fc->default_permissions = d.default_permissions;  	fc->allow_other = d.allow_other; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a3711f543405..ad55eb86a250 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1065,15 +1065,15 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent  	sdp->sd_args = *args;  	if (sdp->sd_args.ar_spectator) { -                sb->s_flags |= MS_RDONLY; +                sb->s_flags |= SB_RDONLY;  		set_bit(SDF_RORECOVERY, &sdp->sd_flags);  	}  	if (sdp->sd_args.ar_posix_acl) -		sb->s_flags |= 
MS_POSIXACL; +		sb->s_flags |= SB_POSIXACL;  	if (sdp->sd_args.ar_nobarrier)  		set_bit(SDF_NOBARRIERS, &sdp->sd_flags); -	sb->s_flags |= MS_NOSEC; +	sb->s_flags |= SB_NOSEC;  	sb->s_magic = GFS2_MAGIC;  	sb->s_op = &gfs2_super_ops;  	sb->s_d_op = &gfs2_dops; @@ -1257,7 +1257,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,  	struct gfs2_args args;  	struct gfs2_sbd *sdp; -	if (!(flags & MS_RDONLY)) +	if (!(flags & SB_RDONLY))  		mode |= FMODE_WRITE;  	bdev = blkdev_get_by_path(dev_name, mode, fs_type); @@ -1313,15 +1313,15 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,  	if (s->s_root) {  		error = -EBUSY; -		if ((flags ^ s->s_flags) & MS_RDONLY) +		if ((flags ^ s->s_flags) & SB_RDONLY)  			goto error_super;  	} else {  		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);  		sb_set_blocksize(s, block_size(bdev)); -		error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); +		error = fill_super(s, &args, flags & SB_SILENT ? 
1 : 0);  		if (error)  			goto error_super; -		s->s_flags |= MS_ACTIVE; +		s->s_flags |= SB_ACTIVE;  		bdev->bd_super = s;  	} @@ -1365,7 +1365,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,  		pr_warn("gfs2 mount does not exist\n");  		return ERR_CAST(s);  	} -	if ((flags ^ s->s_flags) & MS_RDONLY) { +	if ((flags ^ s->s_flags) & SB_RDONLY) {  		deactivate_locked_super(s);  		return ERR_PTR(-EBUSY);  	} diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9cb5c9a97d69..d81d46e19726 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1256,10 +1256,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)  		return -EINVAL;  	if (sdp->sd_args.ar_spectator) -		*flags |= MS_RDONLY; +		*flags |= SB_RDONLY; -	if ((sb->s_flags ^ *flags) & MS_RDONLY) { -		if (*flags & MS_RDONLY) +	if ((sb->s_flags ^ *flags) & SB_RDONLY) { +		if (*flags & SB_RDONLY)  			error = gfs2_make_fs_ro(sdp);  		else  			error = gfs2_make_fs_rw(sdp); @@ -1269,9 +1269,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)  	sdp->sd_args = args;  	if (sdp->sd_args.ar_posix_acl) -		sb->s_flags |= MS_POSIXACL; +		sb->s_flags |= SB_POSIXACL;  	else -		sb->s_flags &= ~MS_POSIXACL; +		sb->s_flags &= ~SB_POSIXACL;  	if (sdp->sd_args.ar_nobarrier)  		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);  	else diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index a85ca8b2c9ba..ca8b72d0a831 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -117,7 +117,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)  		kfree(tr);  	up_read(&sdp->sd_log_flush_lock); -	if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) +	if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)  		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);  	if (alloced)  		sb_end_intwrite(sdp->sd_vfs); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 894994d2c885..460281b1299e 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -204,11 +204,11 @@ int hfs_mdb_get(struct super_block *sb)  	attrib = mdb->drAtrb;  	if (!(attrib 
& cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {  		pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended.  mounting read-only.\n"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) {  		pr_warn("filesystem is marked locked, mounting read-only.\n"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	if (!sb_rdonly(sb)) {  		/* Mark the volume uncleanly unmounted in case we crash */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 7e0d65e9586c..173876782f73 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -114,18 +114,18 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)  static int hfs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_NODIRATIME; -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	*flags |= SB_NODIRATIME; +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		return 0; -	if (!(*flags & MS_RDONLY)) { +	if (!(*flags & SB_RDONLY)) {  		if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {  			pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended.  
leaving read-only.\n"); -			sb->s_flags |= MS_RDONLY; -			*flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY; +			*flags |= SB_RDONLY;  		} else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) {  			pr_warn("filesystem is marked locked, leaving read-only.\n"); -			sb->s_flags |= MS_RDONLY; -			*flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY; +			*flags |= SB_RDONLY;  		}  	}  	return 0; @@ -407,7 +407,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_op = &hfs_super_operations;  	sb->s_xattr = hfs_xattr_handlers; -	sb->s_flags |= MS_NODIRATIME; +	sb->s_flags |= SB_NODIRATIME;  	mutex_init(&sbi->bitmap_lock);  	res = hfs_mdb_get(sb); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index e5bb2de2262a..1d458b716957 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -329,9 +329,9 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)  static int hfsplus_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		return 0; -	if (!(*flags & MS_RDONLY)) { +	if (!(*flags & SB_RDONLY)) {  		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr;  		int force = 0; @@ -340,20 +340,20 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)  		if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {  			pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended.  
leaving read-only.\n"); -			sb->s_flags |= MS_RDONLY; -			*flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY; +			*flags |= SB_RDONLY;  		} else if (force) {  			/* nothing */  		} else if (vhdr->attributes &  				cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {  			pr_warn("filesystem is marked locked, leaving read-only.\n"); -			sb->s_flags |= MS_RDONLY; -			*flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY; +			*flags |= SB_RDONLY;  		} else if (vhdr->attributes &  				cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {  			pr_warn("filesystem is marked journaled, leaving read-only.\n"); -			sb->s_flags |= MS_RDONLY; -			*flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY; +			*flags |= SB_RDONLY;  		}  	}  	return 0; @@ -455,16 +455,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)  	if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {  		pr_warn("Filesystem was not cleanly unmounted, running fsck.hfsplus is recommended.  mounting read-only.\n"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	} else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) {  		/* nothing */  	} else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {  		pr_warn("Filesystem is marked locked, mounting read-only.\n"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	} else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) &&  			!sb_rdonly(sb)) {  		pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	err = -EINVAL; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 8d6b7e35faf9..c83ece7facc5 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -150,7 +150,6 @@ static int hpfs_readdir(struct file *file, struct dir_context *ctx)  			if (unlikely(ret < 0))  				goto out;  			ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1; -			file->f_version = 
inode->i_version;  		}  		next_pos = ctx->pos;  		if (!(de = map_pos_dirent(inode, &next_pos, &qbh))) { diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 3b834563b1f1..a4ad18afbdec 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -419,7 +419,6 @@ int hpfs_add_dirent(struct inode *i,  		c = 1;  		goto ret;  	}	 -	i->i_version++;  	c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);  	ret:  	return c; @@ -726,7 +725,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,  			return 2;  		}  	} -	i->i_version++;  	for_all_poss(i, hpfs_pos_del, (t = get_pos(dnode, de)) + 1, 1);  	hpfs_delete_de(i->i_sb, dnode, de);  	hpfs_mark_4buffers_dirty(qbh); diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index e0e60b148400..7c49f1ef0c85 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -288,7 +288,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,  					goto bail;  				}  				if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) { -					if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok; +					if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & SB_RDONLY) goto ok;  					hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);  					goto bail;  				} diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 1516fb4e28f4..f2c3ebcd309c 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -78,7 +78,7 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)  			
else {  				pr_cont("; remounting read-only\n");  				mark_dirty(s, 0); -				s->s_flags |= MS_RDONLY; +				s->s_flags |= SB_RDONLY;  			}  		} else if (sb_rdonly(s))  				pr_cont("; going on - but anything won't be destroyed because it's read-only\n"); @@ -235,7 +235,6 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)  	ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);  	if (!ei)  		return NULL; -	ei->vfs_inode.i_version = 1;  	return &ei->vfs_inode;  } @@ -457,7 +456,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)  	sync_filesystem(s); -	*flags |= MS_NOATIME; +	*flags |= SB_NOATIME;  	hpfs_lock(s);  	uid = sbi->sb_uid; gid = sbi->sb_gid; @@ -488,7 +487,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)  	sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;  	sbi->sb_err = errs; sbi->sb_timeshift = timeshift; -	if (!(*flags & MS_RDONLY)) mark_dirty(s, 1); +	if (!(*flags & SB_RDONLY)) mark_dirty(s, 1);  	hpfs_unlock(s);  	return 0; @@ -614,7 +613,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)  		goto bail4;  	} -	s->s_flags |= MS_NOATIME; +	s->s_flags |= SB_NOATIME;  	/* Fill superblock stuff */  	s->s_magic = HPFS_SUPER_MAGIC; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1e76730aac0d..8a85f3f53446 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,  		mutex_unlock(&hugetlb_fault_mutex_table[hash]);  		/* -		 * page_put due to reference from alloc_huge_page()  		 * unlock_page because locked by add_to_page_cache() +		 * page_put due to reference from alloc_huge_page()  		 */ -		put_page(page);  		unlock_page(page); +		put_page(page);  	}  	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) diff --git a/fs/inode.c b/fs/inode.c index fd401028a309..03102d6ef044 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -416,7 
+416,7 @@ void inode_add_lru(struct inode *inode)  {  	if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |  				I_FREEING | I_WILL_FREE)) && -	    !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) +	    !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE)  		inode_lru_list_add(inode);  } @@ -595,7 +595,7 @@ static void dispose_list(struct list_head *head)   * @sb:		superblock to operate on   *   * Make sure that no inodes with zero refcount are retained.  This is - * called by superblock shutdown after having MS_ACTIVE flag removed, + * called by superblock shutdown after having SB_ACTIVE flag removed,   * so any inode reaching zero refcount during or after that call will   * be immediately evicted.   */ @@ -1492,7 +1492,7 @@ static void iput_final(struct inode *inode)  	else  		drop = generic_drop_inode(inode); -	if (!drop && (sb->s_flags & MS_ACTIVE)) { +	if (!drop && (sb->s_flags & SB_ACTIVE)) {  		inode_add_lru(inode);  		spin_unlock(&inode->i_lock);  		return; @@ -1644,7 +1644,7 @@ int generic_update_time(struct inode *inode, struct timespec *time, int flags)  	if (flags & S_MTIME)  		inode->i_mtime = *time; -	if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION)) +	if (!(inode->i_sb->s_flags & SB_LAZYTIME) || (flags & S_VERSION))  		iflags |= I_DIRTY_SYNC;  	__mark_inode_dirty(inode, iflags);  	return 0; @@ -1691,7 +1691,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,  	if (IS_NOATIME(inode))  		return false; -	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) +	if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))  		return false;  	if (mnt->mnt_flags & MNT_NOATIME) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 447a24d77b89..bc258a4402f6 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -114,7 +114,7 @@ static void destroy_inodecache(void)  static int isofs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	if 
(!(*flags & MS_RDONLY)) +	if (!(*flags & SB_RDONLY))  		return -EROFS;  	return 0;  } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e96c6b05e43e..d8c274d39ddb 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -409,10 +409,10 @@ int jffs2_do_remount_fs(struct super_block *sb, int *flags, char *data)  		mutex_unlock(&c->alloc_sem);  	} -	if (!(*flags & MS_RDONLY)) +	if (!(*flags & SB_RDONLY))  		jffs2_start_garbage_collect_thread(c); -	*flags |= MS_NOATIME; +	*flags |= SB_NOATIME;  	return 0;  } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 824e61ede465..c2fbec19c616 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -59,7 +59,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)  } -#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY) +#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & SB_RDONLY)  #define SECTOR_ADDR(x) ( (((unsigned long)(x) / c->sector_size) * c->sector_size) )  #ifndef CONFIG_JFFS2_FS_WRITEBUFFER diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 153f1c6eb169..f60dee7faf03 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -301,10 +301,10 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_op = &jffs2_super_operations;  	sb->s_export_op = &jffs2_export_ops; -	sb->s_flags = sb->s_flags | MS_NOATIME; +	sb->s_flags = sb->s_flags | SB_NOATIME;  	sb->s_xattr = jffs2_xattr_handlers;  #ifdef CONFIG_JFFS2_FS_POSIX_ACL -	sb->s_flags |= MS_POSIXACL; +	sb->s_flags |= SB_POSIXACL;  #endif  	ret = jffs2_do_fill_super(sb, data, silent);  	return ret; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 2f7b3af5b8b7..90373aebfdca 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -87,7 +87,7 @@ static void jfs_handle_error(struct super_block *sb)  	else if (sbi->flag & JFS_ERR_REMOUNT_RO) {  		jfs_err("ERROR: (device %s): remounting filesystem as read-only",  			sb->s_id); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	/* nothing 
is done for continue beyond marking the superblock dirty */ @@ -477,7 +477,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)  			return rc;  	} -	if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { +	if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) {  		/*  		 * Invalidate any previously read metadata.  fsck may have  		 * changed the on-disk data since we mounted r/o @@ -488,12 +488,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)  		ret = jfs_mount_rw(sb, 1);  		/* mark the fs r/w for quota activity */ -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  		dquot_resume(sb, -1);  		return ret;  	} -	if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { +	if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) {  		rc = dquot_suspend(sb, -1);  		if (rc < 0)  			return rc; @@ -545,7 +545,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)  	sbi->flag = flag;  #ifdef CONFIG_JFS_POSIX_ACL -	sb->s_flags |= MS_POSIXACL; +	sb->s_flags |= SB_POSIXACL;  #endif  	if (newLVSize) { diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 95a7c88baed9..26dd9a50f383 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -335,7 +335,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,  			deactivate_locked_super(sb);  			return ERR_PTR(error);  		} -		sb->s_flags |= MS_ACTIVE; +		sb->s_flags |= SB_ACTIVE;  		mutex_lock(&kernfs_mutex);  		list_add(&info->node, &root->supers); diff --git a/fs/libfs.c b/fs/libfs.c index 3aabe553fc45..7ff3cb904acd 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -246,7 +246,7 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,  	struct inode *root;  	struct qstr d_name = QSTR_INIT(name, strlen(name)); -	s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, +	s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER,  			&init_user_ns, NULL);  	if (IS_ERR(s))  		return ERR_CAST(s); @@ -277,7 +277,7 @@ struct 
dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,  	d_instantiate(dentry, root);  	s->s_root = dentry;  	s->s_d_op = dops; -	s->s_flags |= MS_ACTIVE; +	s->s_flags |= SB_ACTIVE;  	return dget(s->s_root);  Enomem: @@ -578,7 +578,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c  	spin_lock(&pin_fs_lock);  	if (unlikely(!*mount)) {  		spin_unlock(&pin_fs_lock); -		mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL); +		mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);  		if (IS_ERR(mnt))  			return PTR_ERR(mnt);  		spin_lock(&pin_fs_lock); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0d4e590e0549..826a89184f90 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -578,8 +578,10 @@ static void nlm_complain_hosts(struct net *net)  		if (ln->nrhosts == 0)  			return; -		printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net); -		dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net); +		pr_warn("lockd: couldn't shutdown host module for net %x!\n", +			net->ns.inum); +		dprintk("lockd: %lu hosts left in net %x:\n", ln->nrhosts, +			net->ns.inum);  	} else {  		if (nrhosts == 0)  			return; @@ -590,9 +592,9 @@ static void nlm_complain_hosts(struct net *net)  	for_each_host(host, chain, nlm_server_hosts) {  		if (net && host->net != net)  			continue; -		dprintk("       %s (cnt %d use %d exp %ld net %p)\n", +		dprintk("       %s (cnt %d use %d exp %ld net %x)\n",  			host->h_name, atomic_read(&host->h_count), -			host->h_inuse, host->h_expires, host->net); +			host->h_inuse, host->h_expires, host->net->ns.inum);  	}  } @@ -605,7 +607,8 @@ nlm_shutdown_hosts_net(struct net *net)  	mutex_lock(&nlm_host_mutex);  	/* First, make all hosts eligible for gc */ -	dprintk("lockd: nuking all hosts in net %p...\n", net); +	dprintk("lockd: nuking all hosts in net %x...\n", +		net ? 
net->ns.inum : 0);  	for_each_host(host, chain, nlm_server_hosts) {  		if (net && host->net != net)  			continue; @@ -618,9 +621,8 @@ nlm_shutdown_hosts_net(struct net *net)  	/* Then, perform a garbage collection pass */  	nlm_gc_hosts(net); -	mutex_unlock(&nlm_host_mutex); -  	nlm_complain_hosts(net); +	mutex_unlock(&nlm_host_mutex);  }  /* @@ -646,7 +648,8 @@ nlm_gc_hosts(struct net *net)  	struct hlist_node *next;  	struct nlm_host	*host; -	dprintk("lockd: host garbage collection for net %p\n", net); +	dprintk("lockd: host garbage collection for net %x\n", +		net ? net->ns.inum : 0);  	for_each_host(host, chain, nlm_server_hosts) {  		if (net && host->net != net)  			continue; @@ -662,9 +665,10 @@ nlm_gc_hosts(struct net *net)  		if (atomic_read(&host->h_count) || host->h_inuse  		 || time_before(jiffies, host->h_expires)) {  			dprintk("nlm_gc_hosts skipping %s " -				"(cnt %d use %d exp %ld net %p)\n", +				"(cnt %d use %d exp %ld net %x)\n",  				host->h_name, atomic_read(&host->h_count), -				host->h_inuse, host->h_expires, host->net); +				host->h_inuse, host->h_expires, +				host->net->ns.inum);  			continue;  		}  		nlm_destroy_host_locked(host); diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 9fbbd11f9ecb..96cfb2967ac7 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -110,7 +110,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,  	clnt = nsm_create(host->net, host->nodename);  	if (IS_ERR(clnt)) {  		dprintk("lockd: failed to create NSM upcall transport, " -			"status=%ld, net=%p\n", PTR_ERR(clnt), host->net); +			"status=%ld, net=%x\n", PTR_ERR(clnt), +			host->net->ns.inum);  		return PTR_ERR(clnt);  	} diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a8e3777c94dc..9c36d614bf89 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,6 +57,9 @@ static struct task_struct	*nlmsvc_task;  static struct svc_rqst		*nlmsvc_rqst;  unsigned long			nlmsvc_timeout; +atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); 
+DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); +  unsigned int lockd_net_id;  /* @@ -259,7 +262,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)  	if (error < 0)  		goto err_bind;  	set_grace_period(net); -	dprintk("lockd_up_net: per-net data created; net=%p\n", net); +	dprintk("%s: per-net data created; net=%x\n", __func__, net->ns.inum);  	return 0;  err_bind: @@ -274,12 +277,15 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)  	if (ln->nlmsvc_users) {  		if (--ln->nlmsvc_users == 0) {  			nlm_shutdown_hosts_net(net); +			cancel_delayed_work_sync(&ln->grace_period_end); +			locks_end_grace(&ln->lockd_manager);  			svc_shutdown_net(serv, net); -			dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); +			dprintk("%s: per-net data destroyed; net=%x\n", +				__func__, net->ns.inum);  		}  	} else { -		printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", -				nlmsvc_task, net); +		pr_err("%s: no users! task=%p, net=%x\n", +			__func__, nlmsvc_task, net->ns.inum);  		BUG();  	}  } @@ -290,7 +296,8 @@ static int lockd_inetaddr_event(struct notifier_block *this,  	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;  	struct sockaddr_in sin; -	if (event != NETDEV_DOWN) +	if ((event != NETDEV_DOWN) || +	    !atomic_inc_not_zero(&nlm_ntf_refcnt))  		goto out;  	if (nlmsvc_rqst) { @@ -301,6 +308,8 @@ static int lockd_inetaddr_event(struct notifier_block *this,  		svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,  			(struct sockaddr *)&sin);  	} +	atomic_dec(&nlm_ntf_refcnt); +	wake_up(&nlm_ntf_wq);  out:  	return NOTIFY_DONE; @@ -317,7 +326,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,  	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;  	struct sockaddr_in6 sin6; -	if (event != NETDEV_DOWN) +	if ((event != NETDEV_DOWN) || +	    !atomic_inc_not_zero(&nlm_ntf_refcnt))  		goto out;  	if (nlmsvc_rqst) { @@ -329,6 +339,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,  		
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,  			(struct sockaddr *)&sin6);  	} +	atomic_dec(&nlm_ntf_refcnt); +	wake_up(&nlm_ntf_wq);  out:  	return NOTIFY_DONE; @@ -345,10 +357,12 @@ static void lockd_unregister_notifiers(void)  #if IS_ENABLED(CONFIG_IPV6)  	unregister_inet6addr_notifier(&lockd_inet6addr_notifier);  #endif +	wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0);  }  static void lockd_svc_exit_thread(void)  { +	atomic_dec(&nlm_ntf_refcnt);  	lockd_unregister_notifiers();  	svc_exit_thread(nlmsvc_rqst);  } @@ -373,6 +387,7 @@ static int lockd_start_svc(struct svc_serv *serv)  		goto out_rqst;  	} +	atomic_inc(&nlm_ntf_refcnt);  	svc_sock_update_bufs(serv);  	serv->sv_maxconn = nlm_max_connections; @@ -676,6 +691,17 @@ static int lockd_init_net(struct net *net)  static void lockd_exit_net(struct net *net)  { +	struct lockd_net *ln = net_generic(net, lockd_net_id); + +	WARN_ONCE(!list_empty(&ln->lockd_manager.list), +		  "net %x %s: lockd_manager.list is not empty\n", +		  net->ns.inum, __func__); +	WARN_ONCE(!list_empty(&ln->nsm_handles), +		  "net %x %s: nsm_handles list is not empty\n", +		  net->ns.inum, __func__); +	WARN_ONCE(delayed_work_pending(&ln->grace_period_end), +		  "net %x %s: grace_period_end was not cancelled\n", +		  net->ns.inum, __func__);  }  static struct pernet_operations lockd_net_ops = { diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index a563ddbc19e6..4ec3d6e03e76 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -370,7 +370,7 @@ nlmsvc_mark_resources(struct net *net)  {  	struct nlm_host hint; -	dprintk("lockd: nlmsvc_mark_resources for net %p\n", net); +	dprintk("lockd: %s for net %x\n", __func__, net ? 
net->ns.inum : 0);  	hint.net = net;  	nlm_traverse_files(&hint, nlmsvc_mark_host, NULL);  } diff --git a/fs/locks.c b/fs/locks.c index 1bd71c4d663a..21b4dfa289ee 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -141,7 +141,7 @@  static inline bool is_remote_lock(struct file *filp)  { -	return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK)); +	return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));  }  static bool lease_breaking(struct file_lock *fl) diff --git a/fs/mbcache.c b/fs/mbcache.c index d818fd236787..b8b8b9ced9f8 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink,  	struct mb_cache *cache = container_of(shrink, struct mb_cache,  					      c_shrink); +	/* Unlikely, but not impossible */ +	if (unlikely(cache->c_entry_count < 0)) +		return 0;  	return cache->c_entry_count;  } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index b6829d679643..72e308c3e66b 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -125,9 +125,9 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)  	sync_filesystem(sb);  	ms = sbi->s_ms; -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		return 0; -	if (*flags & MS_RDONLY) { +	if (*flags & SB_RDONLY) {  		if (ms->s_state & MINIX_VALID_FS ||  		    !(sbi->s_mount_state & MINIX_VALID_FS))  			return 0; diff --git a/fs/namei.c b/fs/namei.c index f0c7a7b9b6ca..9cc91fb7f156 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd,  	 * of the daemon to instantiate them before they can be used.  	 
*/  	if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | -			   LOOKUP_OPEN | LOOKUP_CREATE | -			   LOOKUP_AUTOMOUNT))) { -		/* Positive dentry that isn't meant to trigger an -		 * automount, EISDIR will allow it to be used, -		 * otherwise there's no mount here "now" so return -		 * ENOENT. -		 */ -		if (path->dentry->d_inode) -			return -EISDIR; -		else -			return -ENOENT; -	} +			   LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && +	    path->dentry->d_inode) +		return -EISDIR;  	if (path->dentry->d_sb->s_user_ns != &init_user_ns)  		return -EACCES; diff --git a/fs/namespace.c b/fs/namespace.c index e158ec6b527b..9d1374ab6e06 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,  			    SB_DIRSYNC |  			    SB_SILENT |  			    SB_POSIXACL | +			    SB_LAZYTIME |  			    SB_I_VERSION);  	if (flags & MS_REMOUNT) diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 129f1937fa2c..41de88cdc053 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -103,7 +103,7 @@ static void destroy_inodecache(void)  static int ncp_remount(struct super_block *sb, int *flags, char* data)  {  	sync_filesystem(sb); -	*flags |= MS_NODIRATIME; +	*flags |= SB_NODIRATIME;  	return 0;  } @@ -547,7 +547,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)  	else  		default_bufsize = 1024; -	sb->s_flags |= MS_NODIRATIME;	/* probably even noatime */ +	sb->s_flags |= SB_NODIRATIME;	/* probably even noatime */  	sb->s_maxbytes = 0xFFFFFFFFU;  	sb->s_blocksize = 1024;	/* Eh...  Is this correct? 
*/  	sb->s_blocksize_bits = 10; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0ac2fb1c6b63..b9129e2befea 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat  	const struct sockaddr *sap = data->addr;  	struct nfs_net *nn = net_generic(data->net, nfs_net_id); +again:  	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {  	        const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;  		/* Don't match clients that failed to initialise properly */  		if (clp->cl_cons_state < 0)  			continue; +		/* If a client is still initializing then we need to wait */ +		if (clp->cl_cons_state > NFS_CS_READY) { +			refcount_inc(&clp->cl_count); +			spin_unlock(&nn->nfs_client_lock); +			nfs_wait_client_init_complete(clp); +			nfs_put_client(clp); +			spin_lock(&nn->nfs_client_lock); +			goto again; +		} +  		/* Different NFS versions cannot share the same nfs_client */  		if (clp->rpc_ops != data->nfs_mod->rpc_ops)  			continue; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e51ae52ed14f..2f3f86726f5b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1256,7 +1256,7 @@ static int nfs_dentry_delete(const struct dentry *dentry)  		/* Unhash it, so that ->d_iput() would be called */  		return 1;  	} -	if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { +	if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {  		/* Unhash it, so that ancestors of killed async unlink  		 * files will be cleaned up during umount */  		return 1; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 38b93d54c02e..b992d2382ffa 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -752,7 +752,7 @@ int nfs_getattr(const struct path *path, struct kstat *stat,  	 * Note that we only have to check the vfsmount flags here:  	 *  - NFS always sets S_NOATIME by so checking it would give a  	 *    bogus result -	 *  - NFS never sets MS_NOATIME or MS_NODIRATIME so there is +	 *  - NFS never sets SB_NOATIME or 
SB_NODIRATIME so there is  	 *    no point in checking those.  	 */  	if ((path->mnt->mnt_flags & MNT_NOATIME) || diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5ab17fd4700a..8357ff69962f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -10,7 +10,7 @@  #include <linux/nfs_page.h>  #include <linux/wait_bit.h> -#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) +#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)  extern const struct export_operations nfs_export_ops; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 12bbab0becb4..65a7e5da508c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,  	if (error < 0)  		goto error; -	if (!nfs4_has_session(clp)) -		nfs_mark_client_ready(clp, NFS_CS_READY); -  	error = nfs4_discover_server_trunking(clp, &old);  	if (error < 0)  		goto error; -	if (clp != old) +	if (clp != old) {  		clp->cl_preserve_clid = true; +		/* +		 * Mark the client as having failed initialization so other +		 * processes walking the nfs_client_list in nfs_match_client() +		 * won't try to use it. 
+		 */ +		nfs_mark_client_ready(clp, -EPERM); +	}  	nfs_put_client(clp);  	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);  	return old; @@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new,  	spin_lock(&nn->nfs_client_lock);  	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { +		if (pos == new) +			goto found; +  		status = nfs4_match_client(pos, new, &prev, nn);  		if (status < 0)  			goto out_unlock; @@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new,  		 * way that a SETCLIENTID_CONFIRM to pos can succeed is  		 * if new and pos point to the same server:  		 */ +found:  		refcount_inc(&pos->cl_count);  		spin_unlock(&nn->nfs_client_lock); @@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new,  		case 0:  			nfs4_swap_callback_idents(pos, new);  			pos->cl_confirm = new->cl_confirm; +			nfs_mark_client_ready(pos, NFS_CS_READY);  			prev = NULL;  			*result = pos; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 54fd56d715a8..e4f4a09ed9f4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -71,8 +71,8 @@ const nfs4_stateid zero_stateid = {  };  const nfs4_stateid invalid_stateid = {  	{ -		.seqid = cpu_to_be32(0xffffffffU), -		.other = { 0 }, +		/* Funky initialiser keeps older gcc versions happy */ +		.data = { 0xff, 0xff, 0xff, 0xff, 0 },  	},  	.type = NFS4_INVALID_STATEID_TYPE,  }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 43cadb28db6e..29bacdc56f6a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -813,9 +813,9 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)  	 */  	seq_printf(m, "\n\topts:\t");  	seq_puts(m, sb_rdonly(root->d_sb) ? "ro" : "rw"); -	seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); -	seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : ""); -	seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); +	seq_puts(m, root->d_sb->s_flags & SB_SYNCHRONOUS ? 
",sync" : ""); +	seq_puts(m, root->d_sb->s_flags & SB_NOATIME ? ",noatime" : ""); +	seq_puts(m, root->d_sb->s_flags & SB_NODIRATIME ? ",nodiratime" : "");  	nfs_show_mount_options(m, nfss, 1);  	seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); @@ -2296,11 +2296,11 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)  	/*  	 * noac is a special case. It implies -o sync, but that's not  	 * necessarily reflected in the mtab options. do_remount_sb -	 * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the +	 * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the  	 * remount options, so we have to explicitly reset it.  	 */  	if (data->flags & NFS_MOUNT_NOAC) -		*flags |= MS_SYNCHRONOUS; +		*flags |= SB_SYNCHRONOUS;  	/* compare new mount options with old ones */  	error = nfs_compare_remount_data(nfss, data); @@ -2349,7 +2349,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)  		/* The VFS shouldn't apply the umask to mode bits. We will do  		 * so ourselves when necessary.  		 */ -		sb->s_flags |= MS_POSIXACL; +		sb->s_flags |= SB_POSIXACL;  		sb->s_time_gran = 1;  		sb->s_export_op = &nfs_export_ops;  	} @@ -2379,7 +2379,7 @@ static void nfs_clone_super(struct super_block *sb,  		/* The VFS shouldn't apply the umask to mode bits. We will do  		 * so ourselves when necessary.  		 
*/ -		sb->s_flags |= MS_POSIXACL; +		sb->s_flags |= SB_POSIXACL;  	}   	nfs_initialise_sb(sb); @@ -2600,11 +2600,11 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,  	/* -o noac implies -o sync */  	if (server->flags & NFS_MOUNT_NOAC) -		sb_mntdata.mntflags |= MS_SYNCHRONOUS; +		sb_mntdata.mntflags |= SB_SYNCHRONOUS;  	if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) -		if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS) -			sb_mntdata.mntflags |= MS_SYNCHRONOUS; +		if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS) +			sb_mntdata.mntflags |= SB_SYNCHRONOUS;  	/* Get a superblock - note that we may end up sharing one that already exists */  	s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); @@ -2641,7 +2641,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,  	if (error)  		goto error_splat_root; -	s->s_flags |= MS_ACTIVE; +	s->s_flags |= SB_ACTIVE;  out:  	return mntroot; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5b5f464f6f2a..4a379d7918f2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1890,6 +1890,8 @@ int nfs_commit_inode(struct inode *inode, int how)  	if (res)  		error = nfs_generic_commit_list(inode, &head, how, &cinfo);  	nfs_commit_end(cinfo.mds); +	if (res == 0) +		return res;  	if (error < 0)  		goto out_error;  	if (!may_wait) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 897b299db55e..5be08f02a76b 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm)  	struct list_head *grace_list = net_generic(net, grace_net_id);  	spin_lock(&grace_lock); -	list_add(&lm->list, grace_list); +	if (list_empty(&lm->list)) +		list_add(&lm->list, grace_list); +	else +		WARN(1, "double list_add attempt detected in net %x %s\n", +		     net->ns.inum, (net == &init_net) ? 
"(init_net)" : "");  	spin_unlock(&grace_lock);  }  EXPORT_SYMBOL_GPL(locks_start_grace); @@ -104,7 +108,9 @@ grace_exit_net(struct net *net)  {  	struct list_head *grace_list = net_generic(net, grace_net_id); -	BUG_ON(!list_empty(grace_list)); +	WARN_ONCE(!list_empty(grace_list), +		  "net %x %s: grace_list is not empty\n", +		  net->ns.inum, __func__);  }  static struct pernet_operations grace_net_ops = { diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 697f8ae7792d..f650e475d8f0 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -60,6 +60,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)  				gi->gid[i] = exp->ex_anon_gid;  			else  				gi->gid[i] = rqgi->gid[i]; + +			/* Each thread allocates its own gi, no race */ +			groups_sort(gi);  		}  	} else {  		gi = get_group_info(rqgi); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 46b48dbbdd32..8ceb25a10ea0 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -232,7 +232,7 @@ static struct cache_head *expkey_alloc(void)  		return NULL;  } -static struct cache_detail svc_expkey_cache_template = { +static const struct cache_detail svc_expkey_cache_template = {  	.owner		= THIS_MODULE,  	.hash_size	= EXPKEY_HASHMAX,  	.name		= "nfsd.fh", @@ -748,7 +748,7 @@ static struct cache_head *svc_export_alloc(void)  		return NULL;  } -static struct cache_detail svc_export_cache_template = { +static const struct cache_detail svc_export_cache_template = {  	.owner		= THIS_MODULE,  	.hash_size	= EXPORT_HASHMAX,  	.name		= "nfsd.export", @@ -1230,7 +1230,7 @@ nfsd_export_init(struct net *net)  	int rv;  	struct nfsd_net *nn = net_generic(net, nfsd_net_id); -	dprintk("nfsd: initializing export module (net: %p).\n", net); +	dprintk("nfsd: initializing export module (net: %x).\n", net->ns.inum);  	nn->svc_export_cache = cache_create_net(&svc_export_cache_template, net);  	if (IS_ERR(nn->svc_export_cache)) @@ -1278,7 +1278,7 @@ nfsd_export_shutdown(struct net *net)  {  	struct nfsd_net *nn = 
net_generic(net, nfsd_net_id); -	dprintk("nfsd: shutting down export module (net: %p).\n", net); +	dprintk("nfsd: shutting down export module (net: %x).\n", net->ns.inum);  	cache_unregister_net(nn->svc_expkey_cache, net);  	cache_unregister_net(nn->svc_export_cache, net); @@ -1286,5 +1286,5 @@ nfsd_export_shutdown(struct net *net)  	cache_destroy_net(nn->svc_export_cache, net);  	svcauth_unix_purge(net); -	dprintk("nfsd: export shutdown complete (net: %p).\n", net); +	dprintk("nfsd: export shutdown complete (net: %x).\n", net->ns.inum);  } diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1c91391f4805..36358d435cb0 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -119,6 +119,9 @@ struct nfsd_net {  	u32 clverifier_counter;  	struct svc_serv *nfsd_serv; + +	wait_queue_head_t ntf_wq; +	atomic_t ntf_refcnt;  };  /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 6b9b6cca469f..a5bb76593ce7 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -178,7 +178,7 @@ static struct ent *idtoname_lookup(struct cache_detail *, struct ent *);  static struct ent *idtoname_update(struct cache_detail *, struct ent *,  				   struct ent *); -static struct cache_detail idtoname_cache_template = { +static const struct cache_detail idtoname_cache_template = {  	.owner		= THIS_MODULE,  	.hash_size	= ENT_HASHMAX,  	.name		= "nfs4.idtoname", @@ -341,7 +341,7 @@ static struct ent *nametoid_update(struct cache_detail *, struct ent *,  				   struct ent *);  static int         nametoid_parse(struct cache_detail *, char *, int); -static struct cache_detail nametoid_cache_template = { +static const struct cache_detail nametoid_cache_template = {  	.owner		= THIS_MODULE,  	.hash_size	= ENT_HASHMAX,  	.name		= "nfs4.nametoid", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b82817767b9d..b29b5a185a2c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -63,12 +63,16 @@ static 
const stateid_t zero_stateid = {  static const stateid_t currentstateid = {  	.si_generation = 1,  }; +static const stateid_t close_stateid = { +	.si_generation = 0xffffffffU, +};  static u64 current_sessionid = 1;  #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))  #define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))  #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) +#define CLOSE_STATEID(stateid)  (!memcmp((stateid), &close_stateid, sizeof(stateid_t)))  /* forward declarations */  static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); @@ -83,6 +87,11 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid);   */  static DEFINE_SPINLOCK(state_lock); +enum nfsd4_st_mutex_lock_subclass { +	OPEN_STATEID_MUTEX = 0, +	LOCK_STATEID_MUTEX = 1, +}; +  /*   * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for   * the refcount on the open stateid to drop. 
@@ -3562,7 +3571,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)  		/* ignore lock owners */  		if (local->st_stateowner->so_is_open_owner == 0)  			continue; -		if (local->st_stateowner == &oo->oo_owner) { +		if (local->st_stateowner != &oo->oo_owner) +			continue; +		if (local->st_stid.sc_type == NFS4_OPEN_STID) {  			ret = local;  			refcount_inc(&ret->st_stid.sc_count);  			break; @@ -3571,6 +3582,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)  	return ret;  } +static __be32 +nfsd4_verify_open_stid(struct nfs4_stid *s) +{ +	__be32 ret = nfs_ok; + +	switch (s->sc_type) { +	default: +		break; +	case NFS4_CLOSED_STID: +	case NFS4_CLOSED_DELEG_STID: +		ret = nfserr_bad_stateid; +		break; +	case NFS4_REVOKED_DELEG_STID: +		ret = nfserr_deleg_revoked; +	} +	return ret; +} + +/* Lock the stateid st_mutex, and deal with races with CLOSE */ +static __be32 +nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) +{ +	__be32 ret; + +	mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); +	ret = nfsd4_verify_open_stid(&stp->st_stid); +	if (ret != nfs_ok) +		mutex_unlock(&stp->st_mutex); +	return ret; +} + +static struct nfs4_ol_stateid * +nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ +	struct nfs4_ol_stateid *stp; +	for (;;) { +		spin_lock(&fp->fi_lock); +		stp = nfsd4_find_existing_open(fp, open); +		spin_unlock(&fp->fi_lock); +		if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok) +			break; +		nfs4_put_stid(&stp->st_stid); +	} +	return stp; +} +  static struct nfs4_openowner *  alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,  			   struct nfsd4_compound_state *cstate) @@ -3613,8 +3670,9 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)  	stp = open->op_stp;  	/* We are moving these outside of the spinlocks to avoid the warnings */  	mutex_init(&stp->st_mutex); -	mutex_lock(&stp->st_mutex); +	mutex_lock_nested(&stp->st_mutex, 
OPEN_STATEID_MUTEX); +retry:  	spin_lock(&oo->oo_owner.so_client->cl_lock);  	spin_lock(&fp->fi_lock); @@ -3639,7 +3697,11 @@ out_unlock:  	spin_unlock(&fp->fi_lock);  	spin_unlock(&oo->oo_owner.so_client->cl_lock);  	if (retstp) { -		mutex_lock(&retstp->st_mutex); +		/* Handle races with CLOSE */ +		if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { +			nfs4_put_stid(&retstp->st_stid); +			goto retry; +		}  		/* To keep mutex tracking happy */  		mutex_unlock(&stp->st_mutex);  		stp = retstp; @@ -4449,6 +4511,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf  	struct nfs4_ol_stateid *stp = NULL;  	struct nfs4_delegation *dp = NULL;  	__be32 status; +	bool new_stp = false;  	/*  	 * Lookup file; if found, lookup stateid and check open request, @@ -4460,9 +4523,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf  		status = nfs4_check_deleg(cl, open, &dp);  		if (status)  			goto out; -		spin_lock(&fp->fi_lock); -		stp = nfsd4_find_existing_open(fp, open); -		spin_unlock(&fp->fi_lock); +		stp = nfsd4_find_and_lock_existing_open(fp, open);  	} else {  		open->op_file = NULL;  		status = nfserr_bad_stateid; @@ -4470,35 +4531,31 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf  			goto out;  	} +	if (!stp) { +		stp = init_open_stateid(fp, open); +		if (!open->op_stp) +			new_stp = true; +	} +  	/*  	 * OPEN the file, or upgrade an existing OPEN.  	 * If truncate fails, the OPEN fails. +	 * +	 * stp is already locked.  	 */ -	if (stp) { +	if (!new_stp) {  		/* Stateid was found, this is an OPEN upgrade */ -		mutex_lock(&stp->st_mutex);  		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);  		if (status) {  			mutex_unlock(&stp->st_mutex);  			goto out;  		}  	} else { -		/* stp is returned locked. 
*/ -		stp = init_open_stateid(fp, open); -		/* See if we lost the race to some other thread */ -		if (stp->st_access_bmap != 0) { -			status = nfs4_upgrade_open(rqstp, fp, current_fh, -						stp, open); -			if (status) { -				mutex_unlock(&stp->st_mutex); -				goto out; -			} -			goto upgrade_out; -		}  		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);  		if (status) { -			mutex_unlock(&stp->st_mutex); +			stp->st_stid.sc_type = NFS4_CLOSED_STID;  			release_open_stateid(stp); +			mutex_unlock(&stp->st_mutex);  			goto out;  		} @@ -4507,7 +4564,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf  		if (stp->st_clnt_odstate == open->op_odstate)  			open->op_odstate = NULL;  	} -upgrade_out: +  	nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);  	mutex_unlock(&stp->st_mutex); @@ -4734,7 +4791,7 @@ nfs4_laundromat(struct nfsd_net *nn)  	spin_unlock(&nn->blocked_locks_lock);  	while (!list_empty(&reaplist)) { -		nbl = list_first_entry(&nn->blocked_locks_lru, +		nbl = list_first_entry(&reaplist,  					struct nfsd4_blocked_lock, nbl_lru);  		list_del_init(&nbl->nbl_lru);  		posix_unblock_lock(&nbl->nbl_lock); @@ -4855,6 +4912,18 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s  	return nfserr_old_stateid;  } +static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_stid *s, bool has_session) +{ +	__be32 ret; + +	spin_lock(&s->sc_lock); +	ret = nfsd4_verify_open_stid(s); +	if (ret == nfs_ok) +		ret = check_stateid_generation(in, &s->sc_stateid, has_session); +	spin_unlock(&s->sc_lock); +	return ret; +} +  static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)  {  	if (ols->st_stateowner->so_is_open_owner && @@ -4868,7 +4937,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)  	struct nfs4_stid *s;  	__be32 status = nfserr_bad_stateid; -	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) +	if 
(ZERO_STATEID(stateid) || ONE_STATEID(stateid) || +		CLOSE_STATEID(stateid))  		return status;  	/* Client debugging aid. */  	if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { @@ -4883,7 +4953,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)  	s = find_stateid_locked(cl, stateid);  	if (!s)  		goto out_unlock; -	status = check_stateid_generation(stateid, &s->sc_stateid, 1); +	status = nfsd4_stid_check_stateid_generation(stateid, s, 1);  	if (status)  		goto out_unlock;  	switch (s->sc_type) { @@ -4926,7 +4996,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,  	else if (typemask & NFS4_DELEG_STID)  		typemask |= NFS4_REVOKED_DELEG_STID; -	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) +	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || +		CLOSE_STATEID(stateid))  		return nfserr_bad_stateid;  	status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);  	if (status == nfserr_stale_clientid) { @@ -5044,7 +5115,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,  				&s, nn);  	if (status)  		return status; -	status = check_stateid_generation(stateid, &s->sc_stateid, +	status = nfsd4_stid_check_stateid_generation(stateid, s,  			nfsd4_has_session(cstate));  	if (status)  		goto out; @@ -5098,7 +5169,9 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)  	struct nfs4_ol_stateid *stp = openlockstateid(s);  	__be32 ret; -	mutex_lock(&stp->st_mutex); +	ret = nfsd4_lock_ol_stateid(stp); +	if (ret) +		goto out_put_stid;  	ret = check_stateid_generation(stateid, &s->sc_stateid, 1);  	if (ret) @@ -5109,11 +5182,13 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)  			    lockowner(stp->st_stateowner)))  		goto out; +	stp->st_stid.sc_type = NFS4_CLOSED_STID;  	release_lock_stateid(stp);  	ret = nfs_ok;  out:  	mutex_unlock(&stp->st_mutex); +out_put_stid:  	nfs4_put_stid(s);  	return ret;  } @@ -5133,6 +5208,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct 
nfsd4_compound_state *cstate,  	s = find_stateid_locked(cl, stateid);  	if (!s)  		goto out_unlock; +	spin_lock(&s->sc_lock);  	switch (s->sc_type) {  	case NFS4_DELEG_STID:  		ret = nfserr_locks_held; @@ -5144,11 +5220,13 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  		ret = nfserr_locks_held;  		break;  	case NFS4_LOCK_STID: +		spin_unlock(&s->sc_lock);  		refcount_inc(&s->sc_count);  		spin_unlock(&cl->cl_lock);  		ret = nfsd4_free_lock_stateid(stateid, s);  		goto out;  	case NFS4_REVOKED_DELEG_STID: +		spin_unlock(&s->sc_lock);  		dp = delegstateid(s);  		list_del_init(&dp->dl_recall_lru);  		spin_unlock(&cl->cl_lock); @@ -5157,6 +5235,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  		goto out;  	/* Default falls through and returns nfserr_bad_stateid */  	} +	spin_unlock(&s->sc_lock);  out_unlock:  	spin_unlock(&cl->cl_lock);  out: @@ -5179,15 +5258,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_  	status = nfsd4_check_seqid(cstate, sop, seqid);  	if (status)  		return status; -	if (stp->st_stid.sc_type == NFS4_CLOSED_STID -		|| stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) -		/* -		 * "Closed" stateid's exist *only* to return -		 * nfserr_replay_me from the previous step, and -		 * revoked delegations are kept only for free_stateid. 
-		 */ -		return nfserr_bad_stateid; -	mutex_lock(&stp->st_mutex); +	status = nfsd4_lock_ol_stateid(stp); +	if (status != nfs_ok) +		return status;  	status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));  	if (status == nfs_ok)  		status = nfs4_check_fh(current_fh, &stp->st_stid); @@ -5367,7 +5440,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)  	bool unhashed;  	LIST_HEAD(reaplist); -	s->st_stid.sc_type = NFS4_CLOSED_STID;  	spin_lock(&clp->cl_lock);  	unhashed = unhash_open_stateid(s, &reaplist); @@ -5407,10 +5479,17 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	nfsd4_bump_seqid(cstate, status);  	if (status)  		goto out;  + +	stp->st_stid.sc_type = NFS4_CLOSED_STID;  	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); -	mutex_unlock(&stp->st_mutex);  	nfsd4_close_open_stateid(stp); +	mutex_unlock(&stp->st_mutex); + +	/* See RFC5661 sectionm 18.2.4 */ +	if (stp->st_stid.sc_client->cl_minorversion) +		memcpy(&close->cl_stateid, &close_stateid, +				sizeof(close->cl_stateid));  	/* put reference from nfs4_preprocess_seqid_op */  	nfs4_put_stid(&stp->st_stid); @@ -5436,7 +5515,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	if (status)  		goto out;  	dp = delegstateid(s); -	status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); +	status = nfsd4_stid_check_stateid_generation(stateid, &dp->dl_stid, nfsd4_has_session(cstate));  	if (status)  		goto put_stateid; @@ -5642,14 +5721,41 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,  	return ret;  } -static void +static struct nfs4_ol_stateid * +find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +{ +	struct nfs4_ol_stateid *lst; +	struct nfs4_client *clp = lo->lo_owner.so_client; + +	lockdep_assert_held(&clp->cl_lock); + +	list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { 
+		if (lst->st_stid.sc_type != NFS4_LOCK_STID) +			continue; +		if (lst->st_stid.sc_file == fp) { +			refcount_inc(&lst->st_stid.sc_count); +			return lst; +		} +	} +	return NULL; +} + +static struct nfs4_ol_stateid *  init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,  		  struct nfs4_file *fp, struct inode *inode,  		  struct nfs4_ol_stateid *open_stp)  {  	struct nfs4_client *clp = lo->lo_owner.so_client; +	struct nfs4_ol_stateid *retstp; -	lockdep_assert_held(&clp->cl_lock); +	mutex_init(&stp->st_mutex); +	mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); +retry: +	spin_lock(&clp->cl_lock); +	spin_lock(&fp->fi_lock); +	retstp = find_lock_stateid(lo, fp); +	if (retstp) +		goto out_unlock;  	refcount_inc(&stp->st_stid.sc_count);  	stp->st_stid.sc_type = NFS4_LOCK_STID; @@ -5659,29 +5765,22 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,  	stp->st_access_bmap = 0;  	stp->st_deny_bmap = open_stp->st_deny_bmap;  	stp->st_openstp = open_stp; -	mutex_init(&stp->st_mutex);  	list_add(&stp->st_locks, &open_stp->st_locks);  	list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); -	spin_lock(&fp->fi_lock);  	list_add(&stp->st_perfile, &fp->fi_stateids); +out_unlock:  	spin_unlock(&fp->fi_lock); -} - -static struct nfs4_ol_stateid * -find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) -{ -	struct nfs4_ol_stateid *lst; -	struct nfs4_client *clp = lo->lo_owner.so_client; - -	lockdep_assert_held(&clp->cl_lock); - -	list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { -		if (lst->st_stid.sc_file == fp) { -			refcount_inc(&lst->st_stid.sc_count); -			return lst; +	spin_unlock(&clp->cl_lock); +	if (retstp) { +		if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { +			nfs4_put_stid(&retstp->st_stid); +			goto retry;  		} +		/* To keep mutex tracking happy */ +		mutex_unlock(&stp->st_mutex); +		stp = retstp;  	} -	return NULL; +	return stp;  }  static struct nfs4_ol_stateid * @@ -5694,26 
+5793,25 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,  	struct nfs4_openowner *oo = openowner(ost->st_stateowner);  	struct nfs4_client *clp = oo->oo_owner.so_client; +	*new = false;  	spin_lock(&clp->cl_lock);  	lst = find_lock_stateid(lo, fi); -	if (lst == NULL) { -		spin_unlock(&clp->cl_lock); -		ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); -		if (ns == NULL) -			return NULL; - -		spin_lock(&clp->cl_lock); -		lst = find_lock_stateid(lo, fi); -		if (likely(!lst)) { -			lst = openlockstateid(ns); -			init_lock_stateid(lst, lo, fi, inode, ost); -			ns = NULL; -			*new = true; -		} -	}  	spin_unlock(&clp->cl_lock); -	if (ns) +	if (lst != NULL) { +		if (nfsd4_lock_ol_stateid(lst) == nfs_ok) +			goto out; +		nfs4_put_stid(&lst->st_stid); +	} +	ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); +	if (ns == NULL) +		return NULL; + +	lst = init_lock_stateid(openlockstateid(ns), lo, fi, inode, ost); +	if (lst == openlockstateid(ns)) +		*new = true; +	else  		nfs4_put_stid(ns); +out:  	return lst;  } @@ -5750,7 +5848,6 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,  	struct nfs4_lockowner *lo;  	struct nfs4_ol_stateid *lst;  	unsigned int strhashval; -	bool hashed;  	lo = find_lockowner_str(cl, &lock->lk_new_owner);  	if (!lo) { @@ -5766,25 +5863,12 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,  			goto out;  	} -retry:  	lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);  	if (lst == NULL) {  		status = nfserr_jukebox;  		goto out;  	} -	mutex_lock(&lst->st_mutex); - -	/* See if it's still hashed to avoid race with FREE_STATEID */ -	spin_lock(&cl->cl_lock); -	hashed = !list_empty(&lst->st_perfile); -	spin_unlock(&cl->cl_lock); - -	if (!hashed) { -		mutex_unlock(&lst->st_mutex); -		nfs4_put_stid(&lst->st_stid); -		goto retry; -	}  	status = nfs_ok;  	*plst = lst;  out: @@ -5990,14 +6074,16 @@ out:  		    seqid_mutating_err(ntohl(status)))  			
lock_sop->lo_owner.so_seqid++; -		mutex_unlock(&lock_stp->st_mutex); -  		/*  		 * If this is a new, never-before-used stateid, and we are  		 * returning an error, then just go ahead and release it.  		 */ -		if (status && new) +		if (status && new) { +			lock_stp->st_stid.sc_type = NFS4_CLOSED_STID;  			release_lock_stateid(lock_stp); +		} + +		mutex_unlock(&lock_stp->st_mutex);  		nfs4_put_stid(&lock_stp->st_stid);  	} @@ -7017,6 +7103,10 @@ static int nfs4_state_create_net(struct net *net)  		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);  	nn->conf_name_tree = RB_ROOT;  	nn->unconf_name_tree = RB_ROOT; +	nn->boot_time = get_seconds(); +	nn->grace_ended = false; +	nn->nfsd4_manager.block_opens = true; +	INIT_LIST_HEAD(&nn->nfsd4_manager.list);  	INIT_LIST_HEAD(&nn->client_lru);  	INIT_LIST_HEAD(&nn->close_lru);  	INIT_LIST_HEAD(&nn->del_recall_lru); @@ -7074,9 +7164,6 @@ nfs4_state_start_net(struct net *net)  	ret = nfs4_state_create_net(net);  	if (ret)  		return ret; -	nn->boot_time = get_seconds(); -	nn->grace_ended = false; -	nn->nfsd4_manager.block_opens = true;  	locks_start_grace(net, &nn->nfsd4_manager);  	nfsd4_client_tracking_init(net);  	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", @@ -7153,7 +7240,7 @@ nfs4_state_shutdown_net(struct net *net)  	spin_unlock(&nn->blocked_locks_lock);  	while (!list_empty(&reaplist)) { -		nbl = list_first_entry(&nn->blocked_locks_lru, +		nbl = list_first_entry(&reaplist,  					struct nfsd4_blocked_lock, nbl_lru);  		list_del_init(&nbl->nbl_lru);  		posix_unblock_lock(&nbl->nbl_lock); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6493df6b1bd5..d107b4426f7e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1241,6 +1241,9 @@ static __net_init int nfsd_init_net(struct net *net)  	nn->nfsd4_grace = 90;  	nn->clverifier_counter = prandom_u32();  	nn->clientid_counter = prandom_u32(); + +	atomic_set(&nn->ntf_refcnt, 0); +	init_waitqueue_head(&nn->ntf_wq);  	return 0;  out_idmap_error: diff 
--git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 33117d4ffce0..89cb484f1cfb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -335,7 +335,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);  	struct sockaddr_in sin; -	if (event != NETDEV_DOWN) +	if ((event != NETDEV_DOWN) || +	    !atomic_inc_not_zero(&nn->ntf_refcnt))  		goto out;  	if (nn->nfsd_serv) { @@ -344,6 +345,8 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,  		sin.sin_addr.s_addr = ifa->ifa_local;  		svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);  	} +	atomic_dec(&nn->ntf_refcnt); +	wake_up(&nn->ntf_wq);  out:  	return NOTIFY_DONE; @@ -363,7 +366,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);  	struct sockaddr_in6 sin6; -	if (event != NETDEV_DOWN) +	if ((event != NETDEV_DOWN) || +	    !atomic_inc_not_zero(&nn->ntf_refcnt))  		goto out;  	if (nn->nfsd_serv) { @@ -374,7 +378,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,  			sin6.sin6_scope_id = ifa->idev->dev->ifindex;  		svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);  	} - +	atomic_dec(&nn->ntf_refcnt); +	wake_up(&nn->ntf_wq);  out:  	return NOTIFY_DONE;  } @@ -391,6 +396,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)  {  	struct nfsd_net *nn = net_generic(net, nfsd_net_id); +	atomic_dec(&nn->ntf_refcnt);  	/* check if the notifier still has clients */  	if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {  		unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -398,6 +404,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)  		unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);  #endif  	} +	wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);  	/*  	 * write_ports can create the server without actually starting @@ -517,6 +524,7 @@ 
int nfsd_create_serv(struct net *net)  		register_inet6addr_notifier(&nfsd_inet6addr_notifier);  #endif  	} +	atomic_inc(&nn->ntf_refcnt);  	ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */  	return 0;  } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index f572538dcc4f..9f3ffba41533 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1979,7 +1979,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,  					     struct the_nilfs *nilfs)  {  	struct nilfs_inode_info *ii, *n; -	int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE); +	int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE);  	int defer_iput = false;  	spin_lock(&nilfs->ns_inode_lock); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3ce20cd44a20..3073b646e1ba 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -141,7 +141,7 @@ void __nilfs_error(struct super_block *sb, const char *function,  		if (nilfs_test_opt(nilfs, ERRORS_RO)) {  			printk(KERN_CRIT "Remounting filesystem read-only\n"); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  		}  	} @@ -869,7 +869,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,  	/* FS independent flags */  #ifdef NILFS_ATIME_DISABLE -	sb->s_flags |= MS_NOATIME; +	sb->s_flags |= SB_NOATIME;  #endif  	nilfs_set_default_options(sb, sbp); @@ -1133,7 +1133,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)  		err = -EINVAL;  		goto restore_opts;  	} -	sb->s_flags = (sb->s_flags & ~MS_POSIXACL); +	sb->s_flags = (sb->s_flags & ~SB_POSIXACL);  	err = -EINVAL; @@ -1143,12 +1143,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)  		goto restore_opts;  	} -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		goto out; -	if (*flags & MS_RDONLY) { +	if (*flags & SB_RDONLY) {  		/* Shutting down log writer */  		nilfs_detach_log_writer(sb); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags 
|= SB_RDONLY;  		/*  		 * Remounting a valid RW partition RDONLY, so set @@ -1178,7 +1178,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)  			goto restore_opts;  		} -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  		root = NILFS_I(d_inode(sb->s_root))->i_root;  		err = nilfs_attach_log_writer(sb, root); @@ -1212,7 +1212,7 @@ static int nilfs_parse_snapshot_option(const char *option,  	const char *msg = NULL;  	int err; -	if (!(sd->flags & MS_RDONLY)) { +	if (!(sd->flags & SB_RDONLY)) {  		msg = "read-only option is not specified";  		goto parse_error;  	} @@ -1286,7 +1286,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,  	struct dentry *root_dentry;  	int err, s_new = false; -	if (!(flags & MS_RDONLY)) +	if (!(flags & SB_RDONLY))  		mode |= FMODE_WRITE;  	sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); @@ -1327,14 +1327,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags,  		snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);  		sb_set_blocksize(s, block_size(sd.bdev)); -		err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); +		err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);  		if (err)  			goto failed_super; -		s->s_flags |= MS_ACTIVE; +		s->s_flags |= SB_ACTIVE;  	} else if (!sd.cno) {  		if (nilfs_tree_is_busy(s->s_root)) { -			if ((flags ^ s->s_flags) & MS_RDONLY) { +			if ((flags ^ s->s_flags) & SB_RDONLY) {  				nilfs_msg(s, KERN_ERR,  					  "the device already has a %s mount.",  					  sb_rdonly(s) ? 
"read-only" : "read/write"); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index afebb5067cec..1a85317e83f0 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -220,7 +220,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)  	if (!valid_fs) {  		nilfs_msg(sb, KERN_WARNING, "mounting unchecked fs"); -		if (s_flags & MS_RDONLY) { +		if (s_flags & SB_RDONLY) {  			nilfs_msg(sb, KERN_INFO,  				  "recovery required for readonly filesystem");  			nilfs_msg(sb, KERN_INFO, @@ -286,7 +286,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)  	if (valid_fs)  		goto skip_recovery; -	if (s_flags & MS_RDONLY) { +	if (s_flags & SB_RDONLY) {  		__u64 features;  		if (nilfs_test_opt(nilfs, NORECOVERY)) { @@ -309,7 +309,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)  			err = -EROFS;  			goto failed_unload;  		} -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  	} else if (nilfs_test_opt(nilfs, NORECOVERY)) {  		nilfs_msg(sb, KERN_ERR,  			  "recovery cancelled because norecovery option was specified for a read/write mount"); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 81d8959b6aef..219b269c737e 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -67,7 +67,7 @@ void fsnotify_unmount_inodes(struct super_block *sb)  		/*  		 * If i_count is zero, the inode cannot have any watches and -		 * doing an __iget/iput with MS_ACTIVE clear would actually +		 * doing an __iget/iput with SB_ACTIVE clear would actually  		 * evict all inodes with zero i_count from icache which is  		 * unnecessarily violent and may in fact be illegal to do.  		 
*/ diff --git a/fs/nsfs.c b/fs/nsfs.c index ef243e14b6eb..7c6f76d29f56 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -255,5 +255,5 @@ void __init nsfs_init(void)  	nsfs_mnt = kern_mount(&nsfs);  	if (IS_ERR(nsfs_mnt))  		panic("can't set nsfs up\n"); -	nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; +	nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER;  } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 3f70f041dbe9..bb7159f697f2 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -473,7 +473,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)  #ifndef NTFS_RW  	/* For read-only compiled driver, enforce read-only flag. */ -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  #else /* NTFS_RW */  	/*  	 * For the read-write compiled driver, if we are remounting read-write, @@ -487,7 +487,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)  	 * When remounting read-only, mark the volume clean if no volume errors  	 * have occurred.  	 */ -	if (sb_rdonly(sb) && !(*flags & MS_RDONLY)) { +	if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) {  		static const char *es = ".  Cannot remount read-write.";  		/* Remounting read-write. */ @@ -548,7 +548,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)  			NVolSetErrors(vol);  			return -EROFS;  		} -	} else if (!sb_rdonly(sb) && (*flags & MS_RDONLY)) { +	} else if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) {  		/* Remounting read-only. */  		if (!NVolErrors(vol)) {  			if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) @@ -1799,7 +1799,7 @@ static bool load_system_files(ntfs_volume *vol)  						es3);  				goto iput_mirr_err_out;  			} -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			ntfs_error(sb, "%s.  Mounting read-only%s",  					!vol->mftmirr_ino ? es1 : es2, es3);  		} else @@ -1937,7 +1937,7 @@ get_ctx_vol_failed:  						es1, es2);  				goto iput_vol_err_out;  			} -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			ntfs_error(sb, "%s.  
Mounting read-only%s", es1, es2);  		} else  			ntfs_warning(sb, "%s.  Will not be able to remount " @@ -1974,7 +1974,7 @@ get_ctx_vol_failed:  				}  				goto iput_logfile_err_out;  			} -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);  		} else  			ntfs_warning(sb, "%s.  Will not be able to remount " @@ -2019,7 +2019,7 @@ get_ctx_vol_failed:  						es1, es2);  				goto iput_root_err_out;  			} -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);  		} else  			ntfs_warning(sb, "%s.  Will not be able to remount " @@ -2042,7 +2042,7 @@ get_ctx_vol_failed:  			goto iput_root_err_out;  		}  		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		/*  		 * Do not set NVolErrors() because ntfs_remount() might manage  		 * to set the dirty flag in which case all would be well. @@ -2055,7 +2055,7 @@ get_ctx_vol_failed:  	 * If (still) a read-write mount, set the NT4 compatibility flag on  	 * newer NTFS version volumes.  	 */ -	if (!(sb->s_flags & MS_RDONLY) && (vol->major_ver > 1) && +	if (!(sb->s_flags & SB_RDONLY) && (vol->major_ver > 1) &&  			ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) {  		static const char *es1 = "Failed to set NT4 compatibility flag";  		static const char *es2 = ".  Run chkdsk."; @@ -2069,7 +2069,7 @@ get_ctx_vol_failed:  			goto iput_root_err_out;  		}  		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		NVolSetErrors(vol);  	}  #endif @@ -2087,7 +2087,7 @@ get_ctx_vol_failed:  			goto iput_root_err_out;  		}  		ntfs_error(sb, "%s.  
Mounting read-only%s", es1, es2); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		NVolSetErrors(vol);  	}  #endif /* NTFS_RW */ @@ -2128,7 +2128,7 @@ get_ctx_vol_failed:  						es1, es2);  				goto iput_quota_err_out;  			} -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);  		} else  			ntfs_warning(sb, "%s.  Will not be able to remount " @@ -2150,7 +2150,7 @@ get_ctx_vol_failed:  			goto iput_quota_err_out;  		}  		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		NVolSetErrors(vol);  	}  	/* @@ -2171,7 +2171,7 @@ get_ctx_vol_failed:  						es1, es2);  				goto iput_usnjrnl_err_out;  			} -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);  		} else  			ntfs_warning(sb, "%s.  Will not be able to remount " @@ -2194,7 +2194,7 @@ get_ctx_vol_failed:  			goto iput_usnjrnl_err_out;  		}  		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		NVolSetErrors(vol);  	}  #endif /* NTFS_RW */ @@ -2728,7 +2728,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)  	lockdep_off();  	ntfs_debug("Entering.");  #ifndef NTFS_RW -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  #endif /* ! NTFS_RW */  	/* Allocate a new ntfs_volume and place it in sb->s_fs_info. 
*/  	sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index dc455d45a66a..a1d051055472 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -227,7 +227,7 @@ int ocfs2_should_update_atime(struct inode *inode,  		return 0;  	if ((inode->i_flags & S_NOATIME) || -	    ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) +	    ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)))  		return 0;  	/* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 040bbb6a6e4b..80efa5699fb0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -675,9 +675,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)  	}  	/* We're going to/from readonly mode. */ -	if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) { +	if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {  		/* Disable quota accounting before remounting RO */ -		if (*flags & MS_RDONLY) { +		if (*flags & SB_RDONLY) {  			ret = ocfs2_susp_quotas(osb, 0);  			if (ret < 0)  				goto out; @@ -691,8 +691,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)  			goto unlock_osb;  		} -		if (*flags & MS_RDONLY) { -			sb->s_flags |= MS_RDONLY; +		if (*flags & SB_RDONLY) { +			sb->s_flags |= SB_RDONLY;  			osb->osb_flags |= OCFS2_OSB_SOFT_RO;  		} else {  			if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { @@ -709,14 +709,14 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)  				ret = -EINVAL;  				goto unlock_osb;  			} -			sb->s_flags &= ~MS_RDONLY; +			sb->s_flags &= ~SB_RDONLY;  			osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;  		}  		trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);  unlock_osb:  		spin_unlock(&osb->osb_lock);  		/* Enable quota accounting after remounting RW */ -		if (!ret && !(*flags & MS_RDONLY)) { +		if (!ret && !(*flags & SB_RDONLY)) {  			if (sb_any_quota_suspended(sb))  				ret = ocfs2_susp_quotas(osb, 1);  			else @@ -724,7 +724,7 @@ unlock_osb:  			if (ret < 
0) {  				/* Return back changes... */  				spin_lock(&osb->osb_lock); -				sb->s_flags |= MS_RDONLY; +				sb->s_flags |= SB_RDONLY;  				osb->osb_flags |= OCFS2_OSB_SOFT_RO;  				spin_unlock(&osb->osb_lock);  				goto out; @@ -744,9 +744,9 @@ unlock_osb:  		if (!ocfs2_is_hard_readonly(osb))  			ocfs2_set_journal_params(osb); -		sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | +		sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |  			((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? -							MS_POSIXACL : 0); +							SB_POSIXACL : 0);  	}  out:  	return ret; @@ -1057,10 +1057,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_magic = OCFS2_SUPER_MAGIC; -	sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) | -		((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); +	sb->s_flags = (sb->s_flags & ~(SB_POSIXACL | SB_NOSEC)) | +		((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); -	/* Hard readonly mode only if: bdev_read_only, MS_RDONLY, +	/* Hard readonly mode only if: bdev_read_only, SB_RDONLY,  	 * heartbeat=none */  	if (bdev_read_only(sb->s_bdev)) {  		if (!sb_rdonly(sb)) { @@ -2057,7 +2057,7 @@ static int ocfs2_initialize_super(struct super_block *sb,  	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;  	sb->s_xattr = ocfs2_xattr_handlers;  	sb->s_time_gran = 1; -	sb->s_flags |= MS_NOATIME; +	sb->s_flags |= SB_NOATIME;  	/* this is needed to support O_LARGEFILE */  	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);  	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); @@ -2568,7 +2568,7 @@ static int ocfs2_handle_error(struct super_block *sb)  			return rv;  		pr_crit("OCFS2: File system is now read-only.\n"); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  		ocfs2_set_ro_flag(osb, 0);  	} diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 5fdf269ba82e..c5898c59d411 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -901,7 +901,7 @@ static int ocfs2_xattr_list_entry(struct 
super_block *sb,  	case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:  	case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: -		if (!(sb->s_flags & MS_POSIXACL)) +		if (!(sb->s_flags & SB_POSIXACL))  			return 0;  		break; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 13215f26e321..2200662a9bf1 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -369,7 +369,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino)  static int openprom_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_NOATIME; +	*flags |= SB_NOATIME;  	return 0;  } @@ -386,7 +386,7 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)  	struct op_inode_info *oi;  	int ret; -	s->s_flags |= MS_NOATIME; +	s->s_flags |= SB_NOATIME;  	s->s_blocksize = 1024;  	s->s_blocksize_bits = 10;  	s->s_magic = OPENPROM_SUPER_MAGIC; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 366750eef201..36f1390b5ed7 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -40,7 +40,7 @@ static int orangefs_show_options(struct seq_file *m, struct dentry *root)  {  	struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(root->d_sb); -	if (root->d_sb->s_flags & MS_POSIXACL) +	if (root->d_sb->s_flags & SB_POSIXACL)  		seq_puts(m, ",acl");  	if (orangefs_sb->flags & ORANGEFS_OPT_INTR)  		seq_puts(m, ",intr"); @@ -60,7 +60,7 @@ static int parse_mount_options(struct super_block *sb, char *options,  	 * Force any potential flags that might be set from the mount  	 * to zero, ie, initialize to unset.  	 
*/ -	sb->s_flags &= ~MS_POSIXACL; +	sb->s_flags &= ~SB_POSIXACL;  	orangefs_sb->flags &= ~ORANGEFS_OPT_INTR;  	orangefs_sb->flags &= ~ORANGEFS_OPT_LOCAL_LOCK; @@ -73,7 +73,7 @@ static int parse_mount_options(struct super_block *sb, char *options,  		token = match_token(p, tokens, args);  		switch (token) {  		case Opt_acl: -			sb->s_flags |= MS_POSIXACL; +			sb->s_flags |= SB_POSIXACL;  			break;  		case Opt_intr:  			orangefs_sb->flags |= ORANGEFS_OPT_INTR; @@ -507,7 +507,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,  	ret = orangefs_fill_sb(sb,  	      &new_op->downcall.resp.fs_mount, data, -	      flags & MS_SILENT ? 1 : 0); +	      flags & SB_SILENT ? 1 : 0);  	if (ret) {  		d = ERR_PTR(ret); diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index cbfc196e5dc5..5ac415466861 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -24,6 +24,16 @@ config OVERLAY_FS_REDIRECT_DIR  	  an overlay which has redirects on a kernel that doesn't support this  	  feature will have unexpected results. +config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW +	bool "Overlayfs: follow redirects even if redirects are turned off" +	default y +	depends on OVERLAY_FS +	help +	  Disable this to get a possibly more secure configuration, but that +	  might not be backward compatible with previous kernels. 
+ +	  For more information, see Documentation/filesystems/overlayfs.txt +  config OVERLAY_FS_INDEX  	bool "Overlayfs: turn on inodes index feature by default"  	depends on OVERLAY_FS diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index e13921824c70..f9788bc116a8 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -887,7 +887,8 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)  		spin_unlock(&dentry->d_lock);  	} else {  		kfree(redirect); -		pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); +		pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n", +				    err);  		/* Fall back to userspace copy-up */  		err = -EXDEV;  	} diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 625ed8066570..beb945e1963c 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -435,7 +435,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,  	/* Check if index is orphan and don't warn before cleaning it */  	if (d_inode(index)->i_nlink == 1 && -	    ovl_get_nlink(index, origin.dentry, 0) == 0) +	    ovl_get_nlink(origin.dentry, index, 0) == 0)  		err = -ENOENT;  	dput(origin.dentry); @@ -681,6 +681,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,  		if (d.stop)  			break; +		/* +		 * Following redirects can have security consequences: it's like +		 * a symlink into the lower layer without the permission checks. +		 * This is only a problem if the upper layer is untrusted (e.g +		 * comes from an USB drive).  This can allow a non-readable file +		 * or directory to become readable. +		 * +		 * Only following redirects when redirects are enabled disables +		 * this attack vector when not necessary. 
+		 */ +		err = -EPERM; +		if (d.redirect && !ofs->config.redirect_follow) { +			pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry); +			goto out_put; +		} +  		if (d.redirect && d.redirect[0] == '/' && poe != roe) {  			poe = roe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 13eab09a6b6f..b489099ccd49 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -180,7 +180,7 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)  static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)  {  	struct dentry *ret = vfs_tmpfile(dentry, mode, 0); -	int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; +	int err = PTR_ERR_OR_ZERO(ret);  	pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);  	return ret; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 752bab645879..9d0bc03bf6e4 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -14,6 +14,8 @@ struct ovl_config {  	char *workdir;  	bool default_permissions;  	bool redirect_dir; +	bool redirect_follow; +	const char *redirect_mode;  	bool index;  }; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0daa4354fec4..8c98578d27a1 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -499,7 +499,7 @@ out:  	return err;  fail: -	pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n", +	pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",  			    p->name, err);  	goto out;  } @@ -663,7 +663,10 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)  			return PTR_ERR(rdt.cache);  	} -	return iterate_dir(od->realfile, &rdt.ctx); +	err = iterate_dir(od->realfile, &rdt.ctx); +	ctx->pos = rdt.ctx.pos; + +	return err;  } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index be03578181d2..76440feb79f6 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -33,6 +33,13 @@ 
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);  MODULE_PARM_DESC(ovl_redirect_dir_def,  		 "Default to on or off for the redirect_dir feature"); +static bool ovl_redirect_always_follow = +	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); +module_param_named(redirect_always_follow, ovl_redirect_always_follow, +		   bool, 0644); +MODULE_PARM_DESC(ovl_redirect_always_follow, +		 "Follow redirects even if redirect_dir feature is turned off"); +  static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);  module_param_named(index, ovl_index_def, bool, 0644);  MODULE_PARM_DESC(ovl_index_def, @@ -232,6 +239,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)  	kfree(ofs->config.lowerdir);  	kfree(ofs->config.upperdir);  	kfree(ofs->config.workdir); +	kfree(ofs->config.redirect_mode);  	if (ofs->creator_cred)  		put_cred(ofs->creator_cred);  	kfree(ofs); @@ -244,6 +252,7 @@ static void ovl_put_super(struct super_block *sb)  	ovl_free_fs(ofs);  } +/* Sync real dirty inodes in upper filesystem (if it exists) */  static int ovl_sync_fs(struct super_block *sb, int wait)  {  	struct ovl_fs *ofs = sb->s_fs_info; @@ -252,14 +261,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait)  	if (!ofs->upper_mnt)  		return 0; -	upper_sb = ofs->upper_mnt->mnt_sb; -	if (!upper_sb->s_op->sync_fs) + +	/* +	 * If this is a sync(2) call or an emergency sync, all the super blocks +	 * will be iterated, including upper_sb, so no need to do anything. +	 * +	 * If this is a syncfs(2) call, then we do need to call +	 * sync_filesystem() on upper_sb, but enough if we do it when being +	 * called with wait == 1. 
+	 */ +	if (!wait)  		return 0; -	/* real inodes have already been synced by sync_filesystem(ovl_sb) */ +	upper_sb = ofs->upper_mnt->mnt_sb; +  	down_read(&upper_sb->s_umount); -	ret = upper_sb->s_op->sync_fs(upper_sb, wait); +	ret = sync_filesystem(upper_sb);  	up_read(&upper_sb->s_umount); +  	return ret;  } @@ -295,6 +314,11 @@ static bool ovl_force_readonly(struct ovl_fs *ofs)  	return (!ofs->upper_mnt || !ofs->workdir);  } +static const char *ovl_redirect_mode_def(void) +{ +	return ovl_redirect_dir_def ? "on" : "off"; +} +  /**   * ovl_show_options   * @@ -313,12 +337,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)  	}  	if (ofs->config.default_permissions)  		seq_puts(m, ",default_permissions"); -	if (ofs->config.redirect_dir != ovl_redirect_dir_def) -		seq_printf(m, ",redirect_dir=%s", -			   ofs->config.redirect_dir ? "on" : "off"); +	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0) +		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);  	if (ofs->config.index != ovl_index_def) -		seq_printf(m, ",index=%s", -			   ofs->config.index ? "on" : "off"); +		seq_printf(m, ",index=%s", ofs->config.index ? 
"on" : "off");  	return 0;  } @@ -326,7 +348,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)  {  	struct ovl_fs *ofs = sb->s_fs_info; -	if (!(*flags & MS_RDONLY) && ovl_force_readonly(ofs)) +	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))  		return -EROFS;  	return 0; @@ -348,8 +370,7 @@ enum {  	OPT_UPPERDIR,  	OPT_WORKDIR,  	OPT_DEFAULT_PERMISSIONS, -	OPT_REDIRECT_DIR_ON, -	OPT_REDIRECT_DIR_OFF, +	OPT_REDIRECT_DIR,  	OPT_INDEX_ON,  	OPT_INDEX_OFF,  	OPT_ERR, @@ -360,8 +381,7 @@ static const match_table_t ovl_tokens = {  	{OPT_UPPERDIR,			"upperdir=%s"},  	{OPT_WORKDIR,			"workdir=%s"},  	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"}, -	{OPT_REDIRECT_DIR_ON,		"redirect_dir=on"}, -	{OPT_REDIRECT_DIR_OFF,		"redirect_dir=off"}, +	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},  	{OPT_INDEX_ON,			"index=on"},  	{OPT_INDEX_OFF,			"index=off"},  	{OPT_ERR,			NULL} @@ -390,10 +410,37 @@ static char *ovl_next_opt(char **s)  	return sbegin;  } +static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) +{ +	if (strcmp(mode, "on") == 0) { +		config->redirect_dir = true; +		/* +		 * Does not make sense to have redirect creation without +		 * redirect following. 
+		 */ +		config->redirect_follow = true; +	} else if (strcmp(mode, "follow") == 0) { +		config->redirect_follow = true; +	} else if (strcmp(mode, "off") == 0) { +		if (ovl_redirect_always_follow) +			config->redirect_follow = true; +	} else if (strcmp(mode, "nofollow") != 0) { +		pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n", +		       mode); +		return -EINVAL; +	} + +	return 0; +} +  static int ovl_parse_opt(char *opt, struct ovl_config *config)  {  	char *p; +	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); +	if (!config->redirect_mode) +		return -ENOMEM; +  	while ((p = ovl_next_opt(&opt)) != NULL) {  		int token;  		substring_t args[MAX_OPT_ARGS]; @@ -428,12 +475,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)  			config->default_permissions = true;  			break; -		case OPT_REDIRECT_DIR_ON: -			config->redirect_dir = true; -			break; - -		case OPT_REDIRECT_DIR_OFF: -			config->redirect_dir = false; +		case OPT_REDIRECT_DIR: +			kfree(config->redirect_mode); +			config->redirect_mode = match_strdup(&args[0]); +			if (!config->redirect_mode) +				return -ENOMEM;  			break;  		case OPT_INDEX_ON: @@ -458,7 +504,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)  		config->workdir = NULL;  	} -	return 0; +	return ovl_parse_redirect_mode(config, config->redirect_mode);  }  #define OVL_WORKDIR_NAME "work" @@ -1160,7 +1206,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)  	if (!cred)  		goto out_err; -	ofs->config.redirect_dir = ovl_redirect_dir_def;  	ofs->config.index = ovl_index_def;  	err = ovl_parse_opt((char *) data, &ofs->config);  	if (err) @@ -1190,7 +1235,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)  			goto out_err;  		if (!ofs->workdir) -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;  		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; @@ -1203,7 
+1248,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)  	/* If the upper fs is nonexistent, we mark overlayfs r/o too */  	if (!ofs->upper_mnt) -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)  		ofs->same_sb = NULL; @@ -1213,7 +1258,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)  			goto out_free_oe;  		if (!ofs->indexdir) -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  	}  	/* Show index=off/on in /proc/mounts for any of the reasons above */ @@ -1227,7 +1272,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_op = &ovl_super_operations;  	sb->s_xattr = ovl_xattr_handlers;  	sb->s_fs_info = ofs; -	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; +	sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;  	err = -ENOMEM;  	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); diff --git a/fs/proc/base.c b/fs/proc/base.c index 31934cb9dfc8..60316b52d659 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -443,8 +443,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,  		save_stack_trace_tsk(task, &trace);  		for (i = 0; i < trace.nr_entries; i++) { -			seq_printf(m, "[<%pK>] %pB\n", -				   (void *)entries[i], (void *)entries[i]); +			seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);  		}  		unlock_trace(task);  	} @@ -2269,7 +2268,7 @@ static int show_timer(struct seq_file *m, void *v)  	notify = timer->it_sigev_notify;  	seq_printf(m, "ID: %d\n", timer->it_id); -	seq_printf(m, "signal: %d/%p\n", +	seq_printf(m, "signal: %d/%px\n",  		   timer->sigq->info.si_signo,  		   timer->sigq->info.si_value.sival_ptr);  	seq_printf(m, "notify: %s/%s.%d\n", diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 225f541f7078..dd0f82622427 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -483,7 +483,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent)  	/* User space would 
break if executables or devices appear on proc */  	s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; -	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; +	s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;  	s->s_blocksize = 1024;  	s->s_blocksize_bits = 10;  	s->s_magic = PROC_SUPER_MAGIC; diff --git a/fs/proc/root.c b/fs/proc/root.c index 4e42aba97f2e..ede8e64974be 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -91,7 +91,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,  {  	struct pid_namespace *ns; -	if (flags & MS_KERNMOUNT) { +	if (flags & SB_KERNMOUNT) {  		ns = data;  		data = NULL;  	} else { diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 7b635d173213..b786840facd9 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -45,10 +45,10 @@ struct proc_fs_info {  static int show_sb_opts(struct seq_file *m, struct super_block *sb)  {  	static const struct proc_fs_info fs_info[] = { -		{ MS_SYNCHRONOUS, ",sync" }, -		{ MS_DIRSYNC, ",dirsync" }, -		{ MS_MANDLOCK, ",mand" }, -		{ MS_LAZYTIME, ",lazytime" }, +		{ SB_SYNCHRONOUS, ",sync" }, +		{ SB_DIRSYNC, ",dirsync" }, +		{ SB_MANDLOCK, ",mand" }, +		{ SB_LAZYTIME, ",lazytime" },  		{ 0, NULL }  	};  	const struct proc_fs_info *fs_infop; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 3a67cfb142d8..3d46fe302fcb 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -47,7 +47,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)  	sync_filesystem(sb);  	qs = qnx4_sb(sb);  	qs->Version = QNX4_VERSION; -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  	return 0;  } @@ -199,7 +199,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)  	s->s_op = &qnx4_sops;  	s->s_magic = QNX4_SUPER_MAGIC; -	s->s_flags |= MS_RDONLY;	/* Yup, read-only yet */ +	s->s_flags |= SB_RDONLY;	/* Yup, read-only yet */  	/* Check the superblock signature. 
Since the qnx4 code is  	   dangerous, we should leave as quickly as possible diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 1192422a1c56..4aeb26bcb4d0 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -56,7 +56,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root)  static int qnx6_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  	return 0;  } @@ -427,7 +427,7 @@ mmi_success:  	}  	s->s_op = &qnx6_sops;  	s->s_magic = QNX6_SUPER_MAGIC; -	s->s_flags |= MS_RDONLY;        /* Yup, read-only yet */ +	s->s_flags |= SB_RDONLY;        /* Yup, read-only yet */  	/* ease the later tree level calculations */  	sbi = QNX6_SB(s); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39f1b0b0c76f..020c597ef9b6 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -941,12 +941,13 @@ static int dqinit_needed(struct inode *inode, int type)  }  /* This routine is guarded by s_umount semaphore */ -static void add_dquot_ref(struct super_block *sb, int type) +static int add_dquot_ref(struct super_block *sb, int type)  {  	struct inode *inode, *old_inode = NULL;  #ifdef CONFIG_QUOTA_DEBUG  	int reserved = 0;  #endif +	int err = 0;  	spin_lock(&sb->s_inode_list_lock);  	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { @@ -966,7 +967,11 @@ static void add_dquot_ref(struct super_block *sb, int type)  			reserved = 1;  #endif  		iput(old_inode); -		__dquot_initialize(inode, type); +		err = __dquot_initialize(inode, type); +		if (err) { +			iput(inode); +			goto out; +		}  		/*  		 * We hold a reference to 'inode' so it couldn't have been @@ -981,7 +986,7 @@ static void add_dquot_ref(struct super_block *sb, int type)  	}  	spin_unlock(&sb->s_inode_list_lock);  	iput(old_inode); - +out:  #ifdef CONFIG_QUOTA_DEBUG  	if (reserved) {  		quota_error(sb, "Writes happened before quota was turned on " @@ -989,6 +994,7 @@ static void add_dquot_ref(struct super_block *sb, int type) 
 			"Please run quotacheck(8)");  	}  #endif +	return err;  }  /* @@ -2379,10 +2385,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,  	dqopt->flags |= dquot_state_flag(flags, type);  	spin_unlock(&dq_state_lock); -	add_dquot_ref(sb, type); - -	return 0; +	error = add_dquot_ref(sb, type); +	if (error) +		dquot_disable(sb, type, flags); +	return error;  out_file_init:  	dqopt->files[type] = NULL;  	iput(inode); @@ -2985,7 +2992,8 @@ static int __init dquot_init(void)  	pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld,"  		" %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); -	register_shrinker(&dqcache_shrinker); +	if (register_shrinker(&dqcache_shrinker)) +		panic("Cannot register dquot shrinker");  	return 0;  } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 11a48affa882..b13fc024d2ee 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2106,7 +2106,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,  			journal_end(th);  			goto out_inserted_sd;  		} -	} else if (inode->i_sb->s_flags & MS_POSIXACL) { +	} else if (inode->i_sb->s_flags & SB_POSIXACL) {  		reiserfs_warning(inode->i_sb, "jdm-13090",  				 "ACLs aren't enabled in the fs, "  				 "but vfs thinks they are!"); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 69ff280bdfe8..70057359fbaf 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1960,7 +1960,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,  	/*  	 * Cancel flushing of old commits. Note that neither of these works  	 * will be requeued because superblock is being shutdown and doesn't -	 * have MS_ACTIVE set. +	 * have SB_ACTIVE set.  	 */  	reiserfs_cancel_old_flush(sb);  	/* wait for all commits to finish */ @@ -4302,7 +4302,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)  		 * Avoid queueing work when sb is being shut down. 
Transaction  		 * will be flushed on journal shutdown.  		 */ -		if (sb->s_flags & MS_ACTIVE) +		if (sb->s_flags & SB_ACTIVE)  			queue_delayed_work(REISERFS_SB(sb)->commit_wq,  					   &journal->j_work, HZ / 10);  	} @@ -4393,7 +4393,7 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)  	if (!journal->j_errno)  		journal->j_errno = errno; -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  	set_bit(J_ABORTED, &journal->j_state);  #ifdef CONFIG_REISERFS_CHECK diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 64f49cafbc5b..7e288d97adcb 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -390,7 +390,7 @@ void __reiserfs_error(struct super_block *sb, const char *id,  		return;  	reiserfs_info(sb, "Remounting filesystem read-only\n"); -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  	reiserfs_abort_journal(sb, -EIO);  } @@ -409,7 +409,7 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)  	printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id,  	       error_buf); -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  	reiserfs_abort_journal(sb, errno);  } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5464ec517702..1fc934d24459 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -121,7 +121,7 @@ void reiserfs_schedule_old_flush(struct super_block *s)  	 * Avoid scheduling flush when sb is being shut down. It can race  	 * with journal shutdown and free still queued delayed work.  	 
*/ -	if (sb_rdonly(s) || !(s->s_flags & MS_ACTIVE)) +	if (sb_rdonly(s) || !(s->s_flags & SB_ACTIVE))  		return;  	spin_lock(&sbi->old_work_lock); @@ -252,11 +252,11 @@ static int finish_unfinished(struct super_block *s)  #ifdef CONFIG_QUOTA  	/* Needed for iput() to work correctly and not trash data */ -	if (s->s_flags & MS_ACTIVE) { +	if (s->s_flags & SB_ACTIVE) {  		ms_active_set = 0;  	} else {  		ms_active_set = 1; -		s->s_flags |= MS_ACTIVE; +		s->s_flags |= SB_ACTIVE;  	}  	/* Turn on quotas so that they are updated correctly */  	for (i = 0; i < REISERFS_MAXQUOTAS; i++) { @@ -411,7 +411,7 @@ static int finish_unfinished(struct super_block *s)  	reiserfs_write_lock(s);  	if (ms_active_set)  		/* Restore the flag back */ -		s->s_flags &= ~MS_ACTIVE; +		s->s_flags &= ~SB_ACTIVE;  #endif  	pathrelse(&path);  	if (done) @@ -1521,7 +1521,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)  			goto out_err_unlock;  	} -	if (*mount_flags & MS_RDONLY) { +	if (*mount_flags & SB_RDONLY) {  		reiserfs_write_unlock(s);  		reiserfs_xattr_init(s, *mount_flags);  		/* remount read-only */ @@ -1567,7 +1567,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)  		REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);  		/* now it is safe to call journal_begin */ -		s->s_flags &= ~MS_RDONLY; +		s->s_flags &= ~SB_RDONLY;  		err = journal_begin(&th, s, 10);  		if (err)  			goto out_err_unlock; @@ -1575,7 +1575,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)  		/* Mount a partition which is read-only, read-write */  		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);  		REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); -		s->s_flags &= ~MS_RDONLY; +		s->s_flags &= ~SB_RDONLY;  		set_sb_umount_state(rs, REISERFS_ERROR_FS);  		if (!old_format_only(s))  			set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); @@ -1590,7 +1590,7 @@ static int reiserfs_remount(struct super_block *s, int 
*mount_flags, char *arg)  		goto out_err_unlock;  	reiserfs_write_unlock(s); -	if (!(*mount_flags & MS_RDONLY)) { +	if (!(*mount_flags & SB_RDONLY)) {  		dquot_resume(s, -1);  		reiserfs_write_lock(s);  		finish_unfinished(s); @@ -2055,7 +2055,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)  	if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) {  		SWARN(silent, s, "clm-7000",  		      "Detected readonly device, marking FS readonly"); -		s->s_flags |= MS_RDONLY; +		s->s_flags |= SB_RDONLY;  	}  	args.objectid = REISERFS_ROOT_OBJECTID;  	args.dirid = REISERFS_ROOT_PARENT_OBJECTID; @@ -2591,7 +2591,6 @@ out:  		return err;  	if (inode->i_size < off + len - towrite)  		i_size_write(inode, off + len - towrite); -	inode->i_version++;  	inode->i_mtime = inode->i_ctime = current_time(inode);  	mark_inode_dirty(inode);  	return len - towrite; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 46492fb37a4c..5dbf5324bdda 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -959,7 +959,7 @@ int reiserfs_lookup_privroot(struct super_block *s)  /*   * We need to take a copy of the mount flags since things like - * MS_RDONLY don't get set until *after* we're called. + * SB_RDONLY don't get set until *after* we're called.   * mount_flags != mount_options   */  int reiserfs_xattr_init(struct super_block *s, int mount_flags) @@ -971,7 +971,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)  	if (err)  		goto error; -	if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) { +	if (d_really_is_negative(privroot) && !(mount_flags & SB_RDONLY)) {  		inode_lock(d_inode(s->s_root));  		err = create_privroot(REISERFS_SB(s)->priv_root);  		inode_unlock(d_inode(s->s_root)); @@ -999,11 +999,11 @@ error:  		clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt);  	} -	/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ +	/* The super_block SB_POSIXACL must mirror the (no)acl mount option. 
*/  	if (reiserfs_posixacl(s)) -		s->s_flags |= MS_POSIXACL; +		s->s_flags |= SB_POSIXACL;  	else -		s->s_flags &= ~MS_POSIXACL; +		s->s_flags &= ~SB_POSIXACL;  	return err;  } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 0186fe6d39f3..8f06fd1f3d69 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -451,7 +451,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)  static int romfs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  	return 0;  } @@ -502,7 +502,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_maxbytes = 0xFFFFFFFF;  	sb->s_magic = ROMFS_MAGIC; -	sb->s_flags |= MS_RDONLY | MS_NOATIME; +	sb->s_flags |= SB_RDONLY | SB_NOATIME;  	sb->s_op = &romfs_super_ops;  #ifdef CONFIG_ROMFS_ON_MTD diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index cf01e15a7b16..8a73b97217c8 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -195,7 +195,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)  		(u64) le64_to_cpu(sblk->id_table_start));  	sb->s_maxbytes = MAX_LFS_FILESIZE; -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  	sb->s_op = &squashfs_super_ops;  	err = -ENOMEM; @@ -373,7 +373,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf)  static int squashfs_remount(struct super_block *sb, int *flags, char *data)  {  	sync_filesystem(sb); -	*flags |= MS_RDONLY; +	*flags |= SB_RDONLY;  	return 0;  } diff --git a/fs/statfs.c b/fs/statfs.c index b072a8bab71a..5b2a24f0f263 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -35,11 +35,11 @@ static int flags_by_mnt(int mnt_flags)  static int flags_by_sb(int s_flags)  {  	int flags = 0; -	if (s_flags & MS_SYNCHRONOUS) +	if (s_flags & SB_SYNCHRONOUS)  		flags |= ST_SYNCHRONOUS; -	if (s_flags & MS_MANDLOCK) +	if (s_flags & SB_MANDLOCK)  		flags |= ST_MANDLOCK; -	if (s_flags & MS_RDONLY) +	if (s_flags & 
SB_RDONLY)  		flags |= ST_RDONLY;  	return flags;  } diff --git a/fs/super.c b/fs/super.c index d4e33e8f1e6f..06bd25d90ba5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,  	INIT_LIST_HEAD(&s->s_mounts);  	s->s_user_ns = get_user_ns(user_ns); +	init_rwsem(&s->s_umount); +	lockdep_set_class(&s->s_umount, &type->s_umount_key); +	/* +	 * sget() can have s_umount recursion. +	 * +	 * When it cannot find a suitable sb, it allocates a new +	 * one (this one), and tries again to find a suitable old +	 * one. +	 * +	 * In case that succeeds, it will acquire the s_umount +	 * lock of the old one. Since these are clearly distrinct +	 * locks, and this object isn't exposed yet, there's no +	 * risk of deadlocks. +	 * +	 * Annotate this by putting this lock in a different +	 * subclass. +	 */ +	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);  	if (security_sb_alloc(s))  		goto fail; @@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,  		goto fail;  	if (list_lru_init_memcg(&s->s_inode_lru))  		goto fail; - -	init_rwsem(&s->s_umount); -	lockdep_set_class(&s->s_umount, &type->s_umount_key); -	/* -	 * sget() can have s_umount recursion. -	 * -	 * When it cannot find a suitable sb, it allocates a new -	 * one (this one), and tries again to find a suitable old -	 * one. -	 * -	 * In case that succeeds, it will acquire the s_umount -	 * lock of the old one. Since these are clearly distrinct -	 * locks, and this object isn't exposed yet, there's no -	 * risk of deadlocks. -	 * -	 * Annotate this by putting this lock in a different -	 * subclass. 
-	 */ -	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);  	s->s_count = 1;  	atomic_set(&s->s_active, 1);  	mutex_init(&s->s_vfs_rename_mutex); @@ -518,7 +517,11 @@ retry:  	hlist_add_head(&s->s_instances, &type->fs_supers);  	spin_unlock(&sb_lock);  	get_filesystem(type); -	register_shrinker(&s->s_shrink); +	err = register_shrinker(&s->s_shrink); +	if (err) { +		deactivate_locked_super(s); +		s = ERR_PTR(err); +	}  	return s;  } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 20b8f82e115b..fb49510c5dcf 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -30,7 +30,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,  	void *ns;  	bool new_sb; -	if (!(flags & MS_KERNMOUNT)) { +	if (!(flags & SB_KERNMOUNT)) {  		if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))  			return ERR_PTR(-EPERM);  	} diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3c47b7d5d4cf..bec9f79adb25 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -63,7 +63,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)  	sync_filesystem(sb);  	if (sbi->s_forced_ro) -		*flags |= MS_RDONLY; +		*flags |= SB_RDONLY;  	return 0;  } diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 0d56e486b392..89765ddfb738 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -333,7 +333,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)  	/* set up enough so that it can read an inode */  	sb->s_op = &sysv_sops;  	if (sbi->s_forced_ro) -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	if (sbi->s_truncate)  		sb->s_d_op = &sysv_dentry_operations;  	root_inode = sysv_iget(sb, SYSV_ROOT_INO); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a02aa59d1e24..dfe85069586e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1406,7 +1406,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time,  	if (flags & S_MTIME)  		inode->i_mtime = *time; -	if (!(inode->i_sb->s_flags & MS_LAZYTIME)) +	if 
(!(inode->i_sb->s_flags & SB_LAZYTIME))  		iflags |= I_DIRTY_SYNC;  	release = ui->dirty; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3be28900bf37..fe77e9625e84 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -84,7 +84,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)  	if (!c->ro_error) {  		c->ro_error = 1;  		c->no_chk_data_crc = 0; -		c->vfs_sb->s_flags |= MS_RDONLY; +		c->vfs_sb->s_flags |= SB_RDONLY;  		ubifs_warn(c, "switched to read-only mode, error %d", err);  		dump_stack();  	} diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7503e7cdf870..0beb285b143d 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -968,7 +968,7 @@ static int parse_standard_option(const char *option)  	pr_notice("UBIFS: parse %s\n", option);  	if (!strcmp(option, "sync")) -		return MS_SYNCHRONOUS; +		return SB_SYNCHRONOUS;  	return 0;  } @@ -1160,8 +1160,8 @@ static int mount_ubifs(struct ubifs_info *c)  	size_t sz;  	c->ro_mount = !!sb_rdonly(c->vfs_sb); -	/* Suppress error messages while probing if MS_SILENT is set */ -	c->probing = !!(c->vfs_sb->s_flags & MS_SILENT); +	/* Suppress error messages while probing if SB_SILENT is set */ +	c->probing = !!(c->vfs_sb->s_flags & SB_SILENT);  	err = init_constants_early(c);  	if (err) @@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)  		return err;  	} -	if (c->ro_mount && !(*flags & MS_RDONLY)) { +	if (c->ro_mount && !(*flags & SB_RDONLY)) {  		if (c->ro_error) {  			ubifs_msg(c, "cannot re-mount R/W due to prior errors");  			return -EROFS; @@ -1864,7 +1864,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)  		err = ubifs_remount_rw(c);  		if (err)  			return err; -	} else if (!c->ro_mount && (*flags & MS_RDONLY)) { +	} else if (!c->ro_mount && (*flags & SB_RDONLY)) {  		if (c->ro_error) {  			ubifs_msg(c, "cannot re-mount R/O due to prior errors");  			return -EROFS; @@ -2117,7 +2117,7 @@ static struct dentry *ubifs_mount(struct 
file_system_type *fs_type, int flags,  	 */  	ubi = open_ubi(name, UBI_READONLY);  	if (IS_ERR(ubi)) { -		if (!(flags & MS_SILENT)) +		if (!(flags & SB_SILENT))  			pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",  			       current->pid, name, (int)PTR_ERR(ubi));  		return ERR_CAST(ubi); @@ -2143,18 +2143,18 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,  		kfree(c);  		/* A new mount point for already mounted UBIFS */  		dbg_gen("this ubi volume is already mounted"); -		if (!!(flags & MS_RDONLY) != c1->ro_mount) { +		if (!!(flags & SB_RDONLY) != c1->ro_mount) {  			err = -EBUSY;  			goto out_deact;  		}  	} else { -		err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); +		err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0);  		if (err)  			goto out_deact;  		/* We do not support atime */ -		sb->s_flags |= MS_ACTIVE; +		sb->s_flags |= SB_ACTIVE;  #ifndef CONFIG_UBIFS_ATIME_SUPPORT -		sb->s_flags |= MS_NOATIME; +		sb->s_flags |= SB_NOATIME;  #else  		ubifs_msg(c, "full atime support is enabled.");  #endif diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 63c7468147eb..5ee7af879cc4 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1201,7 +1201,7 @@ struct ubifs_debug_info;   * @need_recovery: %1 if the file-system needs recovery   * @replaying: %1 during journal replay   * @mounting: %1 while mounting - * @probing: %1 while attempting to mount if MS_SILENT mount flag is set + * @probing: %1 while attempting to mount if SB_SILENT mount flag is set   * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode   * @replay_list: temporary list used during journal replay   * @replay_buds: list of buds to replay @@ -1850,7 +1850,7 @@ __printf(2, 3)  void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...);  /*   * A conditional variant of 'ubifs_err()' which doesn't output anything - * if probing (ie. MS_SILENT set). + * if probing (ie. SB_SILENT set).   
*/  #define ubifs_errc(c, fmt, ...)						\  do {									\ diff --git a/fs/udf/super.c b/fs/udf/super.c index f80e0a0f24d3..f73239a9a97d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -650,7 +650,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)  	sync_filesystem(sb);  	if (lvidiu) {  		int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); -		if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) +		if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & SB_RDONLY))  			return -EACCES;  	} @@ -673,10 +673,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)  	sbi->s_dmode = uopt.dmode;  	write_unlock(&sbi->s_cred_lock); -	if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb)) +	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))  		goto out_unlock; -	if (*flags & MS_RDONLY) +	if (*flags & SB_RDONLY)  		udf_close_lvid(sb);  	else  		udf_open_lvid(sb); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b5cd79065ef9..e727ee07dbe4 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -115,7 +115,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)  	ubh_mark_buffer_dirty (USPI_UBH(uspi));  	ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	ufs_mark_sb_dirty(sb); @@ -205,7 +205,7 @@ do_more:  	ubh_mark_buffer_dirty (USPI_UBH(uspi));  	ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	if (overflow) { @@ -567,7 +567,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,  	ubh_mark_buffer_dirty (USPI_UBH(uspi));  	ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	ufs_mark_sb_dirty(sb); @@ -688,7 +688,7 @@ cg_found:  succed:  	ubh_mark_buffer_dirty (USPI_UBH(uspi));  	
ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	ufs_mark_sb_dirty(sb); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 916b4a428933..e1ef0f0a1353 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -112,7 +112,7 @@ void ufs_free_inode (struct inode * inode)  	ubh_mark_buffer_dirty (USPI_UBH(uspi));  	ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	ufs_mark_sb_dirty(sb); @@ -146,14 +146,14 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,  		set_buffer_uptodate(bh);  		mark_buffer_dirty(bh);  		unlock_buffer(bh); -		if (sb->s_flags & MS_SYNCHRONOUS) +		if (sb->s_flags & SB_SYNCHRONOUS)  			sync_dirty_buffer(bh);  		brelse(bh);  	}  	fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);  	ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	UFSD("EXIT\n"); @@ -284,7 +284,7 @@ cg_found:  	}  	ubh_mark_buffer_dirty (USPI_UBH(uspi));  	ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	ufs_mark_sb_dirty(sb); @@ -330,7 +330,7 @@ cg_found:  		ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, ts.tv_nsec);  		mark_buffer_dirty(bh);  		unlock_buffer(bh); -		if (sb->s_flags & MS_SYNCHRONOUS) +		if (sb->s_flags & SB_SYNCHRONOUS)  			sync_dirty_buffer(bh);  		brelse(bh);  	} diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 6440003f8ddc..4d497e9c6883 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -282,7 +282,7 @@ void ufs_error (struct super_block * sb, const char * function,  		usb1->fs_clean = UFS_FSBAD;  		ubh_mark_buffer_dirty(USPI_UBH(uspi));  		ufs_mark_sb_dirty(sb); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	va_start(args, fmt);  	vaf.fmt 
= fmt; @@ -320,7 +320,7 @@ void ufs_panic (struct super_block * sb, const char * function,  	va_start(args, fmt);  	vaf.fmt = fmt;  	vaf.va = &args; -	sb->s_flags |= MS_RDONLY; +	sb->s_flags |= SB_RDONLY;  	pr_crit("panic (device %s): %s: %pV\n",  		sb->s_id, function, &vaf);  	va_end(args); @@ -905,7 +905,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)  		if (!sb_rdonly(sb)) {  			if (!silent)  				pr_info("ufstype=old is supported read-only\n"); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  		}  		break; @@ -921,7 +921,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)  		if (!sb_rdonly(sb)) {  			if (!silent)  				pr_info("ufstype=nextstep is supported read-only\n"); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  		}  		break; @@ -937,7 +937,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)  		if (!sb_rdonly(sb)) {  			if (!silent)  				pr_info("ufstype=nextstep-cd is supported read-only\n"); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  		}  		break; @@ -953,7 +953,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)  		if (!sb_rdonly(sb)) {  			if (!silent)  				pr_info("ufstype=openstep is supported read-only\n"); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  		}  		break; @@ -968,7 +968,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)  		if (!sb_rdonly(sb)) {  			if (!silent)  				pr_info("ufstype=hp is supported read-only\n"); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;   		}   		break;  	default: @@ -1125,21 +1125,21 @@ magic_found:  			break;  		case UFS_FSACTIVE:  			pr_err("%s(): fs is active\n", __func__); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			break;  		case UFS_FSBAD:  			pr_err("%s(): fs is bad\n", __func__); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			break;  		default:  			pr_err("%s(): 
can't grok fs_clean 0x%x\n",  			       __func__, usb1->fs_clean); -			sb->s_flags |= MS_RDONLY; +			sb->s_flags |= SB_RDONLY;  			break;  		}  	} else {  		pr_err("%s(): fs needs fsck\n", __func__); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	}  	/* @@ -1328,7 +1328,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  		return -EINVAL;  	} -	if ((bool)(*mount_flags & MS_RDONLY) == sb_rdonly(sb)) { +	if ((bool)(*mount_flags & SB_RDONLY) == sb_rdonly(sb)) {  		UFS_SB(sb)->s_mount_opt = new_mount_opt;  		mutex_unlock(&UFS_SB(sb)->s_lock);  		return 0; @@ -1337,7 +1337,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  	/*  	 * fs was mouted as rw, remounting ro  	 */ -	if (*mount_flags & MS_RDONLY) { +	if (*mount_flags & SB_RDONLY) {  		ufs_put_super_internal(sb);  		usb1->fs_time = cpu_to_fs32(sb, get_seconds());  		if ((flags & UFS_ST_MASK) == UFS_ST_SUN @@ -1346,7 +1346,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  			ufs_set_fs_state(sb, usb1, usb3,  				UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));  		ubh_mark_buffer_dirty (USPI_UBH(uspi)); -		sb->s_flags |= MS_RDONLY; +		sb->s_flags |= SB_RDONLY;  	} else {  	/*  	 * fs was mounted as ro, remounting rw @@ -1370,7 +1370,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  			mutex_unlock(&UFS_SB(sb)->s_lock);  			return -EPERM;  		} -		sb->s_flags &= ~MS_RDONLY; +		sb->s_flags &= ~SB_RDONLY;  #endif  	}  	UFS_SB(sb)->s_mount_opt = new_mount_opt; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ac9a4e65ca49..41a75f9f23fd 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -570,11 +570,14 @@ out:  static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,  					      struct userfaultfd_wait_queue *ewq)  { +	struct userfaultfd_ctx *release_new_ctx; +  	if (WARN_ON_ONCE(current->flags & PF_EXITING))  		goto out;  	ewq->ctx = ctx;  	
init_waitqueue_entry(&ewq->wq, current); +	release_new_ctx = NULL;  	spin_lock(&ctx->event_wqh.lock);  	/* @@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,  				new = (struct userfaultfd_ctx *)  					(unsigned long)  					ewq->msg.arg.reserved.reserved1; - -				userfaultfd_ctx_put(new); +				release_new_ctx = new;  			}  			break;  		} @@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,  	__set_current_state(TASK_RUNNING);  	spin_unlock(&ctx->event_wqh.lock); +	if (release_new_ctx) { +		struct vm_area_struct *vma; +		struct mm_struct *mm = release_new_ctx->mm; + +		/* the various vma->vm_userfaultfd_ctx still points to it */ +		down_write(&mm->mmap_sem); +		for (vma = mm->mmap; vma; vma = vma->vm_next) +			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) +				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; +		up_write(&mm->mmap_sem); + +		userfaultfd_ctx_put(release_new_ctx); +	} +  	/*  	 * ctx may go away after this if the userfault pseudo fd is  	 * already released. 
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0da80019a917..83ed7715f856 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(  	ASSERT(args->agbno % args->alignment == 0);  	/* if not file data, insert new block into the reverse map btree */ -	if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { +	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {  		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,  				       args->agbno, args->len, &args->oinfo);  		if (error) @@ -1682,7 +1682,7 @@ xfs_free_ag_extent(  	bno_cur = cnt_cur = NULL;  	mp = tp->t_mountp; -	if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { +	if (!xfs_rmap_should_skip_owner_update(oinfo)) {  		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);  		if (error)  			goto error0; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..a76914db72ef 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -212,6 +212,7 @@ xfs_attr_set(  	int			flags)  {  	struct xfs_mount	*mp = dp->i_mount; +	struct xfs_buf		*leaf_bp = NULL;  	struct xfs_da_args	args;  	struct xfs_defer_ops	dfops;  	struct xfs_trans_res	tres; @@ -327,9 +328,16 @@ xfs_attr_set(  		 * GROT: another possible req'mt for a double-split btree op.  		 */  		xfs_defer_init(args.dfops, args.firstblock); -		error = xfs_attr_shortform_to_leaf(&args); +		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);  		if (error)  			goto out_defer_cancel; +		/* +		 * Prevent the leaf buffer from being unlocked so that a +		 * concurrent AIL push cannot grab the half-baked leaf +		 * buffer and run into problems with the write verifier. +		 */ +		xfs_trans_bhold(args.trans, leaf_bp); +		xfs_defer_bjoin(args.dfops, leaf_bp);  		xfs_defer_ijoin(args.dfops, dp);  		error = xfs_defer_finish(&args.trans, args.dfops);  		if (error) @@ -337,13 +345,14 @@ xfs_attr_set(  		/*  		 * Commit the leaf transformation.  
We'll need another (linked) -		 * transaction to add the new attribute to the leaf. +		 * transaction to add the new attribute to the leaf, which +		 * means that we have to hold & join the leaf buffer here too.  		 */ -  		error = xfs_trans_roll_inode(&args.trans, dp);  		if (error)  			goto out; - +		xfs_trans_bjoin(args.trans, leaf_bp); +		leaf_bp = NULL;  	}  	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -374,8 +383,9 @@ xfs_attr_set(  out_defer_cancel:  	xfs_defer_cancel(&dfops); -	args.trans = NULL;  out: +	if (leaf_bp) +		xfs_trans_brelse(args.trans, leaf_bp);  	if (args.trans)  		xfs_trans_cancel(args.trans);  	xfs_iunlock(dp, XFS_ILOCK_EXCL); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 53cc8b986eac..601eaa36f1ad 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)  }  /* - * Convert from using the shortform to the leaf. + * Convert from using the shortform to the leaf.  On success, return the + * buffer so that we can keep it locked until we're totally done with it.   
*/  int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +xfs_attr_shortform_to_leaf( +	struct xfs_da_args	*args, +	struct xfs_buf		**leaf_bp)  {  	xfs_inode_t *dp;  	xfs_attr_shortform_t *sf; @@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)  		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);  	}  	error = 0; - +	*leaf_bp = bp;  out:  	kmem_free(tmpbuffer);  	return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f7dda0c237b0..894124efb421 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -48,7 +48,8 @@ void	xfs_attr_shortform_create(struct xfs_da_args *args);  void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);  int	xfs_attr_shortform_lookup(struct xfs_da_args *args);  int	xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args, +			struct xfs_buf **leaf_bp);  int	xfs_attr_shortform_remove(struct xfs_da_args *args);  int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);  int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 08df809e2315..1bddbba6b80c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5136,7 +5136,7 @@ __xfs_bunmapi(  	 * blowing out the transaction with a mix of EFIs and reflink  	 * adjustments.  	 
*/ -	if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) +	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)  		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));  	else  		max_len = len; @@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents(  		*done = true;  		goto del_cursor;  	} -	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); +	XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), +				del_cursor);  	new_startoff = got.br_startoff - offset_shift_fsb;  	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { @@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents(  			goto del_cursor;  		}  	} -	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); +	XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), +				del_cursor);  	if (stop_fsb >= got.br_startoff + got.br_blockcount) {  		error = -EIO; diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 072ebfe1d6ae..087fea02c389 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -249,6 +249,10 @@ xfs_defer_trans_roll(  	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)  		xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); +	/* Hold the (previously bjoin'd) buffer locked across the roll. */ +	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) +		xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); +  	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);  	/* Roll the transaction. */ @@ -264,6 +268,12 @@ xfs_defer_trans_roll(  	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)  		xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); +	/* Rejoin the buffers and dirty them so the log moves forward. 
*/ +	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { +		xfs_trans_bjoin(*tp, dop->dop_bufs[i]); +		xfs_trans_bhold(*tp, dop->dop_bufs[i]); +	} +  	return error;  } @@ -295,6 +305,31 @@ xfs_defer_ijoin(  		}  	} +	ASSERT(0); +	return -EFSCORRUPTED; +} + +/* + * Add this buffer to the deferred op.  Each joined buffer is relogged + * each time we roll the transaction. + */ +int +xfs_defer_bjoin( +	struct xfs_defer_ops		*dop, +	struct xfs_buf			*bp) +{ +	int				i; + +	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { +		if (dop->dop_bufs[i] == bp) +			return 0; +		else if (dop->dop_bufs[i] == NULL) { +			dop->dop_bufs[i] = bp; +			return 0; +		} +	} + +	ASSERT(0);  	return -EFSCORRUPTED;  } @@ -493,9 +528,7 @@ xfs_defer_init(  	struct xfs_defer_ops		*dop,  	xfs_fsblock_t			*fbp)  { -	dop->dop_committed = false; -	dop->dop_low = false; -	memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); +	memset(dop, 0, sizeof(struct xfs_defer_ops));  	*fbp = NULLFSBLOCK;  	INIT_LIST_HEAD(&dop->dop_intake);  	INIT_LIST_HEAD(&dop->dop_pending); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index d4f046dd44bd..045beacdd37d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -59,6 +59,7 @@ enum xfs_defer_ops_type {  };  #define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */  struct xfs_defer_ops {  	bool			dop_committed;	/* did any trans commit? 
*/ @@ -66,8 +67,9 @@ struct xfs_defer_ops {  	struct list_head	dop_intake;	/* unlogged pending work */  	struct list_head	dop_pending;	/* logged pending work */ -	/* relog these inodes with each roll */ +	/* relog these with each roll */  	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES]; +	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];  };  void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, @@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);  void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);  bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);  int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); +int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);  /* Description of a deferred type. */  struct xfs_defer_op_type { diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index de3f04a98656..3b57ef0f2f76 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -920,8 +920,7 @@ STATIC xfs_agnumber_t  xfs_ialloc_ag_select(  	xfs_trans_t	*tp,		/* transaction pointer */  	xfs_ino_t	parent,		/* parent directory inode number */ -	umode_t		mode,		/* bits set to indicate file type */ -	int		okalloc)	/* ok to allocate more space */ +	umode_t		mode)		/* bits set to indicate file type */  {  	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */  	xfs_agnumber_t	agno;		/* current ag number */ @@ -978,9 +977,6 @@ xfs_ialloc_ag_select(  			return agno;  		} -		if (!okalloc) -			goto nextag; -  		if (!pag->pagf_init) {  			error = xfs_alloc_pagf_init(mp, tp, agno, flags);  			if (error) @@ -1680,7 +1676,6 @@ xfs_dialloc(  	struct xfs_trans	*tp,  	xfs_ino_t		parent,  	umode_t			mode, -	int			okalloc,  	struct xfs_buf		**IO_agbp,  	xfs_ino_t		*inop)  { @@ -1692,6 +1687,7 @@ xfs_dialloc(  	int			noroom = 0;  	xfs_agnumber_t		start_agno;  	struct xfs_perag	*pag; +	int			okalloc = 1;  	if (*IO_agbp) {  		/* @@ -1707,7 +1703,7 @@ 
xfs_dialloc(  	 * We do not have an agbp, so select an initial allocation  	 * group for inode allocation.  	 */ -	start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); +	start_agno = xfs_ialloc_ag_select(tp, parent, mode);  	if (start_agno == NULLAGNUMBER) {  		*inop = NULLFSINO;  		return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index d2bdcd5e7312..66a8de0b1caa 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -81,7 +81,6 @@ xfs_dialloc(  	struct xfs_trans *tp,		/* transaction pointer */  	xfs_ino_t	parent,		/* parent inode (directory) */  	umode_t		mode,		/* mode bits for new inode */ -	int		okalloc,	/* ok to allocate more space */  	struct xfs_buf	**agbp,		/* buf for a.g. inode header */  	xfs_ino_t	*inop);		/* inode number allocated */ diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 89bf16b4d937..b0f31791c7e6 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -632,8 +632,6 @@ xfs_iext_insert(  	struct xfs_iext_leaf	*new = NULL;  	int			nr_entries, i; -	trace_xfs_iext_insert(ip, cur, state, _RET_IP_); -  	if (ifp->if_height == 0)  		xfs_iext_alloc_root(ifp, cur);  	else if (ifp->if_height == 1) @@ -661,6 +659,8 @@ xfs_iext_insert(  	xfs_iext_set(cur_rec(cur), irec);  	ifp->if_bytes += sizeof(struct xfs_iext_rec); +	trace_xfs_iext_insert(ip, cur, state, _RET_IP_); +  	if (new)  		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);  } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 585b35d34142..c40d26763075 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(  	xfs_extlen_t		aglen,  	struct xfs_defer_ops	*dfops)  { -	int			error; -  	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,  			agbno, aglen);  	/* Add refcount btree reservation */ -	error = xfs_refcount_adjust_cow(rcur, agbno, aglen, +	return 
xfs_refcount_adjust_cow(rcur, agbno, aglen,  			XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); -	if (error) -		return error; - -	/* Add rmap entry */ -	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { -		error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, -				rcur->bc_private.a.agno, -				agbno, aglen, XFS_RMAP_OWN_COW); -		if (error) -			return error; -	} - -	return error;  }  /* @@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(  	xfs_extlen_t		aglen,  	struct xfs_defer_ops	*dfops)  { -	int			error; -  	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,  			agbno, aglen);  	/* Remove refcount btree reservation */ -	error = xfs_refcount_adjust_cow(rcur, agbno, aglen, +	return xfs_refcount_adjust_cow(rcur, agbno, aglen,  			XFS_REFCOUNT_ADJUST_COW_FREE, dfops); -	if (error) -		return error; - -	/* Remove rmap entry */ -	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { -		error = xfs_rmap_free_extent(rcur->bc_mp, dfops, -				rcur->bc_private.a.agno, -				agbno, aglen, XFS_RMAP_OWN_COW); -		if (error) -			return error; -	} - -	return error;  }  /* Record a CoW staging extent in the refcount btree. */ @@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(  	xfs_fsblock_t			fsb,  	xfs_extlen_t			len)  { +	int				error; +  	if (!xfs_sb_version_hasreflink(&mp->m_sb))  		return 0; -	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, +	error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,  			fsb, len); +	if (error) +		return error; + +	/* Add rmap entry */ +	return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), +			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);  }  /* Forget a CoW staging event in the refcount btree. 
*/ @@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(  	xfs_fsblock_t			fsb,  	xfs_extlen_t			len)  { +	int				error; +  	if (!xfs_sb_version_hasreflink(&mp->m_sb))  		return 0; +	/* Remove rmap entry */ +	error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), +			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); +	if (error) +		return error; +  	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,  			fsb, len);  } diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index dd019cee1b3b..50db920ceeeb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range(  }  /* + * Perform all the relevant owner checks for a removal op.  If we're doing an + * unknown-owner removal then we have no owner information to check. + */ +static int +xfs_rmap_free_check_owner( +	struct xfs_mount	*mp, +	uint64_t		ltoff, +	struct xfs_rmap_irec	*rec, +	xfs_fsblock_t		bno, +	xfs_filblks_t		len, +	uint64_t		owner, +	uint64_t		offset, +	unsigned int		flags) +{ +	int			error = 0; + +	if (owner == XFS_RMAP_OWN_UNKNOWN) +		return 0; + +	/* Make sure the unwritten flag matches. */ +	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == +			(rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + +	/* Make sure the owner matches what we expect to find in the tree. */ +	XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + +	/* Check the offset, if necessary. */ +	if (XFS_RMAP_NON_INODE_OWNER(owner)) +		goto out; + +	if (flags & XFS_RMAP_BMBT_BLOCK) { +		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, +				out); +	} else { +		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); +		XFS_WANT_CORRUPTED_GOTO(mp, +				ltoff + rec->rm_blockcount >= offset + len, +				out); +	} + +out: +	return error; +} + +/*   * Find the extent in the rmap btree and remove it.   
*   * The record we find should always be an exact match for the extent that we're @@ -444,33 +489,40 @@ xfs_rmap_unmap(  		goto out_done;  	} -	/* Make sure the unwritten flag matches. */ -	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == -			(ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); +	/* +	 * If we're doing an unknown-owner removal for EFI recovery, we expect +	 * to find the full range in the rmapbt or nothing at all.  If we +	 * don't find any rmaps overlapping either end of the range, we're +	 * done.  Hopefully this means that the EFI creator already queued +	 * (and finished) a RUI to remove the rmap. +	 */ +	if (owner == XFS_RMAP_OWN_UNKNOWN && +	    ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { +		struct xfs_rmap_irec    rtrec; + +		error = xfs_btree_increment(cur, 0, &i); +		if (error) +			goto out_error; +		if (i == 0) +			goto out_done; +		error = xfs_rmap_get_rec(cur, &rtrec, &i); +		if (error) +			goto out_error; +		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); +		if (rtrec.rm_startblock >= bno + len) +			goto out_done; +	}  	/* Make sure the extent we found covers the entire freeing range. */  	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && -		ltrec.rm_startblock + ltrec.rm_blockcount >= -		bno + len, out_error); +			ltrec.rm_startblock + ltrec.rm_blockcount >= +			bno + len, out_error); -	/* Make sure the owner matches what we expect to find in the tree. */ -	XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || -				    XFS_RMAP_NON_INODE_OWNER(owner), out_error); - -	/* Check the offset, if necessary. */ -	if (!XFS_RMAP_NON_INODE_OWNER(owner)) { -		if (flags & XFS_RMAP_BMBT_BLOCK) { -			XFS_WANT_CORRUPTED_GOTO(mp, -					ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, -					out_error); -		} else { -			XFS_WANT_CORRUPTED_GOTO(mp, -					ltrec.rm_offset <= offset, out_error); -			XFS_WANT_CORRUPTED_GOTO(mp, -					ltoff + ltrec.rm_blockcount >= offset + len, -					out_error); -		} -	} +	/* Check owner information. 
*/ +	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner, +			offset, flags); +	if (error) +		goto out_error;  if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {  		/* exact match, simply remove the record from rmap tree */ @@ -664,6 +716,7 @@ xfs_rmap_map(  		flags |= XFS_RMAP_UNWRITTEN;  	trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,  			unwritten, oinfo); +	ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));  	/*  	 * For the initial lookup, look for an exact match or the left-adjacent diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 466ede637080..0fcd5b1ba729 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -61,7 +61,21 @@ static inline void  xfs_rmap_skip_owner_update(  	struct xfs_owner_info	*oi)  { -	oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; +	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL); +} + +static inline bool +xfs_rmap_should_skip_owner_update( +	struct xfs_owner_info	*oi) +{ +	return oi->oi_owner == XFS_RMAP_OWN_NULL; +} + +static inline void +xfs_rmap_any_owner_update( +	struct xfs_owner_info	*oi) +{ +	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);  }  /* Reverse mapping functions. 
*/ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 637b7a892313..f120fb20452f 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -318,8 +318,20 @@ xfs_scrub_dinode(  	/* di_mode */  	mode = be16_to_cpu(dip->di_mode); -	if (mode & ~(S_IALLUGO | S_IFMT)) +	switch (mode & S_IFMT) { +	case S_IFLNK: +	case S_IFREG: +	case S_IFDIR: +	case S_IFCHR: +	case S_IFBLK: +	case S_IFIFO: +	case S_IFSOCK: +		/* mode is recognized */ +		break; +	default:  		xfs_scrub_ino_set_corrupt(sc, ino, bp); +		break; +	}  	/* v1/v2 fields */  	switch (dip->di_version) { diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 8e58ba842946..3d9037eceaf1 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -107,7 +107,7 @@ xfs_scrub_quota_item(  	unsigned long long		rcount;  	xfs_ino_t			fs_icount; -	offset = id * qi->qi_dqperchunk; +	offset = id / qi->qi_dqperchunk;  	/*  	 * We fed $id and DQNEXT into the xfs_qm_dqget call, which means @@ -207,7 +207,7 @@ xfs_scrub_quota(  	xfs_dqid_t			id = 0;  	uint				dqtype;  	int				nimaps; -	int				error; +	int				error = 0;  	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))  		return -ENOENT; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 9c42c4efd01e..ab3aef2ae823 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -46,7 +46,6 @@  #include "scrub/scrub.h"  #include "scrub/common.h"  #include "scrub/trace.h" -#include "scrub/scrub.h"  #include "scrub/btree.h"  /* diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 472080e75788..86daed0e3a45 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -26,7 +26,6 @@  #include "xfs_mount.h"  #include "xfs_defer.h"  #include "xfs_da_format.h" -#include "xfs_defer.h"  #include "xfs_inode.h"  #include "xfs_btree.h"  #include "xfs_trans.h" diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a3eeaba156c5..4fc526a27a94 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks(  	    
   (ip->i_df.if_flags & XFS_IFEXTENTS));  	ASSERT(offset <= mp->m_super->s_maxbytes); -	if (offset + count > mp->m_super->s_maxbytes) +	if (offset > mp->m_super->s_maxbytes - count)  		count = mp->m_super->s_maxbytes - offset;  	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);  	offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -896,13 +896,13 @@ xfs_writepage_map(  	struct writeback_control *wbc,  	struct inode		*inode,  	struct page		*page, -	loff_t			offset, -	uint64_t              end_offset) +	uint64_t		end_offset)  {  	LIST_HEAD(submit_list);  	struct xfs_ioend	*ioend, *next;  	struct buffer_head	*bh, *head;  	ssize_t			len = i_blocksize(inode); +	uint64_t		offset;  	int			error = 0;  	int			count = 0;  	int			uptodate = 1; @@ -1146,7 +1146,7 @@ xfs_do_writepage(  		end_offset = offset;  	} -	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); +	return xfs_writepage_map(wpc, wbc, inode, page, end_offset);  redirty:  	redirty_page_for_writepage(wbc, page); @@ -1265,7 +1265,7 @@ xfs_map_trim_size(  	if (mapping_size > size)  		mapping_size = size;  	if (offset < i_size_read(inode) && -	    offset + mapping_size >= i_size_read(inode)) { +	    (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {  		/* limit mapping to block that spans EOF */  		mapping_size = roundup_64(i_size_read(inode) - offset,  					  i_blocksize(inode)); @@ -1312,7 +1312,7 @@ xfs_get_blocks(  	lockmode = xfs_ilock_data_map_shared(ip);  	ASSERT(offset <= mp->m_super->s_maxbytes); -	if (offset + size > mp->m_super->s_maxbytes) +	if (offset > mp->m_super->s_maxbytes - size)  		size = mp->m_super->s_maxbytes - offset;  	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);  	offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index dd136f7275e4..e5fb008d75e8 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -389,7 +389,8 @@ xfs_bud_init(  int  xfs_bui_recover(  	struct xfs_mount		*mp, -	struct 
xfs_bui_log_item		*buip) +	struct xfs_bui_log_item		*buip, +	struct xfs_defer_ops		*dfops)  {  	int				error = 0;  	unsigned int			bui_type; @@ -404,9 +405,7 @@ xfs_bui_recover(  	xfs_exntst_t			state;  	struct xfs_trans		*tp;  	struct xfs_inode		*ip = NULL; -	struct xfs_defer_ops		dfops;  	struct xfs_bmbt_irec		irec; -	xfs_fsblock_t			firstfsb;  	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -464,7 +463,6 @@ xfs_bui_recover(  	if (VFS_I(ip)->i_nlink == 0)  		xfs_iflags_set(ip, XFS_IRECOVERY); -	xfs_defer_init(&dfops, &firstfsb);  	/* Process deferred bmap item. */  	state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? @@ -479,16 +477,16 @@ xfs_bui_recover(  		break;  	default:  		error = -EFSCORRUPTED; -		goto err_dfops; +		goto err_inode;  	}  	xfs_trans_ijoin(tp, ip, 0);  	count = bmap->me_len; -	error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, +	error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type,  			ip, whichfork, bmap->me_startoff,  			bmap->me_startblock, &count, state);  	if (error) -		goto err_dfops; +		goto err_inode;  	if (count > 0) {  		ASSERT(type == XFS_BMAP_UNMAP); @@ -496,16 +494,11 @@ xfs_bui_recover(  		irec.br_blockcount = count;  		irec.br_startoff = bmap->me_startoff;  		irec.br_state = state; -		error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); +		error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec);  		if (error) -			goto err_dfops; +			goto err_inode;  	} -	/* Finish transaction, free inodes. 
*/ -	error = xfs_defer_finish(&tp, &dfops); -	if (error) -		goto err_dfops; -  	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);  	error = xfs_trans_commit(tp);  	xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -513,8 +506,6 @@ xfs_bui_recover(  	return error; -err_dfops: -	xfs_defer_cancel(&dfops);  err_inode:  	xfs_trans_cancel(tp);  	if (ip) { diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index c867daae4a3c..24b354a2c836 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,  		struct xfs_bui_log_item *);  void xfs_bui_item_free(struct xfs_bui_log_item *);  void xfs_bui_release(struct xfs_bui_log_item *); -int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip); +int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip, +		struct xfs_defer_ops *dfops);  #endif	/* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4db6e8d780f6..4c6e86d861fd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1815,22 +1815,27 @@ xfs_alloc_buftarg(  	btp->bt_daxdev = dax_dev;  	if (xfs_setsize_buftarg_early(btp, bdev)) -		goto error; +		goto error_free;  	if (list_lru_init(&btp->bt_lru)) -		goto error; +		goto error_free;  	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) -		goto error; +		goto error_lru;  	btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;  	btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;  	btp->bt_shrinker.seeks = DEFAULT_SEEKS;  	btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; -	register_shrinker(&btp->bt_shrinker); +	if (register_shrinker(&btp->bt_shrinker)) +		goto error_pcpu;  	return btp; -error: +error_pcpu: +	percpu_counter_destroy(&btp->bt_io_count); +error_lru: +	list_lru_destroy(&btp->bt_lru); +error_free:  	kmem_free(btp);  	return NULL;  } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index d57c2db64e59..f248708c10ff 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c 
@@ -970,14 +970,22 @@ xfs_qm_dqflush_done(  	 * holding the lock before removing the dquot from the AIL.  	 */  	if ((lip->li_flags & XFS_LI_IN_AIL) && -	    lip->li_lsn == qip->qli_flush_lsn) { +	    ((lip->li_lsn == qip->qli_flush_lsn) || +	     (lip->li_flags & XFS_LI_FAILED))) {  		/* xfs_trans_ail_delete() drops the AIL lock. */  		spin_lock(&ailp->xa_lock); -		if (lip->li_lsn == qip->qli_flush_lsn) +		if (lip->li_lsn == qip->qli_flush_lsn) {  			xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); -		else +		} else { +			/* +			 * Clear the failed state since we are about to drop the +			 * flush lock +			 */ +			if (lip->li_flags & XFS_LI_FAILED) +				xfs_clear_li_failed(lip);  			spin_unlock(&ailp->xa_lock); +		}  	}  	/* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064..664dea105e76 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait(  	wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));  } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. 
+ */ +STATIC void +xfs_dquot_item_error( +	struct xfs_log_item	*lip, +	struct xfs_buf		*bp) +{ +	struct xfs_dquot	*dqp; + +	dqp = DQUOT_ITEM(lip)->qli_dquot; +	ASSERT(!completion_done(&dqp->q_flush)); +	xfs_set_li_failed(lip, bp); +} +  STATIC uint  xfs_qm_dquot_logitem_push(  	struct xfs_log_item	*lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push(  					      __acquires(&lip->li_ailp->xa_lock)  {  	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot; -	struct xfs_buf		*bp = NULL; +	struct xfs_buf		*bp = lip->li_buf;  	uint			rval = XFS_ITEM_SUCCESS;  	int			error;  	if (atomic_read(&dqp->q_pincount) > 0)  		return XFS_ITEM_PINNED; +	/* +	 * The buffer containing this item failed to be written back +	 * previously. Resubmit the buffer for IO +	 */ +	if (lip->li_flags & XFS_LI_FAILED) { +		if (!xfs_buf_trylock(bp)) +			return XFS_ITEM_LOCKED; + +		if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) +			rval = XFS_ITEM_FLUSHING; + +		xfs_buf_unlock(bp); +		return rval; +	} +  	if (!xfs_dqlock_nowait(dqp))  		return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = {  	.iop_unlock	= xfs_qm_dquot_logitem_unlock,  	.iop_committed	= xfs_qm_dquot_logitem_committed,  	.iop_push	= xfs_qm_dquot_logitem_push, -	.iop_committing = xfs_qm_dquot_logitem_committing +	.iop_committing = xfs_qm_dquot_logitem_committing, +	.iop_error	= xfs_dquot_item_error  };  /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 44f8c5451210..64da90655e95 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -538,7 +538,7 @@ xfs_efi_recover(  		return error;  	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); -	xfs_rmap_skip_owner_update(&oinfo); +	xfs_rmap_any_owner_update(&oinfo);  	for (i = 0; i < efip->efi_format.efi_nextents; i++) {  		extp = &efip->efi_format.efi_extents[i];  		error = xfs_trans_free_extent(tp, efdp, extp->ext_start, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c 
index 8f22fc579dbb..60a2e128cb6a 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -571,6 +571,11 @@ xfs_growfs_data_private(  		 * this doesn't actually exist in the rmap btree.  		 */  		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); +		error = xfs_rmap_free(tp, bp, agno, +				be32_to_cpu(agf->agf_length) - new, +				new, &oinfo); +		if (error) +			goto error0;  		error = xfs_free_extent(tp,  				XFS_AGB_TO_FSB(mp, agno,  					be32_to_cpu(agf->agf_length) - new), diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 43005fbe8b1e..3861d61fb265 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -870,7 +870,7 @@ xfs_eofblocks_worker(   * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).   * (We'll just piggyback on the post-EOF prealloc space workqueue.)   */ -STATIC void +void  xfs_queue_cowblocks(  	struct xfs_mount *mp)  { @@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(  	return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);  } +static inline unsigned long +xfs_iflag_for_tag( +	int		tag) +{ +	switch (tag) { +	case XFS_ICI_EOFBLOCKS_TAG: +		return XFS_IEOFBLOCKS; +	case XFS_ICI_COWBLOCKS_TAG: +		return XFS_ICOWBLOCKS; +	default: +		ASSERT(0); +		return 0; +	} +} +  static void -__xfs_inode_set_eofblocks_tag( +__xfs_inode_set_blocks_tag(  	xfs_inode_t	*ip,  	void		(*execute)(struct xfs_mount *mp),  	void		(*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(  	 * Don't bother locking the AG and looking up in the radix trees  	 * if we already know that we have the tag set.  	 
*/ -	if (ip->i_flags & XFS_IEOFBLOCKS) +	if (ip->i_flags & xfs_iflag_for_tag(tag))  		return;  	spin_lock(&ip->i_flags_lock); -	ip->i_flags |= XFS_IEOFBLOCKS; +	ip->i_flags |= xfs_iflag_for_tag(tag);  	spin_unlock(&ip->i_flags_lock);  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(  	xfs_inode_t	*ip)  {  	trace_xfs_inode_set_eofblocks_tag(ip); -	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, +	return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,  			trace_xfs_perag_set_eofblocks,  			XFS_ICI_EOFBLOCKS_TAG);  }  static void -__xfs_inode_clear_eofblocks_tag( +__xfs_inode_clear_blocks_tag(  	xfs_inode_t	*ip,  	void		(*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,  				    int error, unsigned long caller_ip), @@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(  	struct xfs_perag *pag;  	spin_lock(&ip->i_flags_lock); -	ip->i_flags &= ~XFS_IEOFBLOCKS; +	ip->i_flags &= ~xfs_iflag_for_tag(tag);  	spin_unlock(&ip->i_flags_lock);  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(  	xfs_inode_t	*ip)  {  	trace_xfs_inode_clear_eofblocks_tag(ip); -	return __xfs_inode_clear_eofblocks_tag(ip, +	return __xfs_inode_clear_blocks_tag(ip,  			trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);  } @@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(  	xfs_inode_t	*ip)  {  	trace_xfs_inode_set_cowblocks_tag(ip); -	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, +	return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,  			trace_xfs_perag_set_cowblocks,  			XFS_ICI_COWBLOCKS_TAG);  } @@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(  	xfs_inode_t	*ip)  {  	trace_xfs_inode_clear_cowblocks_tag(ip); -	return __xfs_inode_clear_eofblocks_tag(ip, +	return __xfs_inode_clear_blocks_tag(ip,  			trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);  } diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 
bff4d85e5498..d4a77588eca1 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);  int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);  int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);  void xfs_cowblocks_worker(struct work_struct *); +void xfs_queue_cowblocks(struct xfs_mount *);  int xfs_inode_ag_iterator(struct xfs_mount *mp,  	int (*execute)(struct xfs_inode *ip, int flags, void *args), diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 61d1cb7dc10d..6f95bdb408ce 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -749,7 +749,6 @@ xfs_ialloc(  	xfs_nlink_t	nlink,  	dev_t		rdev,  	prid_t		prid, -	int		okalloc,  	xfs_buf_t	**ialloc_context,  	xfs_inode_t	**ipp)  { @@ -765,7 +764,7 @@ xfs_ialloc(  	 * Call the space management code to pick  	 * the on-disk inode to be allocated.  	 */ -	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, +	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,  			    ialloc_context, &ino);  	if (error)  		return error; @@ -957,7 +956,6 @@ xfs_dir_ialloc(  	xfs_nlink_t	nlink,  	dev_t		rdev,  	prid_t		prid,		/* project id */ -	int		okalloc,	/* ok to allocate new space */  	xfs_inode_t	**ipp,		/* pointer to inode; it will be  					   locked. */  	int		*committed) @@ -988,8 +986,8 @@ xfs_dir_ialloc(  	 * transaction commit so that no other process can steal  	 * the inode(s) that we've just allocated.  	 */ -	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, -			  &ialloc_context, &ip); +	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context, +			&ip);  	/*  	 * Return an error if we were unable to allocate a new inode. @@ -1061,7 +1059,7 @@ xfs_dir_ialloc(  		 * this call should always succeed.  		 
*/  		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, -				  okalloc, &ialloc_context, &ip); +				  &ialloc_context, &ip);  		/*  		 * If we get an error at this point, return to the caller @@ -1182,11 +1180,6 @@ xfs_create(  		xfs_flush_inodes(mp);  		error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);  	} -	if (error == -ENOSPC) { -		/* No space at all so try a "no-allocation" reservation */ -		resblks = 0; -		error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); -	}  	if (error)  		goto out_release_inode; @@ -1203,19 +1196,13 @@ xfs_create(  	if (error)  		goto out_trans_cancel; -	if (!resblks) { -		error = xfs_dir_canenter(tp, dp, name); -		if (error) -			goto out_trans_cancel; -	} -  	/*  	 * A newly created regular or special file just has one directory  	 * entry pointing to them, but a directory also the "." entry  	 * pointing to itself.  	 */ -	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, -			       prid, resblks > 0, &ip, NULL); +	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip, +			NULL);  	if (error)  		goto out_trans_cancel; @@ -1340,11 +1327,6 @@ xfs_create_tmpfile(  	tres = &M_RES(mp)->tr_create_tmpfile;  	error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); -	if (error == -ENOSPC) { -		/* No space at all so try a "no-allocation" reservation */ -		resblks = 0; -		error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); -	}  	if (error)  		goto out_release_inode; @@ -1353,8 +1335,7 @@ xfs_create_tmpfile(  	if (error)  		goto out_trans_cancel; -	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, -				prid, resblks > 0, &ip, NULL); +	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);  	if (error)  		goto out_trans_cancel; @@ -1506,6 +1487,24 @@ xfs_link(  	return error;  } +/* Clear the reflink flag and the cowblocks tag if possible. 
*/ +static void +xfs_itruncate_clear_reflink_flags( +	struct xfs_inode	*ip) +{ +	struct xfs_ifork	*dfork; +	struct xfs_ifork	*cfork; + +	if (!xfs_is_reflink_inode(ip)) +		return; +	dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK); +	cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK); +	if (dfork->if_bytes == 0 && cfork->if_bytes == 0) +		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; +	if (cfork->if_bytes == 0) +		xfs_inode_clear_cowblocks_tag(ip); +} +  /*   * Free up the underlying blocks past new_size.  The new size must be smaller   * than the current size.  This routine can be used both for the attribute and @@ -1602,15 +1601,7 @@ xfs_itruncate_extents(  	if (error)  		goto out; -	/* -	 * Clear the reflink flag if there are no data fork blocks and -	 * there are no extents staged in the cow fork. -	 */ -	if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { -		if (ip->i_d.di_nblocks == 0) -			ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; -		xfs_inode_clear_cowblocks_tag(ip); -	} +	xfs_itruncate_clear_reflink_flags(ip);  	/*  	 * Always re-log the inode so that our permanent transaction can keep @@ -2401,6 +2392,24 @@ retry:  }  /* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( +	struct xfs_inode	*ip, +	int			whichfork) +{ +	struct xfs_ifork	*ifp; + +	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) +		return; + +	ifp = XFS_IFORK_PTR(ip, whichfork); +	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + +/*   * This is called to return an inode to the inode free list.   * The inode should already be truncated to 0 length and have   * no pages associated with it.  
This routine also assumes that @@ -2437,6 +2446,9 @@ xfs_ifree(  	if (error)  		return error; +	xfs_ifree_local_data(ip, XFS_DATA_FORK); +	xfs_ifree_local_data(ip, XFS_ATTR_FORK); +  	VFS_I(ip)->i_mode = 0;		/* mark incore inode as free */  	ip->i_d.di_flags = 0;  	ip->i_d.di_dmevmask = 0; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index cc13c3763721..d383e392ec9d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)   * log recovery to replay a bmap operation on the inode.   */  #define XFS_IRECOVERY		(1 << 11) +#define XFS_ICOWBLOCKS		(1 << 12)/* has the cowblocks tag set */  /*   * Per-lifetime flags need to be reset when re-using a reclaimable inode during @@ -428,7 +429,7 @@ xfs_extlen_t	xfs_get_extsz_hint(struct xfs_inode *ip);  xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);  int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, -			       xfs_nlink_t, dev_t, prid_t, int, +			       xfs_nlink_t, dev_t, prid_t,  			       struct xfs_inode **, int *);  /* from xfs_file.c */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 33eb4fb2e3fd..66e1edbfb2b2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1006,7 +1006,7 @@ xfs_file_iomap_begin(  	}  	ASSERT(offset <= mp->m_super->s_maxbytes); -	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) +	if (offset > mp->m_super->s_maxbytes - length)  		length = mp->m_super->s_maxbytes - offset;  	offset_fsb = XFS_B_TO_FSBT(mp, offset);  	end_fsb = XFS_B_TO_FSB(mp, offset + length); @@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin(  	ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);  	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, -			       &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); +			       &nimaps, XFS_BMAPI_ATTRFORK);  out_unlock:  	xfs_iunlock(ip, lockmode); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 38d4227895ae..a503af96d780 100644 
--- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -781,17 +781,17 @@ xfs_log_mount_finish(  	 * something to an unlinked inode, the irele won't cause  	 * premature truncation and freeing of the inode, which results  	 * in log recovery failure.  We have to evict the unreferenced -	 * lru inodes after clearing MS_ACTIVE because we don't +	 * lru inodes after clearing SB_ACTIVE because we don't  	 * otherwise clean up the lru if there's a subsequent failure in  	 * xfs_mountfs, which leads to us leaking the inodes if nothing  	 * else (e.g. quotacheck) references the inodes before the  	 * mount failure occurs.  	 */ -	mp->m_super->s_flags |= MS_ACTIVE; +	mp->m_super->s_flags |= SB_ACTIVE;  	error = xlog_recover_finish(mp->m_log);  	if (!error)  		xfs_log_work_queue(mp); -	mp->m_super->s_flags &= ~MS_ACTIVE; +	mp->m_super->s_flags &= ~SB_ACTIVE;  	evict_inodes(mp->m_super);  	/* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87b1c331f9eb..28d1abfe835e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -24,6 +24,7 @@  #include "xfs_bit.h"  #include "xfs_sb.h"  #include "xfs_mount.h" +#include "xfs_defer.h"  #include "xfs_da_format.h"  #include "xfs_da_btree.h"  #include "xfs_inode.h" @@ -4716,7 +4717,8 @@ STATIC int  xlog_recover_process_cui(  	struct xfs_mount		*mp,  	struct xfs_ail			*ailp, -	struct xfs_log_item		*lip) +	struct xfs_log_item		*lip, +	struct xfs_defer_ops		*dfops)  {  	struct xfs_cui_log_item		*cuip;  	int				error; @@ -4729,7 +4731,7 @@ xlog_recover_process_cui(  		return 0;  	spin_unlock(&ailp->xa_lock); -	error = xfs_cui_recover(mp, cuip); +	error = xfs_cui_recover(mp, cuip, dfops);  	spin_lock(&ailp->xa_lock);  	return error; @@ -4756,7 +4758,8 @@ STATIC int  xlog_recover_process_bui(  	struct xfs_mount		*mp,  	struct xfs_ail			*ailp, -	struct xfs_log_item		*lip) +	struct xfs_log_item		*lip, +	struct xfs_defer_ops		*dfops)  {  	struct xfs_bui_log_item		*buip;  	int				error; @@ -4769,7 +4772,7 @@ 
xlog_recover_process_bui(  		return 0;  	spin_unlock(&ailp->xa_lock); -	error = xfs_bui_recover(mp, buip); +	error = xfs_bui_recover(mp, buip, dfops);  	spin_lock(&ailp->xa_lock);  	return error; @@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip)  	}  } +/* Take all the collected deferred ops and finish them in order. */ +static int +xlog_finish_defer_ops( +	struct xfs_mount	*mp, +	struct xfs_defer_ops	*dfops) +{ +	struct xfs_trans	*tp; +	int64_t			freeblks; +	uint			resblks; +	int			error; + +	/* +	 * We're finishing the defer_ops that accumulated as a result of +	 * recovering unfinished intent items during log recovery.  We +	 * reserve an itruncate transaction because it is the largest +	 * permanent transaction type.  Since we're the only user of the fs +	 * right now, take 93% (15/16) of the available free blocks.  Use +	 * weird math to avoid a 64-bit division. +	 */ +	freeblks = percpu_counter_sum(&mp->m_fdblocks); +	if (freeblks <= 0) +		return -ENOSPC; +	resblks = min_t(int64_t, UINT_MAX, freeblks); +	resblks = (resblks * 15) >> 4; +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, +			0, XFS_TRANS_RESERVE, &tp); +	if (error) +		return error; + +	error = xfs_defer_finish(&tp, dfops); +	if (error) +		goto out_cancel; + +	return xfs_trans_commit(tp); + +out_cancel: +	xfs_trans_cancel(tp); +	return error; +} +  /*   * When this is called, all of the log intent items which did not have   * corresponding log done items should be in the AIL.  
What we do now @@ -4825,10 +4868,12 @@ STATIC int  xlog_recover_process_intents(  	struct xlog		*log)  { -	struct xfs_log_item	*lip; -	int			error = 0; +	struct xfs_defer_ops	dfops;  	struct xfs_ail_cursor	cur; +	struct xfs_log_item	*lip;  	struct xfs_ail		*ailp; +	xfs_fsblock_t		firstfsb; +	int			error = 0;  #if defined(DEBUG) || defined(XFS_WARN)  	xfs_lsn_t		last_lsn;  #endif @@ -4839,6 +4884,7 @@ xlog_recover_process_intents(  #if defined(DEBUG) || defined(XFS_WARN)  	last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);  #endif +	xfs_defer_init(&dfops, &firstfsb);  	while (lip != NULL) {  		/*  		 * We're done when we see something other than an intent. @@ -4859,6 +4905,12 @@ xlog_recover_process_intents(  		 */  		ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); +		/* +		 * NOTE: If your intent processing routine can create more +		 * deferred ops, you /must/ attach them to the dfops in this +		 * routine or else those subsequent intents will get +		 * replayed in the wrong order! 
+		 */  		switch (lip->li_type) {  		case XFS_LI_EFI:  			error = xlog_recover_process_efi(log->l_mp, ailp, lip); @@ -4867,10 +4919,12 @@ xlog_recover_process_intents(  			error = xlog_recover_process_rui(log->l_mp, ailp, lip);  			break;  		case XFS_LI_CUI: -			error = xlog_recover_process_cui(log->l_mp, ailp, lip); +			error = xlog_recover_process_cui(log->l_mp, ailp, lip, +					&dfops);  			break;  		case XFS_LI_BUI: -			error = xlog_recover_process_bui(log->l_mp, ailp, lip); +			error = xlog_recover_process_bui(log->l_mp, ailp, lip, +					&dfops);  			break;  		}  		if (error) @@ -4880,6 +4934,11 @@ xlog_recover_process_intents(  out:  	xfs_trans_ail_cursor_done(&cur);  	spin_unlock(&ailp->xa_lock); +	if (error) +		xfs_defer_cancel(&dfops); +	else +		error = xlog_finish_defer_ops(log->l_mp, &dfops); +  	return error;  } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 010a13a201aa..b897b11afb2c 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -48,7 +48,7 @@  STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);  STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *); - +STATIC void	xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);  STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);  /*   * We use the batch lookup interface to iterate over the dquots as it @@ -695,9 +695,17 @@ xfs_qm_init_quotainfo(  	qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;  	qinf->qi_shrinker.seeks = DEFAULT_SEEKS;  	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; -	register_shrinker(&qinf->qi_shrinker); + +	error = register_shrinker(&qinf->qi_shrinker); +	if (error) +		goto out_free_inos; +  	return 0; +out_free_inos: +	mutex_destroy(&qinf->qi_quotaofflock); +	mutex_destroy(&qinf->qi_tree_lock); +	xfs_qm_destroy_quotainos(qinf);  out_free_lru:  	list_lru_destroy(&qinf->qi_lru);  out_free_qinf: @@ -706,7 +714,6 @@ out_free_qinf:  	return error;  } -  /*   * Gets called when unmounting a filesystem or when all quotas get   * turned off. 
@@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo(  	unregister_shrinker(&qi->qi_shrinker);  	list_lru_destroy(&qi->qi_lru); - -	if (qi->qi_uquotaip) { -		IRELE(qi->qi_uquotaip); -		qi->qi_uquotaip = NULL; /* paranoia */ -	} -	if (qi->qi_gquotaip) { -		IRELE(qi->qi_gquotaip); -		qi->qi_gquotaip = NULL; -	} -	if (qi->qi_pquotaip) { -		IRELE(qi->qi_pquotaip); -		qi->qi_pquotaip = NULL; -	} +	xfs_qm_destroy_quotainos(qi); +	mutex_destroy(&qi->qi_tree_lock);  	mutex_destroy(&qi->qi_quotaofflock);  	kmem_free(qi);  	mp->m_quotainfo = NULL; @@ -793,8 +789,8 @@ xfs_qm_qino_alloc(  		return error;  	if (need_alloc) { -		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, -								&committed); +		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip, +				&committed);  		if (error) {  			xfs_trans_cancel(tp);  			return error; @@ -1600,6 +1596,24 @@ error_rele:  }  STATIC void +xfs_qm_destroy_quotainos( +	xfs_quotainfo_t	*qi) +{ +	if (qi->qi_uquotaip) { +		IRELE(qi->qi_uquotaip); +		qi->qi_uquotaip = NULL; /* paranoia */ +	} +	if (qi->qi_gquotaip) { +		IRELE(qi->qi_gquotaip); +		qi->qi_gquotaip = NULL; +	} +	if (qi->qi_pquotaip) { +		IRELE(qi->qi_pquotaip); +		qi->qi_pquotaip = NULL; +	} +} + +STATIC void  xfs_qm_dqfree_one(  	struct xfs_dquot	*dqp)  { diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 8f2e2fac4255..3a55d6fc271b 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -393,7 +393,8 @@ xfs_cud_init(  int  xfs_cui_recover(  	struct xfs_mount		*mp, -	struct xfs_cui_log_item		*cuip) +	struct xfs_cui_log_item		*cuip, +	struct xfs_defer_ops		*dfops)  {  	int				i;  	int				error = 0; @@ -405,11 +406,9 @@ xfs_cui_recover(  	struct xfs_trans		*tp;  	struct xfs_btree_cur		*rcur = NULL;  	enum xfs_refcount_intent_type	type; -	xfs_fsblock_t			firstfsb;  	xfs_fsblock_t			new_fsb;  	xfs_extlen_t			new_len;  	struct xfs_bmbt_irec		irec; -	struct xfs_defer_ops		dfops;  	bool				requeue_only = false;  	
ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); @@ -465,7 +464,6 @@ xfs_cui_recover(  		return error;  	cudp = xfs_trans_get_cud(tp, cuip); -	xfs_defer_init(&dfops, &firstfsb);  	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {  		refc = &cuip->cui_format.cui_extents[i];  		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; @@ -485,7 +483,7 @@ xfs_cui_recover(  			new_len = refc->pe_len;  		} else  			error = xfs_trans_log_finish_refcount_update(tp, cudp, -				&dfops, type, refc->pe_startblock, refc->pe_len, +				dfops, type, refc->pe_startblock, refc->pe_len,  				&new_fsb, &new_len, &rcur);  		if (error)  			goto abort_error; @@ -497,21 +495,21 @@ xfs_cui_recover(  			switch (type) {  			case XFS_REFCOUNT_INCREASE:  				error = xfs_refcount_increase_extent( -						tp->t_mountp, &dfops, &irec); +						tp->t_mountp, dfops, &irec);  				break;  			case XFS_REFCOUNT_DECREASE:  				error = xfs_refcount_decrease_extent( -						tp->t_mountp, &dfops, &irec); +						tp->t_mountp, dfops, &irec);  				break;  			case XFS_REFCOUNT_ALLOC_COW:  				error = xfs_refcount_alloc_cow_extent( -						tp->t_mountp, &dfops, +						tp->t_mountp, dfops,  						irec.br_startblock,  						irec.br_blockcount);  				break;  			case XFS_REFCOUNT_FREE_COW:  				error = xfs_refcount_free_cow_extent( -						tp->t_mountp, &dfops, +						tp->t_mountp, dfops,  						irec.br_startblock,  						irec.br_blockcount);  				break; @@ -525,17 +523,12 @@ xfs_cui_recover(  	}  	xfs_refcount_finish_one_cleanup(tp, rcur, error); -	error = xfs_defer_finish(&tp, &dfops); -	if (error) -		goto abort_defer;  	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);  	error = xfs_trans_commit(tp);  	return error;  abort_error:  	xfs_refcount_finish_one_cleanup(tp, rcur, error); -abort_defer: -	xfs_defer_cancel(&dfops);  	xfs_trans_cancel(tp);  	return error;  } diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 5b74dddfa64b..0e5327349a13 100644 --- a/fs/xfs/xfs_refcount_item.h +++ 
b/fs/xfs/xfs_refcount_item.h @@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,  		struct xfs_cui_log_item *);  void xfs_cui_item_free(struct xfs_cui_log_item *);  void xfs_cui_release(struct xfs_cui_log_item *); -int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip); +int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip, +		struct xfs_defer_ops *dfops);  #endif	/* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cc041a29eb70..47aea2e82c26 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -49,8 +49,6 @@  #include "xfs_alloc.h"  #include "xfs_quota_defs.h"  #include "xfs_quota.h" -#include "xfs_btree.h" -#include "xfs_bmap_btree.h"  #include "xfs_reflink.h"  #include "xfs_iomap.h"  #include "xfs_rmap_btree.h" @@ -456,6 +454,8 @@ retry:  	if (error)  		goto out_bmap_cancel; +	xfs_inode_set_cowblocks_tag(ip); +  	/* Finish up. */  	error = xfs_defer_finish(&tp, &dfops);  	if (error) @@ -492,8 +492,9 @@ xfs_reflink_find_cow_mapping(  	struct xfs_iext_cursor		icur;  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); -	ASSERT(xfs_is_reflink_inode(ip)); +	if (!xfs_is_reflink_inode(ip)) +		return false;  	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);  	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))  		return false; @@ -612,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(  			/* Remove the mapping from the CoW fork. */  			xfs_bmap_del_extent_cow(ip, &icur, &got, &del); +		} else { +			/* Didn't do anything, push cursor back. 
*/ +			xfs_iext_prev(ifp, &icur);  		}  next_extent:  		if (!xfs_iext_get_extent(ifp, &icur, &got)) @@ -727,7 +731,7 @@ xfs_reflink_end_cow(  			(unsigned int)(end_fsb - offset_fsb),  			XFS_DATA_FORK);  	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, -			resblks, 0, 0, &tp); +			resblks, 0, XFS_TRANS_RESERVE, &tp);  	if (error)  		goto out; @@ -1293,6 +1297,17 @@ xfs_reflink_remap_range(  	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); +	/* +	 * Clear out post-eof preallocations because we don't have page cache +	 * backing the delayed allocations and they'll never get freed on +	 * their own. +	 */ +	if (xfs_can_free_eofblocks(dest, true)) { +		ret = xfs_free_eofblocks(dest); +		if (ret) +			goto out_unlock; +	} +  	/* Set flags and remap blocks. */  	ret = xfs_reflink_set_inode_flag(src, dest);  	if (ret) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f663022353c0..1dacccc367f8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -212,9 +212,9 @@ xfs_parseargs(  	 */  	if (sb_rdonly(sb))  		mp->m_flags |= XFS_MOUNT_RDONLY; -	if (sb->s_flags & MS_DIRSYNC) +	if (sb->s_flags & SB_DIRSYNC)  		mp->m_flags |= XFS_MOUNT_DIRSYNC; -	if (sb->s_flags & MS_SYNCHRONOUS) +	if (sb->s_flags & SB_SYNCHRONOUS)  		mp->m_flags |= XFS_MOUNT_WSYNC;  	/* @@ -1312,7 +1312,7 @@ xfs_fs_remount(  	}  	/* ro -> rw */ -	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { +	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {  		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {  			xfs_warn(mp,  		"ro->rw transition prohibited on norecovery mount"); @@ -1360,6 +1360,7 @@ xfs_fs_remount(  			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);  			return error;  		} +		xfs_queue_cowblocks(mp);  		/* Create the per-AG metadata reservation pool .*/  		error = xfs_fs_reserve_ag_blocks(mp); @@ -1368,7 +1369,15 @@ xfs_fs_remount(  	}  	/* rw -> ro */ -	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { +	if 
(!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { +		/* Get rid of any leftover CoW reservations... */ +		cancel_delayed_work_sync(&mp->m_cowblocks_work); +		error = xfs_icache_free_cowblocks(mp, NULL); +		if (error) { +			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); +			return error; +		} +  		/* Free the per-AG metadata reservation pool. */  		error = xfs_fs_unreserve_ag_blocks(mp);  		if (error) { diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 5f2f32408011..fcc5dfc70aa0 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -30,7 +30,7 @@ extern void xfs_qm_exit(void);  #ifdef CONFIG_XFS_POSIX_ACL  # define XFS_ACL_STRING		"ACLs, " -# define set_posix_acl_flag(sb)	((sb)->s_flags |= MS_POSIXACL) +# define set_posix_acl_flag(sb)	((sb)->s_flags |= SB_POSIXACL)  #else  # define XFS_ACL_STRING  # define set_posix_acl_flag(sb)	do { } while (0) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 68d3ca2c4968..2e9e793a8f9d 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -232,11 +232,6 @@ xfs_symlink(  	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);  	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp); -	if (error == -ENOSPC && fs_blocks == 0) { -		resblks = 0; -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0, -				&tp); -	}  	if (error)  		goto out_release_inode; @@ -260,14 +255,6 @@ xfs_symlink(  		goto out_trans_cancel;  	/* -	 * Check for ability to enter directory entry, if no space reserved. -	 */ -	if (!resblks) { -		error = xfs_dir_canenter(tp, dp, link_name); -		if (error) -			goto out_trans_cancel; -	} -	/*  	 * Initialize the bmap freelist prior to calling either  	 * bmapi or the directory create code.  	 */ @@ -277,7 +264,7 @@ xfs_symlink(  	 * Allocate an inode for the symlink.  	 
*/  	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, -			       prid, resblks > 0, &ip, NULL); +			       prid, &ip, NULL);  	if (error)  		goto out_trans_cancel; diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 5d95fe348294..35f3546b6af5 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -24,7 +24,6 @@  #include "xfs_mount.h"  #include "xfs_defer.h"  #include "xfs_da_format.h" -#include "xfs_defer.h"  #include "xfs_inode.h"  #include "xfs_btree.h"  #include "xfs_da_btree.h"  | 

