diff options
-rw-r--r--  Documentation/filesystems/btrfs.txt |  19
-rw-r--r--  fs/btrfs/ctree.c                    |  36
-rw-r--r--  fs/btrfs/ctree.h                    |  50
-rw-r--r--  fs/btrfs/dev-replace.c              |   2
-rw-r--r--  fs/btrfs/disk-io.c                  |  36
-rw-r--r--  fs/btrfs/extent_io.c                |   3
-rw-r--r--  fs/btrfs/file-item.c                |  92
-rw-r--r--  fs/btrfs/file.c                     | 105
-rw-r--r--  fs/btrfs/inode.c                    | 250
-rw-r--r--  fs/btrfs/ioctl.c                    |   9
-rw-r--r--  fs/btrfs/print-tree.c               |  23
-rw-r--r--  fs/btrfs/reada.c                    |  10
-rw-r--r--  fs/btrfs/scrub.c                    |  24
-rw-r--r--  fs/btrfs/send.c                     |  36
-rw-r--r--  fs/btrfs/super.c                    |  45
-rw-r--r--  fs/btrfs/volumes.c                  |  23
16 files changed, 512 insertions, 251 deletions
| diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index c772b47e7ef0..6593d2e415c5 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt @@ -168,10 +168,23 @@ Options with (*) are default options and will not show in the mount options.    notreelog  	Enable/disable the tree logging used for fsync and O_SYNC writes. -  recovery -	Enable autorecovery attempts if a bad tree root is found at mount time. -	Currently this scans a list of several previous tree roots and tries to +  nologreplay +	Disable the log tree replay at mount time to prevent filesystem +	from getting modified. +	Must be used with 'ro' mount option. +	A filesystem mounted with this option cannot transition to a +	read-write mount via remount,rw - the filesystem must be unmounted +	and mounted back again if read-write access is desired. + +  usebackuproot +	Enable attempts to use backup tree roots if a bad tree root is found at +	mount time. +	Currently this scans a list of 4 previous tree roots and tries to  	use the first readable. +	And since the mount option doesn't affect any behavior after mount, +	it won't be shown in mount info. +	Prior to 4.6, this was done by 'recovery' option that has been +	deprecated, but will work.    rescan_uuid_tree  	Force check and rebuild procedure of the UUID tree. 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 769e0ff1b4ce..77592931ab4f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -311,7 +311,7 @@ struct tree_mod_root {  struct tree_mod_elem {  	struct rb_node node; -	u64 index;		/* shifted logical */ +	u64 logical;  	u64 seq;  	enum mod_log_op op; @@ -435,11 +435,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,  /*   * key order of the log: - *       index -> sequence + *       node/leaf start address -> sequence   * - * the index is the shifted logical of the *new* root node for root replace - * operations, or the shifted logical of the affected block for all other - * operations. + * The 'start address' is the logical address of the *new* root node + * for root replace operations, or the logical address of the affected + * block for all other operations.   *   * Note: must be called with write lock (tree_mod_log_write_lock).   */ @@ -460,9 +460,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)  	while (*new) {  		cur = container_of(*new, struct tree_mod_elem, node);  		parent = *new; -		if (cur->index < tm->index) +		if (cur->logical < tm->logical)  			new = &((*new)->rb_left); -		else if (cur->index > tm->index) +		else if (cur->logical > tm->logical)  			new = &((*new)->rb_right);  		else if (cur->seq < tm->seq)  			new = &((*new)->rb_left); @@ -523,7 +523,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot,  	if (!tm)  		return NULL; -	tm->index = eb->start >> PAGE_CACHE_SHIFT; +	tm->logical = eb->start;  	if (op != MOD_LOG_KEY_ADD) {  		btrfs_node_key(eb, &tm->key, slot);  		tm->blockptr = btrfs_node_blockptr(eb, slot); @@ -588,7 +588,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,  		goto free_tms;  	} -	tm->index = eb->start >> PAGE_CACHE_SHIFT; +	tm->logical = eb->start;  	tm->slot = src_slot;  	tm->move.dst_slot = dst_slot;  	tm->move.nr_items = nr_items; @@ -699,7 +699,7 @@ tree_mod_log_insert_root(struct
btrfs_fs_info *fs_info,  		goto free_tms;  	} -	tm->index = new_root->start >> PAGE_CACHE_SHIFT; +	tm->logical = new_root->start;  	tm->old_root.logical = old_root->start;  	tm->old_root.level = btrfs_header_level(old_root);  	tm->generation = btrfs_header_generation(old_root); @@ -739,16 +739,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,  	struct rb_node *node;  	struct tree_mod_elem *cur = NULL;  	struct tree_mod_elem *found = NULL; -	u64 index = start >> PAGE_CACHE_SHIFT;  	tree_mod_log_read_lock(fs_info);  	tm_root = &fs_info->tree_mod_log;  	node = tm_root->rb_node;  	while (node) {  		cur = container_of(node, struct tree_mod_elem, node); -		if (cur->index < index) { +		if (cur->logical < start) {  			node = node->rb_left; -		} else if (cur->index > index) { +		} else if (cur->logical > start) {  			node = node->rb_right;  		} else if (cur->seq < min_seq) {  			node = node->rb_left; @@ -1230,9 +1229,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,  		return NULL;  	/* -	 * the very last operation that's logged for a root is the replacement -	 * operation (if it is replaced at all). this has the index of the *new* -	 * root, making it the very first operation that's logged for this root. +	 * the very last operation that's logged for a root is the +	 * replacement operation (if it is replaced at all). this has +	 * the logical address of the *new* root, making it the very +	 * first operation that's logged for this root.  	 
*/  	while (1) {  		tm = tree_mod_log_search_oldest(fs_info, root_logical, @@ -1336,7 +1336,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,  		if (!next)  			break;  		tm = container_of(next, struct tree_mod_elem, node); -		if (tm->index != first_tm->index) +		if (tm->logical != first_tm->logical)  			break;  	}  	tree_mod_log_read_unlock(fs_info); @@ -5361,7 +5361,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,  		goto out;  	} -	tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); +	tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL);  	if (!tmp_buf) {  		ret = -ENOMEM;  		goto out; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e43d987e1c99..e2a5cc0d4a14 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -100,6 +100,9 @@ struct btrfs_ordered_sum;  /* tracks free space in block groups. */  #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL +/* device stats in the device tree */ +#define BTRFS_DEV_STATS_OBJECTID 0ULL +  /* for storing balance parameters in the root tree */  #define BTRFS_BALANCE_OBJECTID -4ULL @@ -2188,13 +2191,43 @@ struct btrfs_ioctl_defrag_range_args {   */  #define BTRFS_QGROUP_RELATION_KEY       246 +/* + * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. + */  #define BTRFS_BALANCE_ITEM_KEY	248  /* - * Persistantly stores the io stats in the device tree. - * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). + * The key type for tree items that are stored persistently, but do not need to + * exist for extended period of time. The items can exist in any tree. + * + * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] + * + * Existing items: + * + * - balance status item + *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) + */ +#define BTRFS_TEMPORARY_ITEM_KEY	248 + +/* + * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY + */ +#define BTRFS_DEV_STATS_KEY		249 + +/* + * The key type for tree items that are stored persistently and usually exist + * for a long period, eg. filesystem lifetime. 
The item kinds can be status + * information, stats or preference values. The item can exist in any tree. + * + * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] + * + * Existing items: + * + * - device statistics, store IO stats in the device tree, one key for all + *   stats + *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)   */ -#define BTRFS_DEV_STATS_KEY	249 +#define BTRFS_PERSISTENT_ITEM_KEY	249  /*   * Persistantly stores the device replace state in the device tree. @@ -2244,7 +2277,7 @@ struct btrfs_ioctl_defrag_range_args {  #define BTRFS_MOUNT_ENOSPC_DEBUG	 (1 << 15)  #define BTRFS_MOUNT_AUTO_DEFRAG		(1 << 16)  #define BTRFS_MOUNT_INODE_MAP_CACHE	(1 << 17) -#define BTRFS_MOUNT_RECOVERY		(1 << 18) +#define BTRFS_MOUNT_USEBACKUPROOT	(1 << 18)  #define BTRFS_MOUNT_SKIP_BALANCE	(1 << 19)  #define BTRFS_MOUNT_CHECK_INTEGRITY	(1 << 20)  #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) @@ -2253,6 +2286,7 @@ struct btrfs_ioctl_defrag_range_args {  #define BTRFS_MOUNT_FRAGMENT_DATA	(1 << 24)  #define BTRFS_MOUNT_FRAGMENT_METADATA	(1 << 25)  #define BTRFS_MOUNT_FREE_SPACE_TREE	(1 << 26) +#define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)  #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)  #define BTRFS_DEFAULT_MAX_INLINE	(8192) @@ -2356,6 +2390,9 @@ struct btrfs_map_token {  	unsigned long offset;  }; +#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ +				((bytes) >> (fs_info)->sb->s_blocksize_bits) +  static inline void btrfs_init_map_token (struct btrfs_map_token *token)  {  	token->kaddr = NULL; @@ -4030,7 +4067,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,  			struct btrfs_root *root,  			struct inode *dir, u64 objectid,  			const char *name, int name_len); -int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, +int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,  			int front);  int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,  			       struct btrfs_root *root, @@ -4154,7 +4191,8 @@ void 
btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);  ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);  /* super.c */ -int btrfs_parse_options(struct btrfs_root *root, char *options); +int btrfs_parse_options(struct btrfs_root *root, char *options, +			unsigned long new_flags);  int btrfs_sync_fs(struct super_block *sb, int wait);  #ifdef CONFIG_PRINTK diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index cbb7dbfb3fff..01ce5fcecc5c 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -802,7 +802,7 @@ static int btrfs_dev_replace_kthread(void *data)  	struct btrfs_ioctl_dev_replace_args *status_args;  	u64 progress; -	status_args = kzalloc(sizeof(*status_args), GFP_NOFS); +	status_args = kzalloc(sizeof(*status_args), GFP_KERNEL);  	if (status_args) {  		btrfs_dev_replace_status(fs_info, status_args);  		progress = status_args->status.progress_1000; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5e3ec1fc0ac3..de68b8b61fd2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1296,9 +1296,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,  	spin_lock_init(&root->root_item_lock);  } -static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) +static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, +		gfp_t flags)  { -	struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); +	struct btrfs_root *root = kzalloc(sizeof(*root), flags);  	if (root)  		root->fs_info = fs_info;  	return root; @@ -1310,7 +1311,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)  {  	struct btrfs_root *root; -	root = btrfs_alloc_root(NULL); +	root = btrfs_alloc_root(NULL, GFP_KERNEL);  	if (!root)  		return ERR_PTR(-ENOMEM);  	__setup_root(4096, 4096, 4096, root, NULL, 1); @@ -1332,7 +1333,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,  	int ret = 0;  	uuid_le uuid; -	root = btrfs_alloc_root(fs_info); +	root = 
btrfs_alloc_root(fs_info, GFP_KERNEL);  	if (!root)  		return ERR_PTR(-ENOMEM); @@ -1408,7 +1409,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,  	struct btrfs_root *tree_root = fs_info->tree_root;  	struct extent_buffer *leaf; -	root = btrfs_alloc_root(fs_info); +	root = btrfs_alloc_root(fs_info, GFP_NOFS);  	if (!root)  		return ERR_PTR(-ENOMEM); @@ -1506,7 +1507,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,  	if (!path)  		return ERR_PTR(-ENOMEM); -	root = btrfs_alloc_root(fs_info); +	root = btrfs_alloc_root(fs_info, GFP_NOFS);  	if (!root) {  		ret = -ENOMEM;  		goto alloc_fail; @@ -2385,7 +2386,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,  		return -EIO;  	} -	log_tree_root = btrfs_alloc_root(fs_info); +	log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);  	if (!log_tree_root)  		return -ENOMEM; @@ -2510,8 +2511,8 @@ int open_ctree(struct super_block *sb,  	int backup_index = 0;  	int max_active; -	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); -	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); +	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); +	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);  	if (!tree_root || !chunk_root) {  		err = -ENOMEM;  		goto fail; @@ -2623,7 +2624,7 @@ int open_ctree(struct super_block *sb,  	INIT_LIST_HEAD(&fs_info->ordered_roots);  	spin_lock_init(&fs_info->ordered_root_lock);  	fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), -					GFP_NOFS); +					GFP_KERNEL);  	if (!fs_info->delayed_root) {  		err = -ENOMEM;  		goto fail_iput; @@ -2751,7 +2752,7 @@ int open_ctree(struct super_block *sb,  	 */  	fs_info->compress_type = BTRFS_COMPRESS_ZLIB; -	ret = btrfs_parse_options(tree_root, options); +	ret = btrfs_parse_options(tree_root, options, sb->s_flags);  	if (ret) {  		err = ret;  		goto fail_alloc; @@ -3030,8 +3031,9 @@ retry_root_backup:  	if (ret)  	
	goto fail_trans_kthread; -	/* do not make disk changes in broken FS */ -	if (btrfs_super_log_root(disk_super) != 0) { +	/* do not make disk changes in broken FS or nologreplay is given */ +	if (btrfs_super_log_root(disk_super) != 0 && +	    !btrfs_test_opt(tree_root, NOLOGREPLAY)) {  		ret = btrfs_replay_log(fs_info, fs_devices);  		if (ret) {  			err = ret; @@ -3147,6 +3149,12 @@ retry_root_backup:  	fs_info->open = 1; +	/* +	 * backuproot only affect mount behavior, and if open_ctree succeeded, +	 * no need to keep the flag +	 */ +	btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT); +  	return 0;  fail_qgroup: @@ -3201,7 +3209,7 @@ fail:  	return err;  recovery_tree_root: -	if (!btrfs_test_opt(tree_root, RECOVERY)) +	if (!btrfs_test_opt(tree_root, USEBACKUPROOT))  		goto fail_tree_roots;  	free_root_pointers(fs_info, 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2e7c97a3f344..1b2073389dc2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3186,7 +3186,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,  	while (1) {  		lock_extent(tree, start, end); -		ordered = btrfs_lookup_ordered_extent(inode, start); +		ordered = btrfs_lookup_ordered_range(inode, start, +						PAGE_CACHE_SIZE);  		if (!ordered)  			break;  		unlock_extent(tree, start, end); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a67e1c828d0f..1c50a7b09b4e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -172,6 +172,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,  	u64 item_start_offset = 0;  	u64 item_last_offset = 0;  	u64 disk_bytenr; +	u64 page_bytes_left;  	u32 diff;  	int nblocks;  	int bio_index = 0; @@ -220,6 +221,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,  	disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;  	if (dio)  		offset = logical_offset; + +	page_bytes_left = bvec->bv_len;  	while (bio_index < bio->bi_vcnt) {  		if (!dio)  			offset = page_offset(bvec->bv_page) + 
bvec->bv_offset; @@ -243,7 +246,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,  				if (BTRFS_I(inode)->root->root_key.objectid ==  				    BTRFS_DATA_RELOC_TREE_OBJECTID) {  					set_extent_bits(io_tree, offset, -						offset + bvec->bv_len - 1, +						offset + root->sectorsize - 1,  						EXTENT_NODATASUM, GFP_NOFS);  				} else {  					btrfs_info(BTRFS_I(inode)->root->fs_info, @@ -281,11 +284,17 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,  found:  		csum += count * csum_size;  		nblocks -= count; -		bio_index += count; +  		while (count--) { -			disk_bytenr += bvec->bv_len; -			offset += bvec->bv_len; -			bvec++; +			disk_bytenr += root->sectorsize; +			offset += root->sectorsize; +			page_bytes_left -= root->sectorsize; +			if (!page_bytes_left) { +				bio_index++; +				bvec++; +				page_bytes_left = bvec->bv_len; +			} +  		}  	}  	btrfs_free_path(path); @@ -432,6 +441,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,  	struct bio_vec *bvec = bio->bi_io_vec;  	int bio_index = 0;  	int index; +	int nr_sectors; +	int i;  	unsigned long total_bytes = 0;  	unsigned long this_sum_bytes = 0;  	u64 offset; @@ -459,41 +470,56 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,  		if (!contig)  			offset = page_offset(bvec->bv_page) + bvec->bv_offset; -		if (offset >= ordered->file_offset + ordered->len || -		    offset < ordered->file_offset) { -			unsigned long bytes_left; -			sums->len = this_sum_bytes; -			this_sum_bytes = 0; -			btrfs_add_ordered_sum(inode, ordered, sums); -			btrfs_put_ordered_extent(ordered); +		data = kmap_atomic(bvec->bv_page); -			bytes_left = bio->bi_iter.bi_size - total_bytes; +		nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, +						bvec->bv_len + root->sectorsize +						- 1); + +		for (i = 0; i < nr_sectors; i++) { +			if (offset >= ordered->file_offset + ordered->len || +				offset < ordered->file_offset) { +				unsigned long bytes_left; + +				
kunmap_atomic(data); +				sums->len = this_sum_bytes; +				this_sum_bytes = 0; +				btrfs_add_ordered_sum(inode, ordered, sums); +				btrfs_put_ordered_extent(ordered); + +				bytes_left = bio->bi_iter.bi_size - total_bytes; + +				sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), +					GFP_NOFS); +				BUG_ON(!sums); /* -ENOMEM */ +				sums->len = bytes_left; +				ordered = btrfs_lookup_ordered_extent(inode, +								offset); +				ASSERT(ordered); /* Logic error */ +				sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +					+ total_bytes; +				index = 0; + +				data = kmap_atomic(bvec->bv_page); +			} -			sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), -				       GFP_NOFS); -			BUG_ON(!sums); /* -ENOMEM */ -			sums->len = bytes_left; -			ordered = btrfs_lookup_ordered_extent(inode, offset); -			BUG_ON(!ordered); /* Logic error */ -			sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) + -				       total_bytes; -			index = 0; +			sums->sums[index] = ~(u32)0; +			sums->sums[index] +				= btrfs_csum_data(data + bvec->bv_offset +						+ (i * root->sectorsize), +						sums->sums[index], +						root->sectorsize); +			btrfs_csum_final(sums->sums[index], +					(char *)(sums->sums + index)); +			index++; +			offset += root->sectorsize; +			this_sum_bytes += root->sectorsize; +			total_bytes += root->sectorsize;  		} -		data = kmap_atomic(bvec->bv_page); -		sums->sums[index] = ~(u32)0; -		sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset, -						    sums->sums[index], -						    bvec->bv_len);  		kunmap_atomic(data); -		btrfs_csum_final(sums->sums[index], -				 (char *)(sums->sums + index));  		bio_index++; -		index++; -		total_bytes += bvec->bv_len; -		this_sum_bytes += bvec->bv_len; -		offset += bvec->bv_len;  		bvec++;  	}  	this_sum_bytes = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 098bb8f690c9..2d9e4009c7e4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -498,7 +498,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct 
inode *inode,  	loff_t isize = i_size_read(inode);  	start_pos = pos & ~((u64)root->sectorsize - 1); -	num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); +	num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);  	end_of_last_block = start_pos + num_bytes - 1;  	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, @@ -1379,16 +1379,19 @@ fail:  static noinline int  lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,  				size_t num_pages, loff_t pos, +				size_t write_bytes,  				u64 *lockstart, u64 *lockend,  				struct extent_state **cached_state)  { +	struct btrfs_root *root = BTRFS_I(inode)->root;  	u64 start_pos;  	u64 last_pos;  	int i;  	int ret = 0; -	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); -	last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1; +	start_pos = round_down(pos, root->sectorsize); +	last_pos = start_pos +		+ round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;  	if (start_pos < inode->i_size) {  		struct btrfs_ordered_extent *ordered; @@ -1503,6 +1506,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,  	while (iov_iter_count(i) > 0) {  		size_t offset = pos & (PAGE_CACHE_SIZE - 1); +		size_t sector_offset;  		size_t write_bytes = min(iov_iter_count(i),  					 nrptrs * (size_t)PAGE_CACHE_SIZE -  					 offset); @@ -1511,6 +1515,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,  		size_t reserve_bytes;  		size_t dirty_pages;  		size_t copied; +		size_t dirty_sectors; +		size_t num_sectors;  		WARN_ON(num_pages > nrptrs); @@ -1523,7 +1529,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,  			break;  		} -		reserve_bytes = num_pages << PAGE_CACHE_SHIFT; +		sector_offset = pos & (root->sectorsize - 1); +		reserve_bytes = round_up(write_bytes + sector_offset, +				root->sectorsize);  		if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |  					     BTRFS_INODE_PREALLOC)) { @@ -1542,7 +1550,9 @@ 
static noinline ssize_t __btrfs_buffered_write(struct file *file,  				 */  				num_pages = DIV_ROUND_UP(write_bytes + offset,  							 PAGE_CACHE_SIZE); -				reserve_bytes = num_pages << PAGE_CACHE_SHIFT; +				reserve_bytes = round_up(write_bytes +							+ sector_offset, +							root->sectorsize);  				goto reserve_metadata;  			}  		} @@ -1576,8 +1586,8 @@ again:  			break;  		ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, -						      pos, &lockstart, &lockend, -						      &cached_state); +						pos, write_bytes, &lockstart, +						&lockend, &cached_state);  		if (ret < 0) {  			if (ret == -EAGAIN)  				goto again; @@ -1612,9 +1622,16 @@ again:  		 * we still have an outstanding extent for the chunk we actually  		 * managed to copy.  		 */ -		if (num_pages > dirty_pages) { -			release_bytes = (num_pages - dirty_pages) << -				PAGE_CACHE_SHIFT; +		num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, +						reserve_bytes); +		dirty_sectors = round_up(copied + sector_offset, +					root->sectorsize); +		dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, +						dirty_sectors); + +		if (num_sectors > dirty_sectors) { +			release_bytes = (write_bytes - copied) +				& ~((u64)root->sectorsize - 1);  			if (copied > 0) {  				spin_lock(&BTRFS_I(inode)->lock);  				BTRFS_I(inode)->outstanding_extents++; @@ -1633,7 +1650,8 @@ again:  			}  		} -		release_bytes = dirty_pages << PAGE_CACHE_SHIFT; +		release_bytes = round_up(copied + sector_offset, +					root->sectorsize);  		if (copied > 0)  			ret = btrfs_dirty_pages(root, inode, pages, @@ -1654,8 +1672,7 @@ again:  		if (only_release_metadata && copied > 0) {  			lockstart = round_down(pos, root->sectorsize); -			lockend = lockstart + -				(dirty_pages << PAGE_CACHE_SHIFT) - 1; +			lockend = round_up(pos + copied, root->sectorsize) - 1;  			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,  				       lockend, EXTENT_NORESERVE, NULL, @@ -1761,6 +1778,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb 
*iocb,  	ssize_t err;  	loff_t pos;  	size_t count; +	loff_t oldsize; +	int clean_page = 0;  	inode_lock(inode);  	err = generic_write_checks(iocb, from); @@ -1799,14 +1818,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,  	pos = iocb->ki_pos;  	count = iov_iter_count(from);  	start_pos = round_down(pos, root->sectorsize); -	if (start_pos > i_size_read(inode)) { +	oldsize = i_size_read(inode); +	if (start_pos > oldsize) {  		/* Expand hole size to cover write data, preventing empty gap */  		end_pos = round_up(pos + count, root->sectorsize); -		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); +		err = btrfs_cont_expand(inode, oldsize, end_pos);  		if (err) {  			inode_unlock(inode);  			goto out;  		} +		if (start_pos > round_up(oldsize, root->sectorsize)) +			clean_page = 1;  	}  	if (sync) @@ -1818,6 +1840,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,  		num_written = __btrfs_buffered_write(file, from, pos);  		if (num_written > 0)  			iocb->ki_pos = pos + num_written; +		if (clean_page) +			pagecache_isize_extended(inode, oldsize, +						i_size_read(inode));  	}  	inode_unlock(inode); @@ -2293,10 +2318,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	int ret = 0;  	int err = 0;  	unsigned int rsv_count; -	bool same_page; +	bool same_block;  	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);  	u64 ino_size; -	bool truncated_page = false; +	bool truncated_block = false;  	bool updated_inode = false;  	ret = btrfs_wait_ordered_range(inode, offset, len); @@ -2304,7 +2329,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  		return ret;  	inode_lock(inode); -	ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); +	ino_size = round_up(inode->i_size, root->sectorsize);  	ret = find_first_non_hole(inode, &offset, &len);  	if (ret < 0)  		goto out_only_mutex; @@ -2317,31 +2342,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) 
 	lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);  	lockend = round_down(offset + len,  			     BTRFS_I(inode)->root->sectorsize) - 1; -	same_page = ((offset >> PAGE_CACHE_SHIFT) == -		    ((offset + len - 1) >> PAGE_CACHE_SHIFT)); - +	same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset)) +		== (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));  	/* -	 * We needn't truncate any page which is beyond the end of the file +	 * We needn't truncate any block which is beyond the end of the file  	 * because we are sure there is no data there.  	 */  	/* -	 * Only do this if we are in the same page and we aren't doing the -	 * entire page. +	 * Only do this if we are in the same block and we aren't doing the +	 * entire block.  	 */ -	if (same_page && len < PAGE_CACHE_SIZE) { +	if (same_block && len < root->sectorsize) {  		if (offset < ino_size) { -			truncated_page = true; -			ret = btrfs_truncate_page(inode, offset, len, 0); +			truncated_block = true; +			ret = btrfs_truncate_block(inode, offset, len, 0);  		} else {  			ret = 0;  		}  		goto out_only_mutex;  	} -	/* zero back part of the first page */ +	/* zero back part of the first block */  	if (offset < ino_size) { -		truncated_page = true; -		ret = btrfs_truncate_page(inode, offset, 0, 0); +		truncated_block = true; +		ret = btrfs_truncate_block(inode, offset, 0, 0);  		if (ret) {  			inode_unlock(inode);  			return ret; @@ -2376,9 +2400,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  		if (!ret) {  			/* zero the front end of the last page */  			if (tail_start + tail_len < ino_size) { -				truncated_page = true; -				ret = btrfs_truncate_page(inode, -						tail_start + tail_len, 0, 1); +				truncated_block = true; +				ret = btrfs_truncate_block(inode, +							tail_start + tail_len, +							0, 1);  				if (ret)  					goto out_only_mutex;  			} @@ -2558,7 +2583,7 @@ out:  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,  			     
&cached_state, GFP_NOFS);  out_only_mutex: -	if (!updated_inode && truncated_page && !ret && !err) { +	if (!updated_inode && truncated_block && !ret && !err) {  		/*  		 * If we only end up zeroing part of a page, we still need to  		 * update the inode item, so that all the time fields are @@ -2611,7 +2636,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)  		return 0;  	}  insert: -	range = kmalloc(sizeof(*range), GFP_NOFS); +	range = kmalloc(sizeof(*range), GFP_KERNEL);  	if (!range)  		return -ENOMEM;  	range->start = start; @@ -2678,10 +2703,10 @@ static long btrfs_fallocate(struct file *file, int mode,  	} else if (offset + len > inode->i_size) {  		/*  		 * If we are fallocating from the end of the file onward we -		 * need to zero out the end of the page if i_size lands in the -		 * middle of a page. +		 * need to zero out the end of the block if i_size lands in the +		 * middle of a block.  		 */ -		ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); +		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);  		if (ret)  			goto out;  	} @@ -2712,7 +2737,7 @@ static long btrfs_fallocate(struct file *file, int mode,  			btrfs_put_ordered_extent(ordered);  			unlock_extent_cached(&BTRFS_I(inode)->io_tree,  					     alloc_start, locked_end, -					     &cached_state, GFP_NOFS); +					     &cached_state, GFP_KERNEL);  			/*  			 * we can't wait on the range with the transaction  			 * running or with the extent lock held @@ -2806,7 +2831,7 @@ static long btrfs_fallocate(struct file *file, int mode,  	}  out_unlock:  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, -			     &cached_state, GFP_NOFS); +			     &cached_state, GFP_KERNEL);  out:  	/*  	 * As we waited the extent range, the data_rsv_map must be empty diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f06eb1f4384..f8be74037f34 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -263,7 +263,7 @@ static noinline int 
cow_file_range_inline(struct btrfs_root *root,  		data_len = compressed_size;  	if (start > 0 || -	    actual_end > PAGE_CACHE_SIZE || +	    actual_end > root->sectorsize ||  	    data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||  	    (!compressed_size &&  	    (actual_end & (root->sectorsize - 1)) == 0) || @@ -2002,7 +2002,8 @@ again:  	if (PagePrivate2(page))  		goto out; -	ordered = btrfs_lookup_ordered_extent(inode, page_start); +	ordered = btrfs_lookup_ordered_range(inode, page_start, +					PAGE_CACHE_SIZE);  	if (ordered) {  		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,  				     page_end, &cached_state, GFP_NOFS); @@ -4248,7 +4249,8 @@ static int truncate_inline_extent(struct inode *inode,  		 * read the extent item from disk (data not in the page cache).  		 */  		btrfs_release_path(path); -		return btrfs_truncate_page(inode, offset, page_end - offset, 0); +		return btrfs_truncate_block(inode, offset, page_end - offset, +					0);  	}  	btrfs_set_file_extent_ram_bytes(leaf, fi, size); @@ -4601,17 +4603,17 @@ error:  }  /* - * btrfs_truncate_page - read, zero a chunk and write a page + * btrfs_truncate_block - read, zero a chunk and write a block   * @inode - inode that we're zeroing   * @from - the offset to start zeroing   * @len - the length to zero, 0 to zero the entire range respective to the   *	offset   * @front - zero up to the offset instead of from the offset on   * - * This will find the page for the "from" offset and cow the page and zero the + * This will find the block for the "from" offset and cow the block and zero the   * part we want to zero.  This is used with truncate and hole punching.   
*/ -int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, +int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,  			int front)  {  	struct address_space *mapping = inode->i_mapping; @@ -4622,18 +4624,19 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,  	char *kaddr;  	u32 blocksize = root->sectorsize;  	pgoff_t index = from >> PAGE_CACHE_SHIFT; -	unsigned offset = from & (PAGE_CACHE_SIZE-1); +	unsigned offset = from & (blocksize - 1);  	struct page *page;  	gfp_t mask = btrfs_alloc_write_mask(mapping);  	int ret = 0; -	u64 page_start; -	u64 page_end; +	u64 block_start; +	u64 block_end;  	if ((offset & (blocksize - 1)) == 0 &&  	    (!len || ((len & (blocksize - 1)) == 0)))  		goto out; +  	ret = btrfs_delalloc_reserve_space(inode, -			round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE); +			round_down(from, blocksize), blocksize);  	if (ret)  		goto out; @@ -4641,14 +4644,14 @@ again:  	page = find_or_create_page(mapping, index, mask);  	if (!page) {  		btrfs_delalloc_release_space(inode, -				round_down(from, PAGE_CACHE_SIZE), -				PAGE_CACHE_SIZE); +				round_down(from, blocksize), +				blocksize);  		ret = -ENOMEM;  		goto out;  	} -	page_start = page_offset(page); -	page_end = page_start + PAGE_CACHE_SIZE - 1; +	block_start = round_down(from, blocksize); +	block_end = block_start + blocksize - 1;  	if (!PageUptodate(page)) {  		ret = btrfs_readpage(NULL, page); @@ -4665,12 +4668,12 @@ again:  	}  	wait_on_page_writeback(page); -	lock_extent_bits(io_tree, page_start, page_end, &cached_state); +	lock_extent_bits(io_tree, block_start, block_end, &cached_state);  	set_page_extent_mapped(page); -	ordered = btrfs_lookup_ordered_extent(inode, page_start); +	ordered = btrfs_lookup_ordered_extent(inode, block_start);  	if (ordered) { -		unlock_extent_cached(io_tree, page_start, page_end, +		unlock_extent_cached(io_tree, block_start, block_end,  				     &cached_state, GFP_NOFS);  		unlock_page(page);  		
page_cache_release(page); @@ -4679,39 +4682,41 @@ again:  		goto again;  	} -	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, +	clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,  			  EXTENT_DIRTY | EXTENT_DELALLOC |  			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,  			  0, 0, &cached_state, GFP_NOFS); -	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, +	ret = btrfs_set_extent_delalloc(inode, block_start, block_end,  					&cached_state);  	if (ret) { -		unlock_extent_cached(io_tree, page_start, page_end, +		unlock_extent_cached(io_tree, block_start, block_end,  				     &cached_state, GFP_NOFS);  		goto out_unlock;  	} -	if (offset != PAGE_CACHE_SIZE) { +	if (offset != blocksize) {  		if (!len) -			len = PAGE_CACHE_SIZE - offset; +			len = blocksize - offset;  		kaddr = kmap(page);  		if (front) -			memset(kaddr, 0, offset); +			memset(kaddr + (block_start - page_offset(page)), +				0, offset);  		else -			memset(kaddr + offset, 0, len); +			memset(kaddr + (block_start - page_offset(page)) +  offset, +				0, len);  		flush_dcache_page(page);  		kunmap(page);  	}  	ClearPageChecked(page);  	set_page_dirty(page); -	unlock_extent_cached(io_tree, page_start, page_end, &cached_state, +	unlock_extent_cached(io_tree, block_start, block_end, &cached_state,  			     GFP_NOFS);  out_unlock:  	if (ret) -		btrfs_delalloc_release_space(inode, page_start, -					     PAGE_CACHE_SIZE); +		btrfs_delalloc_release_space(inode, block_start, +					     blocksize);  	unlock_page(page);  	page_cache_release(page);  out: @@ -4782,11 +4787,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  	int err = 0;  	/* -	 * If our size started in the middle of a page we need to zero out the -	 * rest of the page before we expand the i_size, otherwise we could +	 * If our size started in the middle of a block we need to zero out the +	 * rest of the block before we expand the i_size, otherwise we could  	 * expose stale data.  	 
*/ -	err = btrfs_truncate_page(inode, oldsize, 0, 0); +	err = btrfs_truncate_block(inode, oldsize, 0, 0);  	if (err)  		return err; @@ -4895,7 +4900,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)  	}  	if (newsize > oldsize) { -		truncate_pagecache(inode, newsize);  		/*  		 * Don't do an expanding truncate while snapshoting is ongoing.  		 * This is to ensure the snapshot captures a fully consistent @@ -4918,6 +4922,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)  		i_size_write(inode, newsize);  		btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); +		pagecache_isize_extended(inode, oldsize, newsize);  		ret = btrfs_update_inode(trans, root, inode);  		btrfs_end_write_no_snapshoting(root);  		btrfs_end_transaction(trans, root); @@ -5788,7 +5793,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)  			if (name_len <= sizeof(tmp_name)) {  				name_ptr = tmp_name;  			} else { -				name_ptr = kmalloc(name_len, GFP_NOFS); +				name_ptr = kmalloc(name_len, GFP_KERNEL);  				if (!name_ptr) {  					ret = -ENOMEM;  					goto err; @@ -7752,9 +7757,9 @@ static int btrfs_check_dio_repairable(struct inode *inode,  }  static int dio_read_error(struct inode *inode, struct bio *failed_bio, -			  struct page *page, u64 start, u64 end, -			  int failed_mirror, bio_end_io_t *repair_endio, -			  void *repair_arg) +			struct page *page, unsigned int pgoff, +			u64 start, u64 end, int failed_mirror, +			bio_end_io_t *repair_endio, void *repair_arg)  {  	struct io_failure_record *failrec;  	struct bio *bio; @@ -7775,7 +7780,9 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,  		return -EIO;  	} -	if (failed_bio->bi_vcnt > 1) +	if ((failed_bio->bi_vcnt > 1) +		|| (failed_bio->bi_io_vec->bv_len +			> BTRFS_I(inode)->root->sectorsize))  		read_mode = READ_SYNC | REQ_FAILFAST_DEV;  	else  		read_mode = READ_SYNC; @@ -7783,7 +7790,7 @@ static int dio_read_error(struct inode *inode, 
struct bio *failed_bio,  	isector = start - btrfs_io_bio(failed_bio)->logical;  	isector >>= inode->i_sb->s_blocksize_bits;  	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, -				      0, isector, repair_endio, repair_arg); +				pgoff, isector, repair_endio, repair_arg);  	if (!bio) {  		free_io_failure(inode, failrec);  		return -EIO; @@ -7813,12 +7820,17 @@ struct btrfs_retry_complete {  static void btrfs_retry_endio_nocsum(struct bio *bio)  {  	struct btrfs_retry_complete *done = bio->bi_private; +	struct inode *inode;  	struct bio_vec *bvec;  	int i;  	if (bio->bi_error)  		goto end; +	ASSERT(bio->bi_vcnt == 1); +	inode = bio->bi_io_vec->bv_page->mapping->host; +	ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize); +  	done->uptodate = 1;  	bio_for_each_segment_all(bvec, bio, i)  		clean_io_failure(done->inode, done->start, bvec->bv_page, 0); @@ -7830,25 +7842,35 @@ end:  static int __btrfs_correct_data_nocsum(struct inode *inode,  				       struct btrfs_io_bio *io_bio)  { +	struct btrfs_fs_info *fs_info;  	struct bio_vec *bvec;  	struct btrfs_retry_complete done;  	u64 start; +	unsigned int pgoff; +	u32 sectorsize; +	int nr_sectors;  	int i;  	int ret; +	fs_info = BTRFS_I(inode)->root->fs_info; +	sectorsize = BTRFS_I(inode)->root->sectorsize; +  	start = io_bio->logical;  	done.inode = inode;  	bio_for_each_segment_all(bvec, &io_bio->bio, i) { -try_again: +		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); +		pgoff = bvec->bv_offset; + +next_block_or_try_again:  		done.uptodate = 0;  		done.start = start;  		init_completion(&done.done); -		ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, -				     start + bvec->bv_len - 1, -				     io_bio->mirror_num, -				     btrfs_retry_endio_nocsum, &done); +		ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, +				pgoff, start, start + sectorsize - 1, +				io_bio->mirror_num, +				btrfs_retry_endio_nocsum, &done);  		if (ret)  			return ret; @@ -7856,10 
+7878,15 @@ try_again:  		if (!done.uptodate) {  			/* We might have another mirror, so try again */ -			goto try_again; +			goto next_block_or_try_again;  		} -		start += bvec->bv_len; +		start += sectorsize; + +		if (nr_sectors--) { +			pgoff += sectorsize; +			goto next_block_or_try_again; +		}  	}  	return 0; @@ -7869,7 +7896,9 @@ static void btrfs_retry_endio(struct bio *bio)  {  	struct btrfs_retry_complete *done = bio->bi_private;  	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); +	struct inode *inode;  	struct bio_vec *bvec; +	u64 start;  	int uptodate;  	int ret;  	int i; @@ -7878,13 +7907,20 @@ static void btrfs_retry_endio(struct bio *bio)  		goto end;  	uptodate = 1; + +	start = done->start; + +	ASSERT(bio->bi_vcnt == 1); +	inode = bio->bi_io_vec->bv_page->mapping->host; +	ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize); +  	bio_for_each_segment_all(bvec, bio, i) {  		ret = __readpage_endio_check(done->inode, io_bio, i, -					     bvec->bv_page, 0, -					     done->start, bvec->bv_len); +					bvec->bv_page, bvec->bv_offset, +					done->start, bvec->bv_len);  		if (!ret)  			clean_io_failure(done->inode, done->start, -					 bvec->bv_page, 0); +					bvec->bv_page, bvec->bv_offset);  		else  			uptodate = 0;  	} @@ -7898,20 +7934,34 @@ end:  static int __btrfs_subio_endio_read(struct inode *inode,  				    struct btrfs_io_bio *io_bio, int err)  { +	struct btrfs_fs_info *fs_info;  	struct bio_vec *bvec;  	struct btrfs_retry_complete done;  	u64 start;  	u64 offset = 0; +	u32 sectorsize; +	int nr_sectors; +	unsigned int pgoff; +	int csum_pos;  	int i;  	int ret; +	fs_info = BTRFS_I(inode)->root->fs_info; +	sectorsize = BTRFS_I(inode)->root->sectorsize; +  	err = 0;  	start = io_bio->logical;  	done.inode = inode;  	bio_for_each_segment_all(bvec, &io_bio->bio, i) { -		ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, -					     0, start, bvec->bv_len); +		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); + +		pgoff = 
bvec->bv_offset; +next_block: +		csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); +		ret = __readpage_endio_check(inode, io_bio, csum_pos, +					bvec->bv_page, pgoff, start, +					sectorsize);  		if (likely(!ret))  			goto next;  try_again: @@ -7919,10 +7969,10 @@ try_again:  		done.start = start;  		init_completion(&done.done); -		ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, -				     start + bvec->bv_len - 1, -				     io_bio->mirror_num, -				     btrfs_retry_endio, &done); +		ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, +				pgoff, start, start + sectorsize - 1, +				io_bio->mirror_num, +				btrfs_retry_endio, &done);  		if (ret) {  			err = ret;  			goto next; @@ -7935,8 +7985,15 @@ try_again:  			goto try_again;  		}  next: -		offset += bvec->bv_len; -		start += bvec->bv_len; +		offset += sectorsize; +		start += sectorsize; + +		ASSERT(nr_sectors); + +		if (--nr_sectors) { +			pgoff += sectorsize; +			goto next_block; +		}  	}  	return err; @@ -8188,9 +8245,11 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  	u64 file_offset = dip->logical_offset;  	u64 submit_len = 0;  	u64 map_length; -	int nr_pages = 0; -	int ret; +	u32 blocksize = root->sectorsize;  	int async_submit = 0; +	int nr_sectors; +	int ret; +	int i;  	map_length = orig_bio->bi_iter.bi_size;  	ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, @@ -8220,9 +8279,12 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  	atomic_inc(&dip->pending_bios);  	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { -		if (map_length < submit_len + bvec->bv_len || -		    bio_add_page(bio, bvec->bv_page, bvec->bv_len, -				 bvec->bv_offset) < bvec->bv_len) { +		nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len); +		i = 0; +next_block: +		if (unlikely(map_length < submit_len + blocksize || +		    bio_add_page(bio, bvec->bv_page, blocksize, +			    bvec->bv_offset + (i * blocksize)) < blocksize)) { 
 			/*  			 * inc the count before we submit the bio so  			 * we know the end IO handler won't happen before @@ -8243,7 +8305,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  			file_offset += submit_len;  			submit_len = 0; -			nr_pages = 0;  			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,  						  start_sector, GFP_NOFS); @@ -8261,9 +8322,14 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  				bio_put(bio);  				goto out_err;  			} + +			goto next_block;  		} else { -			submit_len += bvec->bv_len; -			nr_pages++; +			submit_len += blocksize; +			if (--nr_sectors) { +				i++; +				goto next_block; +			}  			bvec++;  		}  	} @@ -8628,6 +8694,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,  	struct extent_state *cached_state = NULL;  	u64 page_start = page_offset(page);  	u64 page_end = page_start + PAGE_CACHE_SIZE - 1; +	u64 start; +	u64 end;  	int inode_evicting = inode->i_state & I_FREEING;  	/* @@ -8647,14 +8715,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,  	if (!inode_evicting)  		lock_extent_bits(tree, page_start, page_end, &cached_state); -	ordered = btrfs_lookup_ordered_extent(inode, page_start); +again: +	start = page_start; +	ordered = btrfs_lookup_ordered_range(inode, start, +					page_end - start + 1);  	if (ordered) { +		end = min(page_end, ordered->file_offset + ordered->len - 1);  		/*  		 * IO on this page will never be started, so we need  		 * to account for any ordered extents now  		 */  		if (!inode_evicting) -			clear_extent_bit(tree, page_start, page_end, +			clear_extent_bit(tree, start, end,  					 EXTENT_DIRTY | EXTENT_DELALLOC |  					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |  					 EXTENT_DEFRAG, 1, 0, &cached_state, @@ -8671,22 +8743,26 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,  			spin_lock_irq(&tree->lock);  			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); -			new_len = 
page_start - ordered->file_offset; +			new_len = start - ordered->file_offset;  			if (new_len < ordered->truncated_len)  				ordered->truncated_len = new_len;  			spin_unlock_irq(&tree->lock);  			if (btrfs_dec_test_ordered_pending(inode, &ordered, -							   page_start, -							   PAGE_CACHE_SIZE, 1)) +							   start, +							   end - start + 1, 1))  				btrfs_finish_ordered_io(ordered);  		}  		btrfs_put_ordered_extent(ordered);  		if (!inode_evicting) {  			cached_state = NULL; -			lock_extent_bits(tree, page_start, page_end, +			lock_extent_bits(tree, start, end,  					 &cached_state);  		} + +		start = end + 1; +		if (start < page_end) +			goto again;  	}  	/* @@ -8747,15 +8823,28 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)  	loff_t size;  	int ret;  	int reserved = 0; +	u64 reserved_space;  	u64 page_start;  	u64 page_end; +	u64 end; + +	reserved_space = PAGE_CACHE_SIZE;  	sb_start_pagefault(inode->i_sb);  	page_start = page_offset(page);  	page_end = page_start + PAGE_CACHE_SIZE - 1; +	end = page_end; +	/* +	 * Reserving delalloc space after obtaining the page lock can lead to +	 * deadlock. For example, if a dirty page is locked by this function +	 * and the call to btrfs_delalloc_reserve_space() ends up triggering +	 * dirty page write out, then the btrfs_writepage() function could +	 * end up waiting indefinitely to get a lock on the page currently +	 * being processed by btrfs_page_mkwrite() function. +	 */  	ret = btrfs_delalloc_reserve_space(inode, page_start, -					   PAGE_CACHE_SIZE); +					   reserved_space);  	if (!ret) {  		ret = file_update_time(vma->vm_file);  		reserved = 1; @@ -8789,7 +8878,7 @@ again:  	 * we can't set the delalloc bits if there are pending ordered  	 * extents.  
Drop our locks and wait for them to finish  	 */ -	ordered = btrfs_lookup_ordered_extent(inode, page_start); +	ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);  	if (ordered) {  		unlock_extent_cached(io_tree, page_start, page_end,  				     &cached_state, GFP_NOFS); @@ -8799,6 +8888,18 @@ again:  		goto again;  	} +	if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) { +		reserved_space = round_up(size - page_start, root->sectorsize); +		if (reserved_space < PAGE_CACHE_SIZE) { +			end = page_start + reserved_space - 1; +			spin_lock(&BTRFS_I(inode)->lock); +			BTRFS_I(inode)->outstanding_extents++; +			spin_unlock(&BTRFS_I(inode)->lock); +			btrfs_delalloc_release_space(inode, page_start, +						PAGE_CACHE_SIZE - reserved_space); +		} +	} +  	/*  	 * XXX - page_mkwrite gets called every time the page is dirtied, even  	 * if it was already dirty, so for space accounting reasons we need to @@ -8806,12 +8907,12 @@ again:  	 * is probably a better way to do this, but for now keep consistent with  	 * prepare_pages in the normal write path.  	 
*/ -	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, +	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,  			  EXTENT_DIRTY | EXTENT_DELALLOC |  			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,  			  0, 0, &cached_state, GFP_NOFS); -	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, +	ret = btrfs_set_extent_delalloc(inode, page_start, end,  					&cached_state);  	if (ret) {  		unlock_extent_cached(io_tree, page_start, page_end, @@ -8850,7 +8951,7 @@ out_unlock:  	}  	unlock_page(page);  out: -	btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE); +	btrfs_delalloc_release_space(inode, page_start, reserved_space);  out_noreserve:  	sb_end_pagefault(inode->i_sb);  	return ret; @@ -9236,7 +9337,6 @@ static int btrfs_getattr(struct vfsmount *mnt,  	generic_fillattr(inode, stat);  	stat->dev = BTRFS_I(inode)->root->anon_dev; -	stat->blksize = PAGE_CACHE_SIZE;  	spin_lock(&BTRFS_I(inode)->lock);  	delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 952172ca7e45..9ba7b5be3fe5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2925,8 +2925,8 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,  	 * of the array is bounded by len, which is in turn bounded by  	 * BTRFS_MAX_DEDUPE_LEN.  	 */ -	src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); -	dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); +	src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); +	dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);  	if (!src_pgarr || !dst_pgarr) {  		kfree(src_pgarr);  		kfree(dst_pgarr); @@ -3814,8 +3814,9 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,  	 * Truncate page cache pages so that future reads will see the cloned  	 * data immediately and not the previous data.  	 
*/ -	truncate_inode_pages_range(&inode->i_data, destoff, -				   PAGE_CACHE_ALIGN(destoff + len) - 1); +	truncate_inode_pages_range(&inode->i_data, +				round_down(destoff, PAGE_CACHE_SIZE), +				round_up(destoff + len, PAGE_CACHE_SIZE) - 1);  out_unlock:  	if (!same_inode)  		btrfs_double_inode_unlock(src, inode); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 647ab12fdf5d..147dc6ca5de1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -295,8 +295,27 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)  			       btrfs_dev_extent_chunk_offset(l, dev_extent),  			       btrfs_dev_extent_length(l, dev_extent));  			break; -		case BTRFS_DEV_STATS_KEY: -			printk(KERN_INFO "\t\tdevice stats\n"); +		case BTRFS_PERSISTENT_ITEM_KEY: +			printk(KERN_INFO "\t\tpersistent item objectid %llu offset %llu\n", +					key.objectid, key.offset); +			switch (key.objectid) { +			case BTRFS_DEV_STATS_OBJECTID: +				printk(KERN_INFO "\t\tdevice stats\n"); +				break; +			default: +				printk(KERN_INFO "\t\tunknown persistent item\n"); +			} +			break; +		case BTRFS_TEMPORARY_ITEM_KEY: +			printk(KERN_INFO "\t\ttemporary item objectid %llu offset %llu\n", +					key.objectid, key.offset); +			switch (key.objectid) { +			case BTRFS_BALANCE_OBJECTID: +				printk(KERN_INFO "\t\tbalance status\n"); +				break; +			default: +				printk(KERN_INFO "\t\tunknown temporary item\n"); +			}  			break;  		case BTRFS_DEV_REPLACE_KEY:  			printk(KERN_INFO "\t\tdev replace\n"); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index dd5f361f1a8e..bf69c008249c 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -274,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,  	end = start + cache->key.offset - 1;  	btrfs_put_block_group(cache); -	zone = kzalloc(sizeof(*zone), GFP_NOFS); +	zone = kzalloc(sizeof(*zone), GFP_KERNEL);  	if (!zone)  		return NULL; @@ -339,7 +339,7 @@ static struct reada_extent 
*reada_find_extent(struct btrfs_root *root,  	if (re)  		return re; -	re = kzalloc(sizeof(*re), GFP_NOFS); +	re = kzalloc(sizeof(*re), GFP_KERNEL);  	if (!re)  		return NULL; @@ -562,7 +562,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical,  	if (!re)  		return -1; -	rec = kzalloc(sizeof(*rec), GFP_NOFS); +	rec = kzalloc(sizeof(*rec), GFP_KERNEL);  	if (!rec) {  		reada_extent_put(root->fs_info, re);  		return -ENOMEM; @@ -792,7 +792,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)  {  	struct reada_machine_work *rmw; -	rmw = kzalloc(sizeof(*rmw), GFP_NOFS); +	rmw = kzalloc(sizeof(*rmw), GFP_KERNEL);  	if (!rmw) {  		/* FIXME we cannot handle this properly right now */  		BUG(); @@ -920,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,  		.offset = (u64)-1  	}; -	rc = kzalloc(sizeof(*rc), GFP_NOFS); +	rc = kzalloc(sizeof(*rc), GFP_KERNEL);  	if (!rc)  		return ERR_PTR(-ENOMEM); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 92bf5ee732fb..2de7817d0e1b 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -461,7 +461,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)  	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;  	int ret; -	sctx = kzalloc(sizeof(*sctx), GFP_NOFS); +	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);  	if (!sctx)  		goto nomem;  	atomic_set(&sctx->refs, 1); @@ -472,7 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)  	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {  		struct scrub_bio *sbio; -		sbio = kzalloc(sizeof(*sbio), GFP_NOFS); +		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);  		if (!sbio)  			goto nomem;  		sctx->bios[i] = sbio; @@ -1654,7 +1654,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,  again:  	if (!wr_ctx->wr_curr_bio) {  		wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), -					      GFP_NOFS); +					      GFP_KERNEL);  		if (!wr_ctx->wr_curr_bio) {  			
mutex_unlock(&wr_ctx->wr_lock);  			return -ENOMEM; @@ -1671,7 +1671,8 @@ again:  		sbio->dev = wr_ctx->tgtdev;  		bio = sbio->bio;  		if (!bio) { -			bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); +			bio = btrfs_io_bio_alloc(GFP_KERNEL, +					wr_ctx->pages_per_wr_bio);  			if (!bio) {  				mutex_unlock(&wr_ctx->wr_lock);  				return -ENOMEM; @@ -2076,7 +2077,8 @@ again:  		sbio->dev = spage->dev;  		bio = sbio->bio;  		if (!bio) { -			bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); +			bio = btrfs_io_bio_alloc(GFP_KERNEL, +					sctx->pages_per_rd_bio);  			if (!bio)  				return -ENOMEM;  			sbio->bio = bio; @@ -2241,7 +2243,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,  	struct scrub_block *sblock;  	int index; -	sblock = kzalloc(sizeof(*sblock), GFP_NOFS); +	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);  	if (!sblock) {  		spin_lock(&sctx->stat_lock);  		sctx->stat.malloc_errors++; @@ -2259,7 +2261,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,  		struct scrub_page *spage;  		u64 l = min_t(u64, len, PAGE_SIZE); -		spage = kzalloc(sizeof(*spage), GFP_NOFS); +		spage = kzalloc(sizeof(*spage), GFP_KERNEL);  		if (!spage) {  leave_nomem:  			spin_lock(&sctx->stat_lock); @@ -2286,7 +2288,7 @@ leave_nomem:  			spage->have_csum = 0;  		}  		sblock->page_count++; -		spage->page = alloc_page(GFP_NOFS); +		spage->page = alloc_page(GFP_KERNEL);  		if (!spage->page)  			goto leave_nomem;  		len -= l; @@ -2541,7 +2543,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,  	struct scrub_block *sblock;  	int index; -	sblock = kzalloc(sizeof(*sblock), GFP_NOFS); +	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);  	if (!sblock) {  		spin_lock(&sctx->stat_lock);  		sctx->stat.malloc_errors++; @@ -2561,7 +2563,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,  		struct scrub_page *spage;  		u64 l = min_t(u64, len, PAGE_SIZE); -		spage = kzalloc(sizeof(*spage), 
GFP_NOFS); +		spage = kzalloc(sizeof(*spage), GFP_KERNEL);  		if (!spage) {  leave_nomem:  			spin_lock(&sctx->stat_lock); @@ -2591,7 +2593,7 @@ leave_nomem:  			spage->have_csum = 0;  		}  		sblock->page_count++; -		spage->page = alloc_page(GFP_NOFS); +		spage->page = alloc_page(GFP_KERNEL);  		if (!spage->page)  			goto leave_nomem;  		len -= l; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 63a6152be04b..d2e29925f1da 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -304,7 +304,7 @@ static struct fs_path *fs_path_alloc(void)  {  	struct fs_path *p; -	p = kmalloc(sizeof(*p), GFP_NOFS); +	p = kmalloc(sizeof(*p), GFP_KERNEL);  	if (!p)  		return NULL;  	p->reversed = 0; @@ -363,11 +363,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)  	 * First time the inline_buf does not suffice  	 */  	if (p->buf == p->inline_buf) { -		tmp_buf = kmalloc(len, GFP_NOFS); +		tmp_buf = kmalloc(len, GFP_KERNEL);  		if (tmp_buf)  			memcpy(tmp_buf, p->buf, old_buf_len);  	} else { -		tmp_buf = krealloc(p->buf, len, GFP_NOFS); +		tmp_buf = krealloc(p->buf, len, GFP_KERNEL);  	}  	if (!tmp_buf)  		return -ENOMEM; @@ -995,7 +995,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,  	 * values are small.  	 
*/  	buf_len = PATH_MAX; -	buf = kmalloc(buf_len, GFP_NOFS); +	buf = kmalloc(buf_len, GFP_KERNEL);  	if (!buf) {  		ret = -ENOMEM;  		goto out; @@ -1042,7 +1042,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,  				buf = NULL;  			} else {  				char *tmp = krealloc(buf, buf_len, -						     GFP_NOFS | __GFP_NOWARN); +						GFP_KERNEL | __GFP_NOWARN);  				if (!tmp)  					kfree(buf); @@ -1303,7 +1303,7 @@ static int find_extent_clone(struct send_ctx *sctx,  	/* We only use this path under the commit sem */  	tmp_path->need_commit_sem = 0; -	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); +	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);  	if (!backref_ctx) {  		ret = -ENOMEM;  		goto out; @@ -1984,7 +1984,7 @@ static int name_cache_insert(struct send_ctx *sctx,  	nce_head = radix_tree_lookup(&sctx->name_cache,  			(unsigned long)nce->ino);  	if (!nce_head) { -		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); +		nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);  		if (!nce_head) {  			kfree(nce);  			return -ENOMEM; @@ -2179,7 +2179,7 @@ out_cache:  	/*  	 * Store the result of the lookup in the name cache.  	 
*/ -	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); +	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);  	if (!nce) {  		ret = -ENOMEM;  		goto out; @@ -2315,7 +2315,7 @@ static int send_subvol_begin(struct send_ctx *sctx)  	if (!path)  		return -ENOMEM; -	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); +	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);  	if (!name) {  		btrfs_free_path(path);  		return -ENOMEM; @@ -2730,7 +2730,7 @@ static int __record_ref(struct list_head *head, u64 dir,  {  	struct recorded_ref *ref; -	ref = kmalloc(sizeof(*ref), GFP_NOFS); +	ref = kmalloc(sizeof(*ref), GFP_KERNEL);  	if (!ref)  		return -ENOMEM; @@ -2755,7 +2755,7 @@ static int dup_ref(struct recorded_ref *ref, struct list_head *list)  {  	struct recorded_ref *new; -	new = kmalloc(sizeof(*ref), GFP_NOFS); +	new = kmalloc(sizeof(*ref), GFP_KERNEL);  	if (!new)  		return -ENOMEM; @@ -2818,7 +2818,7 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)  	struct rb_node *parent = NULL;  	struct orphan_dir_info *entry, *odi; -	odi = kmalloc(sizeof(*odi), GFP_NOFS); +	odi = kmalloc(sizeof(*odi), GFP_KERNEL);  	if (!odi)  		return ERR_PTR(-ENOMEM);  	odi->ino = dir_ino; @@ -2973,7 +2973,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)  	struct rb_node *parent = NULL;  	struct waiting_dir_move *entry, *dm; -	dm = kmalloc(sizeof(*dm), GFP_NOFS); +	dm = kmalloc(sizeof(*dm), GFP_KERNEL);  	if (!dm)  		return -ENOMEM;  	dm->ino = ino; @@ -3040,7 +3040,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,  	int exists = 0;  	int ret; -	pm = kmalloc(sizeof(*pm), GFP_NOFS); +	pm = kmalloc(sizeof(*pm), GFP_KERNEL);  	if (!pm)  		return -ENOMEM;  	pm->parent_ino = parent_ino; @@ -4280,7 +4280,7 @@ static int __find_xattr(int num, struct btrfs_key *di_key,  	    strncmp(name, ctx->name, name_len) == 0) {  		ctx->found_idx = num;  		ctx->found_data_len = data_len; -		ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 
+		ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);  		if (!ctx->found_data)  			return -ENOMEM;  		return 1; @@ -4481,7 +4481,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)  	while (index <= last_index) {  		unsigned cur_len = min_t(unsigned, len,  					 PAGE_CACHE_SIZE - pg_offset); -		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); +		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);  		if (!page) {  			ret = -ENOMEM;  			break; @@ -5989,7 +5989,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)  		goto out;  	} -	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); +	sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);  	if (!sctx) {  		ret = -ENOMEM;  		goto out; @@ -5997,7 +5997,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)  	INIT_LIST_HEAD(&sctx->new_refs);  	INIT_LIST_HEAD(&sctx->deleted_refs); -	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); +	INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);  	INIT_LIST_HEAD(&sctx->name_cache_list);  	sctx->flags = arg->flags; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d41e09fe8e38..bf75200c6f86 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -303,7 +303,8 @@ enum {  	Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,  	Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,  	Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, -	Opt_datasum, Opt_treelog, Opt_noinode_cache, +	Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot, +	Opt_nologreplay, Opt_norecovery,  #ifdef CONFIG_BTRFS_DEBUG  	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,  #endif @@ -335,6 +336,8 @@ static const match_table_t tokens = {  	{Opt_noacl, "noacl"},  	{Opt_notreelog, "notreelog"},  	{Opt_treelog, "treelog"}, +	{Opt_nologreplay, "nologreplay"}, +	{Opt_norecovery, "norecovery"},  	{Opt_flushoncommit, "flushoncommit"},  	{Opt_noflushoncommit, "noflushoncommit"},  	
{Opt_ratio, "metadata_ratio=%d"}, @@ -352,7 +355,8 @@ static const match_table_t tokens = {  	{Opt_inode_cache, "inode_cache"},  	{Opt_noinode_cache, "noinode_cache"},  	{Opt_no_space_cache, "nospace_cache"}, -	{Opt_recovery, "recovery"}, +	{Opt_recovery, "recovery"}, /* deprecated */ +	{Opt_usebackuproot, "usebackuproot"},  	{Opt_skip_balance, "skip_balance"},  	{Opt_check_integrity, "check_int"},  	{Opt_check_integrity_including_extent_data, "check_int_data"}, @@ -373,7 +377,8 @@ static const match_table_t tokens = {   * reading in a new superblock is parsed here.   * XXX JDM: This needs to be cleaned up for remount.   */ -int btrfs_parse_options(struct btrfs_root *root, char *options) +int btrfs_parse_options(struct btrfs_root *root, char *options, +			unsigned long new_flags)  {  	struct btrfs_fs_info *info = root->fs_info;  	substring_t args[MAX_OPT_ARGS]; @@ -393,8 +398,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)  	else if (cache_gen)  		btrfs_set_opt(info->mount_opt, SPACE_CACHE); +	/* +	 * Even the options are empty, we still need to do extra check +	 * against new flags +	 */  	if (!options) -		goto out; +		goto check;  	/*  	 * strsep changes the string, duplicate it because parse_options @@ -606,6 +615,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)  			btrfs_clear_and_info(root, NOTREELOG,  					     "enabling tree log");  			break; +		case Opt_norecovery: +		case Opt_nologreplay: +			btrfs_set_and_info(root, NOLOGREPLAY, +					   "disabling log replay at mount time"); +			break;  		case Opt_flushoncommit:  			btrfs_set_and_info(root, FLUSHONCOMMIT,  					   "turning on flush-on-commit"); @@ -696,8 +710,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)  					     "disabling auto defrag");  			break;  		case Opt_recovery: -			btrfs_info(root->fs_info, "enabling auto recovery"); -			btrfs_set_opt(info->mount_opt, RECOVERY); +			btrfs_warn(root->fs_info, +				   "'recovery' is deprecated, 
use 'usebackuproot' instead"); +		case Opt_usebackuproot: +			btrfs_info(root->fs_info, +				   "trying to use backup root at mount time"); +			btrfs_set_opt(info->mount_opt, USEBACKUPROOT);  			break;  		case Opt_skip_balance:  			btrfs_set_opt(info->mount_opt, SKIP_BALANCE); @@ -792,6 +810,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)  			break;  		}  	} +check: +	/* +	 * Extra check for current option against current flag +	 */ +	if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { +		btrfs_err(root->fs_info, +			  "nologreplay must be used with ro mount option"); +		ret = -EINVAL; +	}  out:  	if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&  	    !btrfs_test_opt(root, FREE_SPACE_TREE) && @@ -1202,6 +1229,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)  		seq_puts(seq, ",ssd");  	if (btrfs_test_opt(root, NOTREELOG))  		seq_puts(seq, ",notreelog"); +	if (btrfs_test_opt(root, NOLOGREPLAY)) +		seq_puts(seq, ",nologreplay");  	if (btrfs_test_opt(root, FLUSHONCOMMIT))  		seq_puts(seq, ",flushoncommit");  	if (btrfs_test_opt(root, DISCARD)) @@ -1228,8 +1257,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)  		seq_puts(seq, ",inode_cache");  	if (btrfs_test_opt(root, SKIP_BALANCE))  		seq_puts(seq, ",skip_balance"); -	if (btrfs_test_opt(root, RECOVERY)) -		seq_puts(seq, ",recovery");  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY  	if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))  		seq_puts(seq, ",check_int_data"); @@ -1685,7 +1712,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  		}  	} -	ret = btrfs_parse_options(root, data); +	ret = btrfs_parse_options(root, data, *flags);  	if (ret) {  		ret = -EINVAL;  		goto restore; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 366b335946fa..6e0e4396d3a4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -138,7 +138,7 @@ static struct btrfs_fs_devices 
*__alloc_fs_devices(void)  {  	struct btrfs_fs_devices *fs_devs; -	fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); +	fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);  	if (!fs_devs)  		return ERR_PTR(-ENOMEM); @@ -220,7 +220,7 @@ static struct btrfs_device *__alloc_device(void)  {  	struct btrfs_device *dev; -	dev = kzalloc(sizeof(*dev), GFP_NOFS); +	dev = kzalloc(sizeof(*dev), GFP_KERNEL);  	if (!dev)  		return ERR_PTR(-ENOMEM); @@ -733,7 +733,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)  		 * uuid mutex so nothing we touch in here is going to disappear.  		 */  		if (orig_dev->name) { -			name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); +			name = rcu_string_strdup(orig_dev->name->str, +					GFP_KERNEL);  			if (!name) {  				kfree(device);  				goto error; @@ -2287,7 +2288,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)  		goto error;  	} -	name = rcu_string_strdup(device_path, GFP_NOFS); +	name = rcu_string_strdup(device_path, GFP_KERNEL);  	if (!name) {  		kfree(device);  		ret = -ENOMEM; @@ -2966,7 +2967,7 @@ static int insert_balance_item(struct btrfs_root *root,  	}  	key.objectid = BTRFS_BALANCE_OBJECTID; -	key.type = BTRFS_BALANCE_ITEM_KEY; +	key.type = BTRFS_TEMPORARY_ITEM_KEY;  	key.offset = 0;  	ret = btrfs_insert_empty_item(trans, root, path, &key, @@ -3015,7 +3016,7 @@ static int del_balance_item(struct btrfs_root *root)  	}  	key.objectid = BTRFS_BALANCE_OBJECTID; -	key.type = BTRFS_BALANCE_ITEM_KEY; +	key.type = BTRFS_TEMPORARY_ITEM_KEY;  	key.offset = 0;  	ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -3867,7 +3868,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)  		return -ENOMEM;  	key.objectid = BTRFS_BALANCE_OBJECTID; -	key.type = BTRFS_BALANCE_ITEM_KEY; +	key.type = BTRFS_TEMPORARY_ITEM_KEY;  	key.offset = 0;  	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); @@ -6705,8 +6706,8 @@ int btrfs_init_dev_stats(struct 
btrfs_fs_info *fs_info)  		int item_size;  		struct btrfs_dev_stats_item *ptr; -		key.objectid = 0; -		key.type = BTRFS_DEV_STATS_KEY; +		key.objectid = BTRFS_DEV_STATS_OBJECTID; +		key.type = BTRFS_PERSISTENT_ITEM_KEY;  		key.offset = device->devid;  		ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);  		if (ret) { @@ -6753,8 +6754,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,  	int ret;  	int i; -	key.objectid = 0; -	key.type = BTRFS_DEV_STATS_KEY; +	key.objectid = BTRFS_DEV_STATS_OBJECTID; +	key.type = BTRFS_PERSISTENT_ITEM_KEY;  	key.offset = device->devid;  	path = btrfs_alloc_path(); | 

