diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/backref.c | 8 | ||||
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 2 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
| -rw-r--r-- | fs/btrfs/export.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 16 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 80 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 45 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 8 | ||||
| -rw-r--r-- | fs/btrfs/send.c | 8 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 33 | ||||
| -rw-r--r-- | fs/btrfs/transaction.h | 6 | ||||
| -rw-r--r-- | fs/btrfs/volumes.h | 8 | 
14 files changed, 171 insertions, 63 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ecbc63d3143e..9a2ec79e8cfb 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1828,7 +1828,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,  	int found = 0;  	struct extent_buffer *eb;  	struct btrfs_inode_extref *extref; -	struct extent_buffer *leaf;  	u32 item_size;  	u32 cur_offset;  	unsigned long ptr; @@ -1856,9 +1855,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,  		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);  		btrfs_release_path(path); -		leaf = path->nodes[0]; -		item_size = btrfs_item_size_nr(leaf, slot); -		ptr = btrfs_item_ptr_offset(leaf, slot); +		item_size = btrfs_item_size_nr(eb, slot); +		ptr = btrfs_item_ptr_offset(eb, slot);  		cur_offset = 0;  		while (cur_offset < item_size) { @@ -1872,7 +1870,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,  			if (ret)  				break; -			cur_offset += btrfs_inode_extref_name_len(leaf, extref); +			cur_offset += btrfs_inode_extref_name_len(eb, extref);  			cur_offset += sizeof(*extref);  		}  		btrfs_tree_read_unlock_blocking(eb); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 81220b2203c6..0ef5cc13fae2 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,8 +44,6 @@  #define BTRFS_INODE_IN_DELALLOC_LIST		9  #define BTRFS_INODE_READDIO_NEED_LOCK		10  #define BTRFS_INODE_HAS_PROPS		        11 -/* DIO is ready to submit */ -#define BTRFS_INODE_DIO_READY		        12  /*   * The following 3 bits are meant only for the btree inode.   
* When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d98aee34fee..1e60d00d4ea7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2847,6 +2847,8 @@ int open_ctree(struct super_block *sb,  	    !extent_buffer_uptodate(chunk_root->node)) {  		printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",  		       sb->s_id); +		if (!IS_ERR(chunk_root->node)) +			free_extent_buffer(chunk_root->node);  		chunk_root->node = NULL;  		goto fail_tree_roots;  	} @@ -2885,6 +2887,8 @@ retry_root_backup:  	    !extent_buffer_uptodate(tree_root->node)) {  		printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",  		       sb->s_id); +		if (!IS_ERR(tree_root->node)) +			free_extent_buffer(tree_root->node);  		tree_root->node = NULL;  		goto recovery_tree_root;  	} @@ -3765,9 +3769,7 @@ void close_ctree(struct btrfs_root *root)  		 * block groups queued for removal, the deletion will be  		 * skipped when we quit the cleaner thread.  		 
*/ -		mutex_lock(&root->fs_info->cleaner_mutex);  		btrfs_delete_unused_bgs(root->fs_info); -		mutex_unlock(&root->fs_info->cleaner_mutex);  		ret = btrfs_commit_super(root);  		if (ret) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 8d052209f473..2513a7f53334 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -112,11 +112,11 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,  	u32 generation;  	if (fh_type == FILEID_BTRFS_WITH_PARENT) { -		if (fh_len !=  BTRFS_FID_SIZE_CONNECTABLE) +		if (fh_len <  BTRFS_FID_SIZE_CONNECTABLE)  			return NULL;  		root_objectid = fid->root_objectid;  	} else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) { -		if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) +		if (fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT)  			return NULL;  		root_objectid = fid->parent_root_objectid;  	} else @@ -136,11 +136,11 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,  	u32 generation;  	if ((fh_type != FILEID_BTRFS_WITH_PARENT || -	     fh_len != BTRFS_FID_SIZE_CONNECTABLE) && +	     fh_len < BTRFS_FID_SIZE_CONNECTABLE) &&  	    (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT || -	     fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) && +	     fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&  	    (fh_type != FILEID_BTRFS_WITHOUT_PARENT || -	     fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) +	     fh_len < BTRFS_FID_SIZE_NON_CONNECTABLE))  		return NULL;  	objectid = fid->objectid; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5411f0ab5683..601d7d45d164 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2828,6 +2828,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,  	struct btrfs_delayed_ref_head *head;  	int ret;  	int run_all = count == (unsigned long)-1; +	bool can_flush_pending_bgs = trans->can_flush_pending_bgs;  	/* We'll clean this up in btrfs_cleanup_transaction */  	if (trans->aborted) @@ -2844,6 +2845,7 @@ again:  #ifdef 
SCRAMBLE_DELAYED_REFS  	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);  #endif +	trans->can_flush_pending_bgs = false;  	ret = __btrfs_run_delayed_refs(trans, root, count);  	if (ret < 0) {  		btrfs_abort_transaction(trans, root, ret); @@ -2893,6 +2895,7 @@ again:  	}  out:  	assert_qgroups_uptodate(trans); +	trans->can_flush_pending_bgs = can_flush_pending_bgs;  	return 0;  } @@ -3742,10 +3745,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,  	found->bytes_reserved = 0;  	found->bytes_readonly = 0;  	found->bytes_may_use = 0; -	if (total_bytes > 0) -		found->full = 0; -	else -		found->full = 1; +	found->full = 0;  	found->force_alloc = CHUNK_ALLOC_NO_FORCE;  	found->chunk_alloc = 0;  	found->flush = 0; @@ -4309,7 +4309,8 @@ out:  	 * the block groups that were made dirty during the lifetime of the  	 * transaction.  	 */ -	if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) { +	if (trans->can_flush_pending_bgs && +	    trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {  		btrfs_create_pending_block_groups(trans, trans->root);  		btrfs_trans_release_chunk_metadata(trans);  	} @@ -8668,7 +8669,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,  	}  	if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { -		btrfs_drop_and_free_fs_root(tree_root->fs_info, root); +		btrfs_add_dropped_root(trans, root);  	} else {  		free_extent_buffer(root->node);  		free_extent_buffer(root->commit_root); @@ -9563,7 +9564,9 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,  	struct btrfs_block_group_item item;  	struct btrfs_key key;  	int ret = 0; +	bool can_flush_pending_bgs = trans->can_flush_pending_bgs; +	trans->can_flush_pending_bgs = false;  	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {  		if (ret)  			goto next; @@ -9584,6 +9587,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,  next:  		list_del_init(&block_group->bg_list);  	} +	
trans->can_flush_pending_bgs = can_flush_pending_bgs;  }  int btrfs_make_block_group(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f1018cfbfefa..3915c9473e94 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,  			      bio_end_io_t end_io_func,  			      int mirror_num,  			      unsigned long prev_bio_flags, -			      unsigned long bio_flags) +			      unsigned long bio_flags, +			      bool force_bio_submit)  {  	int ret = 0;  	struct bio *bio; @@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,  			contig = bio_end_sector(bio) == sector;  		if (prev_bio_flags != bio_flags || !contig || +		    force_bio_submit ||  		    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||  		    bio_add_page(bio, page, page_size, offset) < page_size) {  			ret = submit_one_bio(rw, bio, mirror_num, @@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree,  			 get_extent_t *get_extent,  			 struct extent_map **em_cached,  			 struct bio **bio, int mirror_num, -			 unsigned long *bio_flags, int rw) +			 unsigned long *bio_flags, int rw, +			 u64 *prev_em_start)  {  	struct inode *inode = page->mapping->host;  	u64 start = page_offset(page); @@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree,  	}  	while (cur <= end) {  		unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; +		bool force_bio_submit = false;  		if (cur >= last_byte) {  			char *userpage; @@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree,  		block_start = em->block_start;  		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))  			block_start = EXTENT_MAP_HOLE; + +		/* +		 * If we have a file range that points to a compressed extent +		 * and it's followed by a consecutive file range that points to +		 * the same compressed extent (possibly with a 
different +		 * offset and/or length, so it either points to the whole extent +		 * or only part of it), we must make sure we do not submit a +		 * single bio to populate the pages for the 2 ranges because +		 * this makes the compressed extent read zero out the pages +		 * belonging to the 2nd range. Imagine the following scenario: +		 * +		 *  File layout +		 *  [0 - 8K]                     [8K - 24K] +		 *    |                               | +		 *    |                               | +		 * points to extent X,         points to extent X, +		 * offset 4K, length of 8K     offset 0, length 16K +		 * +		 * [extent X, compressed length = 4K uncompressed length = 16K] +		 * +		 * If the bio to read the compressed extent covers both ranges, +		 * it will decompress extent X into the pages belonging to the +		 * first range and then it will stop, zeroing out the remaining +		 * pages that belong to the other range that points to extent X. +		 * So here we make sure we submit 2 bios, one for the first +		 * range and another one for the second range. Both will target +		 * the same physical extent from disk, but we can't currently +		 * make the compressed bio endio callback populate the pages +		 * for both ranges because each compressed bio is tightly +		 * coupled with a single extent map, and each range can have +		 * an extent map with a different offset value relative to the +		 * uncompressed data of our extent and different lengths. This +		 * is a corner case so we prioritize correctness over +		 * non-optimal behavior (submitting 2 bios for the same extent). 
+		 */ +		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && +		    prev_em_start && *prev_em_start != (u64)-1 && +		    *prev_em_start != em->orig_start) +			force_bio_submit = true; + +		if (prev_em_start) +			*prev_em_start = em->orig_start; +  		free_extent_map(em);  		em = NULL; @@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree,  					 bdev, bio, pnr,  					 end_bio_extent_readpage, mirror_num,  					 *bio_flags, -					 this_bio_flag); +					 this_bio_flag, +					 force_bio_submit);  		if (!ret) {  			nr++;  			*bio_flags = this_bio_flag; @@ -3084,7 +3132,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,  					     get_extent_t *get_extent,  					     struct extent_map **em_cached,  					     struct bio **bio, int mirror_num, -					     unsigned long *bio_flags, int rw) +					     unsigned long *bio_flags, int rw, +					     u64 *prev_em_start)  {  	struct inode *inode;  	struct btrfs_ordered_extent *ordered; @@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,  	for (index = 0; index < nr_pages; index++) {  		__do_readpage(tree, pages[index], get_extent, em_cached, bio, -			      mirror_num, bio_flags, rw); +			      mirror_num, bio_flags, rw, prev_em_start);  		page_cache_release(pages[index]);  	}  } @@ -3114,7 +3163,8 @@ static void __extent_readpages(struct extent_io_tree *tree,  			       int nr_pages, get_extent_t *get_extent,  			       struct extent_map **em_cached,  			       struct bio **bio, int mirror_num, -			       unsigned long *bio_flags, int rw) +			       unsigned long *bio_flags, int rw, +			       u64 *prev_em_start)  {  	u64 start = 0;  	u64 end = 0; @@ -3135,7 +3185,7 @@ static void __extent_readpages(struct extent_io_tree *tree,  						  index - first_index, start,  						  end, get_extent, em_cached,  						  bio, mirror_num, bio_flags, -						  rw); +						  rw, prev_em_start);  			start = page_start;  			end = start + PAGE_CACHE_SIZE - 
1;  			first_index = index; @@ -3146,7 +3196,8 @@ static void __extent_readpages(struct extent_io_tree *tree,  		__do_contiguous_readpages(tree, &pages[first_index],  					  index - first_index, start,  					  end, get_extent, em_cached, bio, -					  mirror_num, bio_flags, rw); +					  mirror_num, bio_flags, rw, +					  prev_em_start);  }  static int __extent_read_full_page(struct extent_io_tree *tree, @@ -3172,7 +3223,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,  	}  	ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, -			    bio_flags, rw); +			    bio_flags, rw, NULL);  	return ret;  } @@ -3198,7 +3249,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,  	int ret;  	ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, -				      &bio_flags, READ); +			    &bio_flags, READ, NULL);  	if (bio)  		ret = submit_one_bio(READ, bio, mirror_num, bio_flags);  	return ret; @@ -3451,7 +3502,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,  						 sector, iosize, pg_offset,  						 bdev, &epd->bio, max_nr,  						 end_bio_extent_writepage, -						 0, 0, 0); +						 0, 0, 0, false);  			if (ret)  				SetPageError(page);  		} @@ -3754,7 +3805,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,  		ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,  					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,  					 -1, end_bio_extent_buffer_writepage, -					 0, epd->bio_flags, bio_flags); +					 0, epd->bio_flags, bio_flags, false);  		epd->bio_flags = bio_flags;  		if (ret) {  			set_btree_ioerr(p); @@ -4158,6 +4209,7 @@ int extent_readpages(struct extent_io_tree *tree,  	struct page *page;  	struct extent_map *em_cached = NULL;  	int nr = 0; +	u64 prev_em_start = (u64)-1;  	for (page_idx = 0; page_idx < nr_pages; page_idx++) {  		page = list_entry(pages->prev, struct page, lru); @@ -4174,12 +4226,12 @@ int extent_readpages(struct extent_io_tree *tree, 
 		if (nr < ARRAY_SIZE(pagepool))  			continue;  		__extent_readpages(tree, pagepool, nr, get_extent, &em_cached, -				   &bio, 0, &bio_flags, READ); +				   &bio, 0, &bio_flags, READ, &prev_em_start);  		nr = 0;  	}  	if (nr)  		__extent_readpages(tree, pagepool, nr, get_extent, &em_cached, -				   &bio, 0, &bio_flags, READ); +				   &bio, 0, &bio_flags, READ, &prev_em_start);  	if (em_cached)  		free_extent_map(em_cached); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b823fac91c92..8c6f247ba81d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2584,7 +2584,7 @@ static long btrfs_fallocate(struct file *file, int mode,  					alloc_start);  		if (ret)  			goto out; -	} else { +	} else if (offset + len > inode->i_size) {  		/*  		 * If we are fallocating from the end of the file onward we  		 * need to zero out the end of the page if i_size lands in the diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0fa7253a2d7..611b66d73e80 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode)  		goto no_delete;  	}  	/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? 
*/ -	btrfs_wait_ordered_range(inode, 0, (u64)-1); +	if (!special_file(inode->i_mode)) +		btrfs_wait_ordered_range(inode, 0, (u64)-1);  	btrfs_free_io_failure_record(inode, 0, (u64)-1); @@ -7408,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,  	return em;  } +struct btrfs_dio_data { +	u64 outstanding_extents; +	u64 reserve; +};  static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,  				   struct buffer_head *bh_result, int create) @@ -7415,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,  	struct extent_map *em;  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct extent_state *cached_state = NULL; +	struct btrfs_dio_data *dio_data = NULL;  	u64 start = iblock << inode->i_blkbits;  	u64 lockstart, lockend;  	u64 len = bh_result->b_size; -	u64 *outstanding_extents = NULL;  	int unlock_bits = EXTENT_LOCKED;  	int ret = 0; @@ -7436,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,  		 * that anything that needs to check if there's a transction doesn't get  		 * confused.  		 */ -		outstanding_extents = current->journal_info; +		dio_data = current->journal_info;  		current->journal_info = NULL;  	} @@ -7568,17 +7573,18 @@ unlock:  		 * within our reservation, otherwise we need to adjust our inode  		 * counter appropriately.  		 
*/ -		if (*outstanding_extents) { -			(*outstanding_extents)--; +		if (dio_data->outstanding_extents) { +			(dio_data->outstanding_extents)--;  		} else {  			spin_lock(&BTRFS_I(inode)->lock);  			BTRFS_I(inode)->outstanding_extents++;  			spin_unlock(&BTRFS_I(inode)->lock);  		} -		current->journal_info = outstanding_extents;  		btrfs_free_reserved_data_space(inode, len); -		set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); +		WARN_ON(dio_data->reserve < len); +		dio_data->reserve -= len; +		current->journal_info = dio_data;  	}  	/* @@ -7601,8 +7607,8 @@ unlock:  unlock_err:  	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,  			 unlock_bits, 1, 0, &cached_state, GFP_NOFS); -	if (outstanding_extents) -		current->journal_info = outstanding_extents; +	if (dio_data) +		current->journal_info = dio_data;  	return ret;  } @@ -8329,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,  {  	struct file *file = iocb->ki_filp;  	struct inode *inode = file->f_mapping->host; -	u64 outstanding_extents = 0; +	struct btrfs_root *root = BTRFS_I(inode)->root; +	struct btrfs_dio_data dio_data = { 0 };  	size_t count = 0;  	int flags = 0;  	bool wakeup = true; @@ -8367,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,  		ret = btrfs_delalloc_reserve_space(inode, count);  		if (ret)  			goto out; -		outstanding_extents = div64_u64(count + +		dio_data.outstanding_extents = div64_u64(count +  						BTRFS_MAX_EXTENT_SIZE - 1,  						BTRFS_MAX_EXTENT_SIZE); @@ -8376,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,  		 * do the accounting properly if we go over the number we  		 * originally calculated.  Abuse current->journal_info for this.  		 
*/ -		current->journal_info = &outstanding_extents; +		dio_data.reserve = round_up(count, root->sectorsize); +		current->journal_info = &dio_data;  	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,  				     &BTRFS_I(inode)->runtime_flags)) {  		inode_dio_end(inode); @@ -8391,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,  	if (iov_iter_rw(iter) == WRITE) {  		current->journal_info = NULL;  		if (ret < 0 && ret != -EIOCBQUEUED) { -			/* -			 * If the error comes from submitting stage, -			 * btrfs_get_blocsk_direct() has free'd data space, -			 * and metadata space will be handled by -			 * finish_ordered_fn, don't do that again to make -			 * sure bytes_may_use is correct. -			 */ -			if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, -				     &BTRFS_I(inode)->runtime_flags)) -				btrfs_delalloc_release_space(inode, count); +			if (dio_data.reserve) +				btrfs_delalloc_release_space(inode, +							dio_data.reserve);  		} else if (ret >= 0 && (size_t)ret < count)  			btrfs_delalloc_release_space(inode,  						     count - (size_t)ret); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0adf5422fce9..8d20f3b1cab0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4639,6 +4639,11 @@ locked:  		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;  	} +	if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { +		ret = -EINVAL; +		goto out_bctl; +	} +  do_balance:  	/*  	 * Ownership of bctl and mutually_exclusive_operation_running @@ -4650,12 +4655,15 @@ do_balance:  	need_unlock = false;  	ret = btrfs_balance(bctl, bargs); +	bctl = NULL;  	if (arg) {  		if (copy_to_user(arg, bargs, sizeof(*bargs)))  			ret = -EFAULT;  	} +out_bctl: +	kfree(bctl);  out_bargs:  	kfree(bargs);  out_unlock: diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index aa72bfd28f7d..a739b825bdd3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1920,10 +1920,12 @@ static int did_overwrite_ref(struct send_ctx *sctx,  	/*  	 * We know that it 
is or will be overwritten. Check this now.  	 * The current inode being processed might have been the one that caused -	 * inode 'ino' to be orphanized, therefore ow_inode can actually be the -	 * same as sctx->send_progress. +	 * inode 'ino' to be orphanized, therefore check if ow_inode matches +	 * the current inode being processed.  	 */ -	if (ow_inode <= sctx->send_progress) +	if ((ow_inode < sctx->send_progress) || +	    (ino != sctx->cur_ino && ow_inode == sctx->cur_ino && +	     gen == sctx->cur_inode_gen))  		ret = 1;  	else  		ret = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2b07b3581781..11d1eab9234d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  		 * groups on disk until we're mounted read-write again  		 * unless we clean them up here.  		 */ -		mutex_lock(&root->fs_info->cleaner_mutex);  		btrfs_delete_unused_bgs(fs_info); -		mutex_unlock(&root->fs_info->cleaner_mutex);  		btrfs_dev_replace_suspend_for_unmount(fs_info);  		btrfs_scrub_cancel(fs_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8f259b3a66b3..a5b06442f0bf 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,  			btrfs_unpin_free_ino(root);  		clear_btree_io_tree(&root->dirty_log_pages);  	} + +	/* We can free old roots now. 
*/ +	spin_lock(&trans->dropped_roots_lock); +	while (!list_empty(&trans->dropped_roots)) { +		root = list_first_entry(&trans->dropped_roots, +					struct btrfs_root, root_list); +		list_del_init(&root->root_list); +		spin_unlock(&trans->dropped_roots_lock); +		btrfs_drop_and_free_fs_root(fs_info, root); +		spin_lock(&trans->dropped_roots_lock); +	} +	spin_unlock(&trans->dropped_roots_lock);  	up_write(&fs_info->commit_root_sem);  } @@ -255,11 +267,13 @@ loop:  	INIT_LIST_HEAD(&cur_trans->pending_ordered);  	INIT_LIST_HEAD(&cur_trans->dirty_bgs);  	INIT_LIST_HEAD(&cur_trans->io_bgs); +	INIT_LIST_HEAD(&cur_trans->dropped_roots);  	mutex_init(&cur_trans->cache_write_mutex);  	cur_trans->num_dirty_bgs = 0;  	spin_lock_init(&cur_trans->dirty_bgs_lock);  	INIT_LIST_HEAD(&cur_trans->deleted_bgs);  	spin_lock_init(&cur_trans->deleted_bgs_lock); +	spin_lock_init(&cur_trans->dropped_roots_lock);  	list_add_tail(&cur_trans->list, &fs_info->trans_list);  	extent_io_tree_init(&cur_trans->dirty_pages,  			     fs_info->btree_inode->i_mapping); @@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,  } +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, +			    struct btrfs_root *root) +{ +	struct btrfs_transaction *cur_trans = trans->transaction; + +	/* Add ourselves to the transaction dropped list */ +	spin_lock(&cur_trans->dropped_roots_lock); +	list_add_tail(&root->root_list, &cur_trans->dropped_roots); +	spin_unlock(&cur_trans->dropped_roots_lock); + +	/* Make sure we don't try to update the root at commit time */ +	spin_lock(&root->fs_info->fs_roots_radix_lock); +	radix_tree_tag_clear(&root->fs_info->fs_roots_radix, +			     (unsigned long)root->root_key.objectid, +			     BTRFS_ROOT_TRANS_TAG); +	spin_unlock(&root->fs_info->fs_roots_radix_lock); +} +  int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,  			       struct btrfs_root *root)  { @@ -525,6 +557,7 @@ again:  	h->delayed_ref_elem.seq = 0;  	h->type = type;  	
h->allocating_chunk = false; +	h->can_flush_pending_bgs = true;  	h->reloc_reserved = false;  	h->sync = false;  	INIT_LIST_HEAD(&h->qgroup_ref_list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index edc2fbc262d7..a994bb097ee5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -65,6 +65,7 @@ struct btrfs_transaction {  	struct list_head switch_commits;  	struct list_head dirty_bgs;  	struct list_head io_bgs; +	struct list_head dropped_roots;  	u64 num_dirty_bgs;  	/* @@ -76,6 +77,7 @@ struct btrfs_transaction {  	spinlock_t dirty_bgs_lock;  	struct list_head deleted_bgs;  	spinlock_t deleted_bgs_lock; +	spinlock_t dropped_roots_lock;  	struct btrfs_delayed_ref_root delayed_refs;  	int aborted;  	int dirty_bg_run; @@ -116,6 +118,7 @@ struct btrfs_trans_handle {  	short aborted;  	short adding_csums;  	bool allocating_chunk; +	bool can_flush_pending_bgs;  	bool reloc_reserved;  	bool sync;  	unsigned int type; @@ -216,5 +219,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info);  int btrfs_transaction_in_commit(struct btrfs_fs_info *info);  void btrfs_put_transaction(struct btrfs_transaction *transaction);  void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); - +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, +			    struct btrfs_root *root);  #endif diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2ca784a14e84..595279a8b99f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -376,6 +376,14 @@ struct map_lookup {  #define BTRFS_BALANCE_ARGS_VRANGE	(1ULL << 4)  #define BTRFS_BALANCE_ARGS_LIMIT	(1ULL << 5) +#define BTRFS_BALANCE_ARGS_MASK			\ +	(BTRFS_BALANCE_ARGS_PROFILES |		\ +	 BTRFS_BALANCE_ARGS_USAGE |		\ +	 BTRFS_BALANCE_ARGS_DEVID | 		\ +	 BTRFS_BALANCE_ARGS_DRANGE |		\ +	 BTRFS_BALANCE_ARGS_VRANGE |		\ +	 BTRFS_BALANCE_ARGS_LIMIT) +  /*   * Profile changing flags.  When SOFT is set we won't relocate chunk if   * it already has the target profile (even though it may be  | 

