summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/check-integrity.c10
-rw-r--r--fs/btrfs/compression.c29
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/disk-io.c79
-rw-r--r--fs/btrfs/extent-tree.c34
-rw-r--r--fs/btrfs/extent_io.c39
-rw-r--r--fs/btrfs/inode-map.c17
-rw-r--r--fs/btrfs/inode.c143
-rw-r--r--fs/btrfs/ioctl.c259
-rw-r--r--fs/btrfs/ordered-data.c5
-rw-r--r--fs/btrfs/qgroup.c56
-rw-r--r--fs/btrfs/raid56.c62
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/scrub.c79
-rw-r--r--fs/btrfs/super.c3
-rw-r--r--fs/btrfs/transaction.c7
-rw-r--r--fs/btrfs/tree-log.c226
-rw-r--r--fs/btrfs/volumes.c145
20 files changed, 835 insertions, 365 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0ef5cc13fae2..81220b2203c6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,6 +44,8 @@
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
#define BTRFS_INODE_HAS_PROPS 11
+/* DIO is ready to submit */
+#define BTRFS_INODE_DIO_READY 12
/*
* The following 3 bits are meant only for the btree inode.
* When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ce7dec88f4b8..541fbfaed276 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -343,7 +343,7 @@ static int btrfsic_process_written_superblock(
struct btrfsic_state *state,
struct btrfsic_block *const block,
struct btrfs_super_block *const super_hdr);
-static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
+static void btrfsic_bio_end_io(struct bio *bp);
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
const struct btrfsic_block *block,
@@ -2207,7 +2207,7 @@ continue_loop:
goto again;
}
-static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
+static void btrfsic_bio_end_io(struct bio *bp)
{
struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
int iodone_w_error;
@@ -2215,7 +2215,7 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
/* mutex is not held! This is not save if IO is not yet completed
* on umount */
iodone_w_error = 0;
- if (bio_error_status)
+ if (bp->bi_error)
iodone_w_error = 1;
BUG_ON(NULL == block);
@@ -2230,7 +2230,7 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
printk(KERN_INFO
"bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
- bio_error_status,
+ bp->bi_error,
btrfsic_get_block_type(dev_state->state, block),
block->logical_bytenr, dev_state->name,
block->dev_bytenr, block->mirror_num);
@@ -2252,7 +2252,7 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
block = next_block;
} while (NULL != block);
- bp->bi_end_io(bp, bio_error_status);
+ bp->bi_end_io(bp);
}
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ce62324c78e7..57ee8ca29b06 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -97,10 +97,7 @@ static inline int compressed_bio_size(struct btrfs_root *root,
static struct bio *compressed_bio_alloc(struct block_device *bdev,
u64 first_byte, gfp_t gfp_flags)
{
- int nr_vecs;
-
- nr_vecs = bio_get_nr_vecs(bdev);
- return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
+ return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags);
}
static int check_compressed_csum(struct inode *inode,
@@ -152,7 +149,7 @@ fail:
* The compressed pages are freed here, and it must be run
* in process context
*/
-static void end_compressed_bio_read(struct bio *bio, int err)
+static void end_compressed_bio_read(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
@@ -160,7 +157,7 @@ static void end_compressed_bio_read(struct bio *bio, int err)
unsigned long index;
int ret;
- if (err)
+ if (bio->bi_error)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -210,7 +207,7 @@ csum_failed:
bio_for_each_segment_all(bvec, cb->orig_bio, i)
SetPageChecked(bvec->bv_page);
- bio_endio(cb->orig_bio, 0);
+ bio_endio(cb->orig_bio);
}
/* finally free the cb struct */
@@ -266,7 +263,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
* This also calls the writeback end hooks for the file pages so that
* metadata and checksums can be updated in the file.
*/
-static void end_compressed_bio_write(struct bio *bio, int err)
+static void end_compressed_bio_write(struct bio *bio)
{
struct extent_io_tree *tree;
struct compressed_bio *cb = bio->bi_private;
@@ -274,7 +271,7 @@ static void end_compressed_bio_write(struct bio *bio, int err)
struct page *page;
unsigned long index;
- if (err)
+ if (bio->bi_error)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -293,7 +290,7 @@ static void end_compressed_bio_write(struct bio *bio, int err)
cb->start,
cb->start + cb->len - 1,
NULL,
- err ? 0 : 1);
+ bio->bi_error ? 0 : 1);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
@@ -697,8 +694,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
- if (ret)
- bio_endio(comp_bio, ret);
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(comp_bio);
+ }
bio_put(comp_bio);
@@ -724,8 +723,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
- if (ret)
- bio_endio(comp_bio, ret);
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(comp_bio);
+ }
bio_put(comp_bio);
return 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80a9aefb0c46..aac314e14188 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1778,6 +1778,7 @@ struct btrfs_fs_info {
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
+ struct mutex delete_unused_bgs_mutex;
/* For btrfs to record security options */
struct security_mnt_opts security_opts;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 862fbc206755..564a7de17d99 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -378,7 +378,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device);
if (ret)
- btrfs_error(root->fs_info, ret, "kobj add dev failed");
+ btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s started\n",
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f43bfea3684..5e307bd0471a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -703,7 +703,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
return -EIO; /* we fixed nothing */
}
-static void end_workqueue_bio(struct bio *bio, int err)
+static void end_workqueue_bio(struct bio *bio)
{
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
@@ -711,7 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
btrfs_work_func_t func;
fs_info = end_io_wq->info;
- end_io_wq->error = err;
+ end_io_wq->error = bio->bi_error;
if (bio->bi_rw & REQ_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -808,7 +808,8 @@ static void run_one_async_done(struct btrfs_work *work)
/* If an error occured we just want to clean up the bio and move on */
if (async->error) {
- bio_endio(async->bio, async->error);
+ async->bio->bi_error = async->error;
+ bio_endio(async->bio);
return;
}
@@ -908,8 +909,10 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
* submission context. Just jump into btrfs_map_bio
*/
ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
- if (ret)
- bio_endio(bio, ret);
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
return ret;
}
@@ -960,10 +963,13 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
__btree_submit_bio_done);
}
- if (ret) {
+ if (ret)
+ goto out_w_error;
+ return 0;
+
out_w_error:
- bio_endio(bio, ret);
- }
+ bio->bi_error = ret;
+ bio_endio(bio);
return ret;
}
@@ -1735,22 +1741,22 @@ static void end_workqueue_fn(struct btrfs_work *work)
{
struct bio *bio;
struct btrfs_end_io_wq *end_io_wq;
- int error;
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
- error = end_io_wq->error;
+ bio->bi_error = end_io_wq->error;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
- bio_endio(bio, error);
+ bio_endio(bio);
}
static int cleaner_kthread(void *arg)
{
struct btrfs_root *root = arg;
int again;
+ struct btrfs_trans_handle *trans;
do {
again = 0;
@@ -1772,7 +1778,6 @@ static int cleaner_kthread(void *arg)
}
btrfs_run_delayed_iputs(root);
- btrfs_delete_unused_bgs(root->fs_info);
again = btrfs_clean_one_deleted_snapshot(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -1781,6 +1786,16 @@ static int cleaner_kthread(void *arg)
* needn't do anything special here.
*/
btrfs_run_defrag_inodes(root->fs_info);
+
+ /*
+ * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+ * with relocation (btrfs_relocate_chunk) and relocation
+ * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+ * after acquiring fs_info->delete_unused_bgs_mutex. So we
+ * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+ * unused block groups.
+ */
+ btrfs_delete_unused_bgs(root->fs_info);
sleep:
if (!try_to_freeze() && !again) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -1789,6 +1804,34 @@ sleep:
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
+
+ /*
+ * Transaction kthread is stopped before us and wakes us up.
+ * However we might have started a new transaction and COWed some
+ * tree blocks when deleting unused block groups for example. So
+ * make sure we commit the transaction we started to have a clean
+ * shutdown when evicting the btree inode - if it has dirty pages
+ * when we do the final iput() on it, eviction will trigger a
+ * writeback for it which will fail with null pointer dereferences
+ * since work queues and other resources were already released and
+ * destroyed by the time the iput/eviction/writeback is made.
+ */
+ trans = btrfs_attach_transaction(root);
+ if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT)
+ btrfs_err(root->fs_info,
+ "cleaner transaction attach returned %ld",
+ PTR_ERR(trans));
+ } else {
+ int ret;
+
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ btrfs_err(root->fs_info,
+ "cleaner open transaction commit returned %d",
+ ret);
+ }
+
return 0;
}
@@ -2492,6 +2535,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->unused_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
+ mutex_init(&fs_info->delete_unused_bgs_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
seqlock_init(&fs_info->profiles_lock);
@@ -2803,6 +2847,7 @@ int open_ctree(struct super_block *sb,
!extent_buffer_uptodate(chunk_root->node)) {
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
+ chunk_root->node = NULL;
goto fail_tree_roots;
}
btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
@@ -2840,7 +2885,7 @@ retry_root_backup:
!extent_buffer_uptodate(tree_root->node)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
sb->s_id);
-
+ tree_root->node = NULL;
goto recovery_tree_root;
}
@@ -3284,10 +3329,8 @@ static int write_dev_supers(struct btrfs_device *device,
* endio for the write_dev_flush, this will wake anyone waiting
* for the barrier when it is done
*/
-static void btrfs_end_empty_barrier(struct bio *bio, int err)
+static void btrfs_end_empty_barrier(struct bio *bio)
{
- if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
if (bio->bi_private)
complete(bio->bi_private);
bio_put(bio);
@@ -3315,8 +3358,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
wait_for_completion(&device->flush_wait);
- if (!bio_flagged(bio, BIO_UPTODATE)) {
- ret = -EIO;
+ if (bio->bi_error) {
+ ret = bio->bi_error;
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 38b76cc02f48..07204bf601ed 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2296,9 +2296,22 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
+ struct btrfs_delayed_ref_node *ref;
+
if (list_empty(&head->ref_list))
return NULL;
+ /*
+ * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
+ * This is to prevent a ref count from going down to zero, which deletes
+ * the extent item from the extent tree, when there still are references
+ * to add, which would fail because they would not find the extent item.
+ */
+ list_for_each_entry(ref, &head->ref_list, list) {
+ if (ref->action == BTRFS_ADD_DELAYED_REF)
+ return ref;
+ }
+
return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
list);
}
@@ -4214,6 +4227,24 @@ out:
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
mutex_unlock(&fs_info->chunk_mutex);
+ /*
+ * When we allocate a new chunk we reserve space in the chunk block
+ * reserve to make sure we can COW nodes/leafs in the chunk tree or
+ * add new nodes/leafs to it if we end up needing to do it when
+ * inserting the chunk item and updating device items as part of the
+ * second phase of chunk allocation, performed by
+ * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
+ * large number of new block groups to create in our transaction
+ * handle's new_bgs list to avoid exhausting the chunk block reserve
+ * in extreme cases - like having a single transaction create many new
+ * block groups when starting to write out the free space caches of all
+ * the block groups that were made dirty during the lifetime of the
+ * transaction.
+ */
+ if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
+ btrfs_create_pending_block_groups(trans, trans->root);
+ btrfs_trans_release_chunk_metadata(trans);
+ }
return ret;
}
@@ -9889,6 +9920,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
}
spin_unlock(&fs_info->unused_bgs_lock);
+ mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
+
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
@@ -9983,6 +10016,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
end_trans:
btrfs_end_transaction(trans, root);
next:
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 02d05817cbdf..68b12bbc709f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2486,7 +2486,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
* Scheduling is not allowed, so the extent state tree is expected
* to have one and only one object corresponding to this IO.
*/
-static void end_bio_extent_writepage(struct bio *bio, int err)
+static void end_bio_extent_writepage(struct bio *bio)
{
struct bio_vec *bvec;
u64 start;
@@ -2516,7 +2516,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- if (end_extent_writepage(page, err, start, end))
+ if (end_extent_writepage(page, bio->bi_error, start, end))
continue;
end_page_writeback(page);
@@ -2548,10 +2548,10 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
* Scheduling is not allowed, so the extent state tree is expected
* to have one and only one object corresponding to this IO.
*/
-static void end_bio_extent_readpage(struct bio *bio, int err)
+static void end_bio_extent_readpage(struct bio *bio)
{
struct bio_vec *bvec;
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ int uptodate = !bio->bi_error;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
u64 offset = 0;
@@ -2564,16 +2564,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
int ret;
int i;
- if (err)
- uptodate = 0;
-
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
struct inode *inode = page->mapping->host;
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
- "mirror=%u\n", (u64)bio->bi_iter.bi_sector, err,
- io_bio->mirror_num);
+ "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
+ bio->bi_error, io_bio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
/* We always issue full-page reads, but if some block
@@ -2614,8 +2611,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (tree->ops && tree->ops->readpage_io_failed_hook) {
ret = tree->ops->readpage_io_failed_hook(page, mirror);
- if (!ret && !err &&
- test_bit(BIO_UPTODATE, &bio->bi_flags))
+ if (!ret && !bio->bi_error)
uptodate = 1;
} else {
/*
@@ -2631,10 +2627,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
ret = bio_readpage_error(bio, offset, page, start, end,
mirror);
if (ret == 0) {
- uptodate =
- test_bit(BIO_UPTODATE, &bio->bi_flags);
- if (err)
- uptodate = 0;
+ uptodate = !bio->bi_error;
offset += len;
continue;
}
@@ -2684,7 +2677,7 @@ readpage_ok:
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
if (io_bio->end_io)
- io_bio->end_io(io_bio, err);
+ io_bio->end_io(io_bio, bio->bi_error);
bio_put(bio);
}
@@ -2802,9 +2795,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
{
int ret = 0;
struct bio *bio;
- int nr;
int contig = 0;
- int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
@@ -2829,12 +2820,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
return 0;
}
}
- if (this_compressed)
- nr = BIO_MAX_PAGES;
- else
- nr = bio_get_nr_vecs(bdev);
- bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+ bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
+ GFP_NOFS | __GFP_HIGH);
if (!bio)
return -ENOMEM;
@@ -3696,7 +3684,7 @@ static void set_btree_ioerr(struct page *page)
}
}
-static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
+static void end_bio_extent_buffer_writepage(struct bio *bio)
{
struct bio_vec *bvec;
struct extent_buffer *eb;
@@ -3709,7 +3697,8 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
+ if (bio->bi_error ||
+ test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
ClearPageUptodate(page);
set_btree_ioerr(page);
}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index f6a596d5a637..d4a582ac3f73 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
+ spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
struct btrfs_free_space *info;
struct rb_node *n;
u64 count;
@@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
return;
while (1) {
+ bool add_to_ctl = true;
+
+ spin_lock(rbroot_lock);
n = rb_first(rbroot);
- if (!n)
+ if (!n) {
+ spin_unlock(rbroot_lock);
break;
+ }
info = rb_entry(n, struct btrfs_free_space, offset_index);
BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->ino_cache_progress)
- goto free;
+ add_to_ctl = false;
else if (info->offset + info->bytes > root->ino_cache_progress)
count = root->ino_cache_progress - info->offset + 1;
else
count = info->bytes;
- __btrfs_add_free_space(ctl, info->offset, count);
-free:
rb_erase(&info->offset_index, rbroot);
- kfree(info);
+ spin_unlock(rbroot_lock);
+ if (add_to_ctl)
+ __btrfs_add_free_space(ctl, info->offset, count);
+ kmem_cache_free(btrfs_free_space_cachep, info);
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 855935f6671a..f924d9a62700 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1845,8 +1845,10 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
int ret;
ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
- if (ret)
- bio_endio(bio, ret);
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
return ret;
}
@@ -1906,8 +1908,10 @@ mapit:
ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
out:
- if (ret < 0)
- bio_endio(bio, ret);
+ if (ret < 0) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
return ret;
}
@@ -4209,7 +4213,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
- u64 last_size = (u64)-1;
+ u64 last_size = new_size;
u32 found_type = (u8)-1;
int found_extent;
int del_item;
@@ -4493,8 +4497,7 @@ out:
btrfs_abort_transaction(trans, root, ret);
}
error:
- if (last_size != (u64)-1 &&
- root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
btrfs_ordered_update_i_size(inode, last_size, NULL);
btrfs_free_path(path);
@@ -4989,8 +4992,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
/*
* Keep looping until we have no more ranges in the io tree.
* We can have ongoing bios started by readpages (called from readahead)
- * that didn't get their end io callbacks called yet or they are still
- * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+ * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * still in progress (unlocked the pages in the bio but did not yet
+ * unlocked the ranges in the io tree). Therefore this means some
* ranges can still be locked and eviction started because before
* submitting those bios, which are executed by a separate task (work
* queue kthread), inode references (inode->i_count) were not taken
@@ -7546,6 +7550,7 @@ unlock:
current->journal_info = outstanding_extents;
btrfs_free_reserved_data_space(inode, len);
+ set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
}
/*
@@ -7687,13 +7692,13 @@ struct btrfs_retry_complete {
int uptodate;
};
-static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
+static void btrfs_retry_endio_nocsum(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
struct bio_vec *bvec;
int i;
- if (err)
+ if (bio->bi_error)
goto end;
done->uptodate = 1;
@@ -7742,7 +7747,7 @@ try_again:
return 0;
}
-static void btrfs_retry_endio(struct bio *bio, int err)
+static void btrfs_retry_endio(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
@@ -7751,7 +7756,7 @@ static void btrfs_retry_endio(struct bio *bio, int err)
int ret;
int i;
- if (err)
+ if (bio->bi_error)
goto end;
uptodate = 1;
@@ -7834,12 +7839,13 @@ static int btrfs_subio_endio_read(struct inode *inode,
}
}
-static void btrfs_endio_direct_read(struct bio *bio, int err)
+static void btrfs_endio_direct_read(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
struct inode *inode = dip->inode;
struct bio *dio_bio;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ int err = bio->bi_error;
if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -7850,17 +7856,14 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
kfree(dip);
- /* If we had a csum failure make sure to clear the uptodate flag */
- if (err)
- clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
- dio_end_io(dio_bio, err);
+ dio_end_io(dio_bio, bio->bi_error);
if (io_bio->end_io)
io_bio->end_io(io_bio, err);
bio_put(bio);
}
-static void btrfs_endio_direct_write(struct bio *bio, int err)
+static void btrfs_endio_direct_write(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
struct inode *inode = dip->inode;
@@ -7871,12 +7874,11 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct bio *dio_bio;
int ret;
- if (err)
- goto out_done;
again:
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
&ordered_offset,
- ordered_bytes, !err);
+ ordered_bytes,
+ !bio->bi_error);
if (!ret)
goto out_test;
@@ -7895,15 +7897,11 @@ out_test:
ordered = NULL;
goto again;
}
-out_done:
dio_bio = dip->dio_bio;
kfree(dip);
- /* If we had an error make sure to clear the uptodate flag */
- if (err)
- clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
- dio_end_io(dio_bio, err);
+ dio_end_io(dio_bio, bio->bi_error);
bio_put(bio);
}
@@ -7918,9 +7916,10 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
return 0;
}
-static void btrfs_end_dio_bio(struct bio *bio, int err)
+static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
+ int err = bio->bi_error;
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -7949,8 +7948,8 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
if (dip->errors) {
bio_io_error(dip->orig_bio);
} else {
- set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
- bio_endio(dip->orig_bio, 0);
+ dip->dio_bio->bi_error = 0;
+ bio_endio(dip->orig_bio);
}
out:
bio_put(bio);
@@ -7959,8 +7958,7 @@ out:
static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
u64 first_sector, gfp_t gfp_flags)
{
- int nr_vecs = bio_get_nr_vecs(bdev);
- return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+ return btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
}
static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
@@ -8163,9 +8161,8 @@ out_err:
static void btrfs_submit_direct(int rw, struct bio *dio_bio,
struct inode *inode, loff_t file_offset)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_dio_private *dip;
- struct bio *io_bio;
+ struct btrfs_dio_private *dip = NULL;
+ struct bio *io_bio = NULL;
struct btrfs_io_bio *btrfs_bio;
int skip_sum;
int write = rw & REQ_WRITE;
@@ -8182,7 +8179,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dip = kzalloc(sizeof(*dip), GFP_NOFS);
if (!dip) {
ret = -ENOMEM;
- goto free_io_bio;
+ goto free_ordered;
}
dip->private = dio_bio->bi_private;
@@ -8210,25 +8207,56 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
if (btrfs_bio->end_io)
btrfs_bio->end_io(btrfs_bio, ret);
-free_io_bio:
- bio_put(io_bio);
free_ordered:
/*
- * If this is a write, we need to clean up the reserved space and kill
- * the ordered extent.
+ * If we arrived here it means either we failed to submit the dip
+ * or we either failed to clone the dio_bio or failed to allocate the
+ * dip. If we cloned the dio_bio and allocated the dip, we can just
+ * call bio_endio against our io_bio so that we get proper resource
+ * cleanup if we fail to submit the dip, otherwise, we must do the
+ * same as btrfs_endio_direct_[write|read] because we can't call these
+ * callbacks - they require an allocated dip and a clone of dio_bio.
*/
- if (write) {
- struct btrfs_ordered_extent *ordered;
- ordered = btrfs_lookup_ordered_extent(inode, file_offset);
- if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
- btrfs_free_reserved_extent(root, ordered->start,
- ordered->disk_len, 1);
- btrfs_put_ordered_extent(ordered);
- btrfs_put_ordered_extent(ordered);
+ if (io_bio && dip) {
+ io_bio->bi_error = -EIO;
+ bio_endio(io_bio);
+ /*
+ * The end io callbacks free our dip, do the final put on io_bio
+ * and all the cleanup and final put for dio_bio (through
+ * dio_end_io()).
+ */
+ dip = NULL;
+ io_bio = NULL;
+ } else {
+ if (write) {
+ struct btrfs_ordered_extent *ordered;
+
+ ordered = btrfs_lookup_ordered_extent(inode,
+ file_offset);
+ set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+ /*
+ * Decrements our ref on the ordered extent and removes
+ * the ordered extent from the inode's ordered tree,
+ * doing all the proper resource cleanup such as for the
+ * reserved space and waking up any waiters for this
+ * ordered extent (through btrfs_remove_ordered_extent).
+ */
+ btrfs_finish_ordered_io(ordered);
+ } else {
+ unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+ file_offset + dio_bio->bi_iter.bi_size - 1);
+ }
+ dio_bio->bi_error = -EIO;
+ /*
+ * Releases and cleans up our dio_bio, no need to bio_put()
+ * nor bio_endio()/bio_io_error() against dio_bio.
+ */
+ dio_end_io(dio_bio, ret);
}
- bio_endio(dio_bio, ret);
+ if (io_bio)
+ bio_put(io_bio);
+ kfree(dip);
}
static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
@@ -8330,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
btrfs_submit_direct, flags);
if (iov_iter_rw(iter) == WRITE) {
current->journal_info = NULL;
- if (ret < 0 && ret != -EIOCBQUEUED)
- btrfs_delalloc_release_space(inode, count);
- else if (ret >= 0 && (size_t)ret < count)
+ if (ret < 0 && ret != -EIOCBQUEUED) {
+ /*
+ * If the error comes from submitting stage,
+ * btrfs_get_blocsk_direct() has free'd data space,
+ * and metadata space will be handled by
+ * finish_ordered_fn, don't do that again to make
+ * sure bytes_may_use is correct.
+ */
+ if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
+ &BTRFS_I(inode)->runtime_flags))
+ btrfs_delalloc_release_space(inode, count);
+ } else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode,
count - (size_t)ret);
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c86b835da7a8..0770c91586ca 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
static int btrfs_clone(struct inode *src, struct inode *inode,
- u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+ u64 off, u64 olen, u64 olen_aligned, u64 destoff,
+ int no_time_update);
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -2765,14 +2766,11 @@ out:
return ret;
}
-static struct page *extent_same_get_page(struct inode *inode, u64 off)
+static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
{
struct page *page;
- pgoff_t index;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
- index = off >> PAGE_CACHE_SHIFT;
-
page = grab_cache_page(inode->i_mapping, index);
if (!page)
return NULL;
@@ -2793,6 +2791,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
return page;
}
+static int gather_extent_pages(struct inode *inode, struct page **pages,
+ int num_pages, u64 off)
+{
+ int i;
+ pgoff_t index = off >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < num_pages; i++) {
+ pages[i] = extent_same_get_page(inode, index + i);
+ if (!pages[i])
+ return -ENOMEM;
+ }
+ return 0;
+}
+
static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
{
/* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2830,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
}
}
-static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
{
- unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
- unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
-
mutex_unlock(&inode1->i_mutex);
mutex_unlock(&inode2->i_mutex);
}
-static void btrfs_double_lock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
+{
+ if (inode1 < inode2)
+ swap(inode1, inode2);
+
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+ if (inode1 != inode2)
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+}
+
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
{
if (inode1 < inode2) {
swap(inode1, inode2);
swap(loff1, loff2);
}
-
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
lock_extent_range(inode1, loff1, len);
- if (inode1 != inode2) {
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ if (inode1 != inode2)
lock_extent_range(inode2, loff2, len);
+}
+
+struct cmp_pages {
+ int num_pages;
+ struct page **src_pages;
+ struct page **dst_pages;
+};
+
+static void btrfs_cmp_data_free(struct cmp_pages *cmp)
+{
+ int i;
+ struct page *pg;
+
+ for (i = 0; i < cmp->num_pages; i++) {
+ pg = cmp->src_pages[i];
+ if (pg)
+ page_cache_release(pg);
+ pg = cmp->dst_pages[i];
+ if (pg)
+ page_cache_release(pg);
+ }
+ kfree(cmp->src_pages);
+ kfree(cmp->dst_pages);
+}
+
+static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
+ struct inode *dst, u64 dst_loff,
+ u64 len, struct cmp_pages *cmp)
+{
+ int ret;
+ int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+ struct page **src_pgarr, **dst_pgarr;
+
+ /*
+ * We must gather up all the pages before we initiate our
+ * extent locking. We use an array for the page pointers. Size
+ * of the array is bounded by len, which is in turn bounded by
+ * BTRFS_MAX_DEDUPE_LEN.
+ */
+ src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+ dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+ if (!src_pgarr || !dst_pgarr) {
+ kfree(src_pgarr);
+ kfree(dst_pgarr);
+ return -ENOMEM;
}
+ cmp->num_pages = num_pages;
+ cmp->src_pages = src_pgarr;
+ cmp->dst_pages = dst_pgarr;
+
+ ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
+ if (ret)
+ goto out;
+
+ ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
+
+out:
+ if (ret)
+ btrfs_cmp_data_free(cmp);
+ return 0;
}
static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
- u64 dst_loff, u64 len)
+ u64 dst_loff, u64 len, struct cmp_pages *cmp)
{
int ret = 0;
+ int i;
struct page *src_page, *dst_page;
unsigned int cmp_len = PAGE_CACHE_SIZE;
void *addr, *dst_addr;
+ i = 0;
while (len) {
if (len < PAGE_CACHE_SIZE)
cmp_len = len;
- src_page = extent_same_get_page(src, loff);
- if (!src_page)
- return -EINVAL;
- dst_page = extent_same_get_page(dst, dst_loff);
- if (!dst_page) {
- page_cache_release(src_page);
- return -EINVAL;
- }
+ BUG_ON(i >= cmp->num_pages);
+
+ src_page = cmp->src_pages[i];
+ dst_page = cmp->dst_pages[i];
+
addr = kmap_atomic(src_page);
dst_addr = kmap_atomic(dst_page);
@@ -2875,15 +2955,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
kunmap_atomic(addr);
kunmap_atomic(dst_addr);
- page_cache_release(src_page);
- page_cache_release(dst_page);
if (ret)
break;
- loff += cmp_len;
- dst_loff += cmp_len;
len -= cmp_len;
+ i++;
}
return ret;
@@ -2914,27 +2991,62 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
{
int ret;
u64 len = olen;
+ struct cmp_pages cmp;
+ int same_inode = 0;
+ u64 same_lock_start = 0;
+ u64 same_lock_len = 0;
- /*
- * btrfs_clone() can't handle extents in the same file
- * yet. Once that works, we can drop this check and replace it
- * with a check for the same inode, but overlapping extents.
- */
if (src == dst)
- return -EINVAL;
+ same_inode = 1;
if (len == 0)
return 0;
- btrfs_double_lock(src, loff, dst, dst_loff, len);
+ if (same_inode) {
+ mutex_lock(&src->i_mutex);
- ret = extent_same_check_offsets(src, loff, &len, olen);
- if (ret)
- goto out_unlock;
+ ret = extent_same_check_offsets(src, loff, &len, olen);
+ if (ret)
+ goto out_unlock;
- ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
- if (ret)
- goto out_unlock;
+ /*
+ * Single inode case wants the same checks, except we
+ * don't want our length pushed out past i_size as
+ * comparing that data range makes no sense.
+ *
+ * extent_same_check_offsets() will do this for an
+ * unaligned length at i_size, so catch it here and
+ * reject the request.
+ *
+ * This effectively means we require aligned extents
+ * for the single-inode case, whereas the other cases
+ * allow an unaligned length so long as it ends at
+ * i_size.
+ */
+ if (len != olen) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Check for overlapping ranges */
+ if (dst_loff + len > loff && dst_loff < loff + len) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ same_lock_start = min_t(u64, loff, dst_loff);
+ same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
+ } else {
+ btrfs_double_inode_lock(src, dst);
+
+ ret = extent_same_check_offsets(src, loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+
+ ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+ }
/* don't make the dst file partly checksummed */
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
@@ -2943,12 +3055,32 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
goto out_unlock;
}
- ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
+ ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+ if (ret)
+ goto out_unlock;
+
+ if (same_inode)
+ lock_extent_range(src, same_lock_start, same_lock_len);
+ else
+ btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+
+ /* pass original length for comparison so we stay within i_size */
+ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
if (ret == 0)
- ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
+ ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
+
+ if (same_inode)
+ unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
+ same_lock_start + same_lock_len - 1);
+ else
+ btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
+ btrfs_cmp_data_free(&cmp);
out_unlock:
- btrfs_double_unlock(src, loff, dst, dst_loff, len);
+ if (same_inode)
+ mutex_unlock(&src->i_mutex);
+ else
+ btrfs_double_inode_unlock(src, dst);
return ret;
}
@@ -2958,7 +3090,7 @@ out_unlock:
static long btrfs_ioctl_file_extent_same(struct file *file,
struct btrfs_ioctl_same_args __user *argp)
{
- struct btrfs_ioctl_same_args *same;
+ struct btrfs_ioctl_same_args *same = NULL;
struct btrfs_ioctl_same_extent_info *info;
struct inode *src = file_inode(file);
u64 off;
@@ -2988,6 +3120,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
if (IS_ERR(same)) {
ret = PTR_ERR(same);
+ same = NULL;
goto out;
}
@@ -3058,6 +3191,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
out:
mnt_drop_write_file(file);
+ kfree(same);
return ret;
}
@@ -3100,13 +3234,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
struct inode *inode,
u64 endoff,
const u64 destoff,
- const u64 olen)
+ const u64 olen,
+ int no_time_update)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
inode_inc_iversion(inode);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ if (!no_time_update)
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
/*
* We round up to the block size at eof when determining which
* extents to clone above, but shouldn't round up the file size.
@@ -3191,13 +3327,13 @@ static void clone_update_extent_map(struct inode *inode,
* @inode: Inode to clone to
* @off: Offset within source to start clone from
* @olen: Original length, passed by user, of range to clone
- * @olen_aligned: Block-aligned value of olen, extent_same uses
- * identical values here
+ * @olen_aligned: Block-aligned value of olen
* @destoff: Offset within @inode to start clone
+ * @no_time_update: Whether to update mtime/ctime on the target inode
*/
static int btrfs_clone(struct inode *src, struct inode *inode,
const u64 off, const u64 olen, const u64 olen_aligned,
- const u64 destoff)
+ const u64 destoff, int no_time_update)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path = NULL;
@@ -3452,6 +3588,20 @@ process_slot:
u64 trim = 0;
u64 aligned_end = 0;
+ /*
+ * Don't copy an inline extent into an offset
+ * greater than zero. Having an inline extent
+ * at such an offset results in chaos as btrfs
+ * isn't prepared for such cases. Just skip
+ * this case for the same reasons as commented
+ * at btrfs_ioctl_clone().
+ */
+ if (last_dest_end > 0) {
+ ret = -EOPNOTSUPP;
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
+
if (off > key.offset) {
skip = off - key.offset;
new_key.offset += skip;
@@ -3521,7 +3671,8 @@ process_slot:
root->sectorsize);
ret = clone_finish_inode_update(trans, inode,
last_dest_end,
- destoff, olen);
+ destoff, olen,
+ no_time_update);
if (ret)
goto out;
if (new_key.offset + datal >= destoff + len)
@@ -3559,7 +3710,7 @@ process_slot:
clone_update_extent_map(inode, trans, NULL, last_dest_end,
destoff + len - last_dest_end);
ret = clone_finish_inode_update(trans, inode, destoff + len,
- destoff, olen);
+ destoff, olen, no_time_update);
}
out:
@@ -3696,7 +3847,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
lock_extent_range(inode, destoff, len);
}
- ret = btrfs_clone(src, inode, off, olen, len, destoff);
+ ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
if (same_inode) {
u64 lock_start = min_t(u64, off, destoff);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 89656d799ff6..52170cf1757e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (atomic_dec_and_test(&entry->refs)) {
+ ASSERT(list_empty(&entry->log_list));
+ ASSERT(list_empty(&entry->trans_list));
+ ASSERT(list_empty(&entry->root_extent_list));
+ ASSERT(RB_EMPTY_NODE(&entry->rb_node));
if (entry->inode)
btrfs_add_delayed_iput(entry->inode);
while (!list_empty(&entry->list)) {
@@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
+ RB_CLEAR_NODE(node);
if (tree->last == node)
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d5f1f033b7a0..d904ee1c5349 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -376,7 +376,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
qgroup = find_qgroup_rb(fs_info, found_key.offset);
if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
- btrfs_err(fs_info, "inconsitent qgroup config");
+ btrfs_err(fs_info, "inconsistent qgroup config");
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
}
if (!qgroup) {
@@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
+ /* Sometimes we would want to clear the limit on this qgroup.
+ * To meet this requirement, we treat the -1 as a special value
+ * which tell kernel to clear the limit on this qgroup.
+ */
+ const u64 CLEAR_VALUE = -1;
mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
@@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
}
spin_lock(&fs_info->qgroup_lock);
- if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
- qgroup->max_rfer = limit->max_rfer;
- if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
- qgroup->max_excl = limit->max_excl;
- if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
- qgroup->rsv_rfer = limit->rsv_rfer;
- if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
- qgroup->rsv_excl = limit->rsv_excl;
+ if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
+ if (limit->max_rfer == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+ qgroup->max_rfer = 0;
+ } else {
+ qgroup->max_rfer = limit->max_rfer;
+ }
+ }
+ if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
+ if (limit->max_excl == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+ qgroup->max_excl = 0;
+ } else {
+ qgroup->max_excl = limit->max_excl;
+ }
+ }
+ if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
+ if (limit->rsv_rfer == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+ qgroup->rsv_rfer = 0;
+ } else {
+ qgroup->rsv_rfer = limit->rsv_rfer;
+ }
+ }
+ if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
+ if (limit->rsv_excl == CLEAR_VALUE) {
+ qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+ limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+ qgroup->rsv_excl = 0;
+ } else {
+ qgroup->rsv_excl = limit->rsv_excl;
+ }
+ }
qgroup->lim_flags |= limit->flags;
spin_unlock(&fs_info->qgroup_lock);
@@ -1618,6 +1651,11 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
/* Exclusive -> exclusive, nothing changed */
}
}
+
+ /* For exclusive extent, free its reserved bytes too */
+ if (nr_old_roots == 0 && nr_new_roots == 1 &&
+ cur_new_count == nr_new_roots)
+ qg->reserved -= num_bytes;
if (dirty)
qgroup_dirty(fs_info, qg);
}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index fa72068bd256..0a02e24900aa 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -851,7 +851,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
* this frees the rbio and runs through all the bios in the
* bio_list and calls end_io on them
*/
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *next;
@@ -864,9 +864,8 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
- if (uptodate)
- set_bit(BIO_UPTODATE, &cur->bi_flags);
- bio_endio(cur, err);
+ cur->bi_error = err;
+ bio_endio(cur);
cur = next;
}
}
@@ -875,9 +874,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
* end io function used by finish_rmw. When we finally
* get here, we've written a full stripe
*/
-static void raid_write_end_io(struct bio *bio, int err)
+static void raid_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
+ int err = bio->bi_error;
if (err)
fail_bio_stripe(rbio, bio);
@@ -893,7 +893,7 @@ static void raid_write_end_io(struct bio *bio, int err)
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
err = -EIO;
- rbio_orig_end_io(rbio, err, 0);
+ rbio_orig_end_io(rbio, err);
return;
}
@@ -1071,7 +1071,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
* devices or if they are not contiguous
*/
if (last_end == disk_start && stripe->dev->bdev &&
- test_bit(BIO_UPTODATE, &last->bi_flags) &&
+ !last->bi_error &&
last->bi_bdev == stripe->dev->bdev) {
ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0);
if (ret == PAGE_CACHE_SIZE)
@@ -1087,7 +1087,6 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
bio->bi_iter.bi_size = 0;
bio->bi_bdev = stripe->dev->bdev;
bio->bi_iter.bi_sector = disk_start >> 9;
- set_bit(BIO_UPTODATE, &bio->bi_flags);
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
bio_list_add(bio_list, bio);
@@ -1312,13 +1311,12 @@ write_data:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
submit_bio(WRITE, bio);
}
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
}
/*
@@ -1441,11 +1439,11 @@ static void set_bio_pages_uptodate(struct bio *bio)
* This will usually kick off finish_rmw once all the bios are read in, but it
* may trigger parity reconstruction if we had any errors along the way
*/
-static void raid_rmw_end_io(struct bio *bio, int err)
+static void raid_rmw_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (err)
+ if (bio->bi_error)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -1455,7 +1453,6 @@ static void raid_rmw_end_io(struct bio *bio, int err)
if (!atomic_dec_and_test(&rbio->stripes_pending))
return;
- err = 0;
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
goto cleanup;
@@ -1469,7 +1466,7 @@ static void raid_rmw_end_io(struct bio *bio, int err)
cleanup:
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
}
static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
@@ -1572,14 +1569,13 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
submit_bio(READ, bio);
}
/* the actual write will happen once the reads are done */
return 0;
cleanup:
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
return -EIO;
finish:
@@ -1964,7 +1960,7 @@ cleanup_io:
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
- rbio_orig_end_io(rbio, err, err == 0);
+ rbio_orig_end_io(rbio, err);
} else if (err == 0) {
rbio->faila = -1;
rbio->failb = -1;
@@ -1976,7 +1972,7 @@ cleanup_io:
else
BUG();
} else {
- rbio_orig_end_io(rbio, err, 0);
+ rbio_orig_end_io(rbio, err);
}
}
@@ -1984,7 +1980,7 @@ cleanup_io:
* This is called only for stripes we've read from disk to
* reconstruct the parity.
*/
-static void raid_recover_end_io(struct bio *bio, int err)
+static void raid_recover_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
@@ -1992,7 +1988,7 @@ static void raid_recover_end_io(struct bio *bio, int err)
* we only read stripe pages off the disk, set them
* up to date if there were no errors
*/
- if (err)
+ if (bio->bi_error)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -2002,7 +1998,7 @@ static void raid_recover_end_io(struct bio *bio, int err)
return;
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
else
__raid_recover_end_io(rbio);
}
@@ -2094,7 +2090,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
submit_bio(READ, bio);
}
out:
@@ -2102,7 +2097,7 @@ out:
cleanup:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD)
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
return -EIO;
}
@@ -2277,11 +2272,12 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
* end io function used by finish_rmw. When we finally
* get here, we've written a full stripe
*/
-static void raid_write_parity_end_io(struct bio *bio, int err)
+static void raid_write_parity_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
+ int err = bio->bi_error;
- if (err)
+ if (bio->bi_error)
fail_bio_stripe(rbio, bio);
bio_put(bio);
@@ -2294,7 +2290,7 @@ static void raid_write_parity_end_io(struct bio *bio, int err)
if (atomic_read(&rbio->error))
err = -EIO;
- rbio_orig_end_io(rbio, err, 0);
+ rbio_orig_end_io(rbio, err);
}
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
@@ -2437,7 +2433,7 @@ submit_write:
nr_data = bio_list_size(&bio_list);
if (!nr_data) {
/* Every parity is right */
- rbio_orig_end_io(rbio, 0, 0);
+ rbio_orig_end_io(rbio, 0);
return;
}
@@ -2450,13 +2446,12 @@ submit_write:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_parity_end_io;
- BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
submit_bio(WRITE, bio);
}
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
}
static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
@@ -2524,7 +2519,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
}
/*
@@ -2535,11 +2530,11 @@ cleanup:
* This will usually kick off finish_rmw once all the bios are read in, but it
* may trigger parity reconstruction if we had any errors along the way
*/
-static void raid56_parity_scrub_end_io(struct bio *bio, int err)
+static void raid56_parity_scrub_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (err)
+ if (bio->bi_error)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -2632,14 +2627,13 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
submit_bio(READ, bio);
}
/* the actual write will happen once the reads are done */
return;
cleanup:
- rbio_orig_end_io(rbio, -EIO, 0);
+ rbio_orig_end_io(rbio, -EIO);
return;
finish:
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 827951fbf7fc..88cbb5995667 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4049,7 +4049,7 @@ restart:
if (trans && progress && err == -ENOSPC) {
ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
rc->block_group->flags);
- if (ret == 0) {
+ if (ret == 1) {
err = 0;
progress = 0;
goto restart;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9f2feabe99f2..9c146d8307b5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -278,7 +278,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
u64 physical, struct btrfs_device *dev, u64 flags,
u64 gen, int mirror_num, u8 *csum, int force,
u64 physical_for_dev_replace);
-static void scrub_bio_end_io(struct bio *bio, int err);
+static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
@@ -295,7 +295,7 @@ static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
-static void scrub_wr_bio_end_io(struct bio *bio, int err);
+static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
u64 physical_for_dev_replace, struct page *page);
@@ -454,27 +454,14 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
struct scrub_ctx *sctx;
int i;
struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
- int pages_per_rd_bio;
int ret;
- /*
- * the setting of pages_per_rd_bio is correct for scrub but might
- * be wrong for the dev_replace code where we might read from
- * different devices in the initial huge bios. However, that
- * code is able to correctly handle the case when adding a page
- * to a bio fails.
- */
- if (dev->bdev)
- pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
- bio_get_nr_vecs(dev->bdev));
- else
- pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
if (!sctx)
goto nomem;
atomic_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
- sctx->pages_per_rd_bio = pages_per_rd_bio;
+ sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
sctx->curr = -1;
sctx->dev_root = dev->dev_root;
for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
@@ -1429,11 +1416,11 @@ struct scrub_bio_ret {
int error;
};
-static void scrub_bio_wait_endio(struct bio *bio, int error)
+static void scrub_bio_wait_endio(struct bio *bio)
{
struct scrub_bio_ret *ret = bio->bi_private;
- ret->error = error;
+ ret->error = bio->bi_error;
complete(&ret->event);
}
@@ -1790,12 +1777,12 @@ static void scrub_wr_submit(struct scrub_ctx *sctx)
btrfsic_submit_bio(WRITE, sbio->bio);
}
-static void scrub_wr_bio_end_io(struct bio *bio, int err)
+static void scrub_wr_bio_end_io(struct bio *bio)
{
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
- sbio->err = err;
+ sbio->err = bio->bi_error;
sbio->bio = bio;
btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2098,7 +2085,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
*/
printk_ratelimited(KERN_WARNING
"BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
- bio_endio(sbio->bio, -EIO);
+ bio_io_error(sbio->bio);
} else {
btrfsic_submit_bio(READ, sbio->bio);
}
@@ -2260,12 +2247,12 @@ leave_nomem:
return 0;
}
-static void scrub_bio_end_io(struct bio *bio, int err)
+static void scrub_bio_end_io(struct bio *bio)
{
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
- sbio->err = err;
+ sbio->err = bio->bi_error;
sbio->bio = bio;
btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2672,11 +2659,11 @@ static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
scrub_pending_bio_dec(sctx);
}
-static void scrub_parity_bio_endio(struct bio *bio, int error)
+static void scrub_parity_bio_endio(struct bio *bio)
{
struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
- if (error)
+ if (bio->bi_error)
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
@@ -3571,7 +3558,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
- int ret = 0;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
@@ -3584,34 +3570,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
fs_info->scrub_workers =
btrfs_alloc_workqueue("btrfs-scrub", flags,
max_active, 4);
- if (!fs_info->scrub_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_workers)
+ goto fail_scrub_workers;
+
fs_info->scrub_wr_completion_workers =
btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
max_active, 2);
- if (!fs_info->scrub_wr_completion_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_wr_completion_workers)
+ goto fail_scrub_wr_completion_workers;
+
fs_info->scrub_nocow_workers =
btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
- if (!fs_info->scrub_nocow_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_nocow_workers)
+ goto fail_scrub_nocow_workers;
fs_info->scrub_parity_workers =
btrfs_alloc_workqueue("btrfs-scrubparity", flags,
max_active, 2);
- if (!fs_info->scrub_parity_workers) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!fs_info->scrub_parity_workers)
+ goto fail_scrub_parity_workers;
}
++fs_info->scrub_workers_refcnt;
-out:
- return ret;
+ return 0;
+
+fail_scrub_parity_workers:
+ btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+fail_scrub_nocow_workers:
+ btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+fail_scrub_wr_completion_workers:
+ btrfs_destroy_workqueue(fs_info->scrub_workers);
+fail_scrub_workers:
+ return -ENOMEM;
}
static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
@@ -3895,8 +3883,7 @@ static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
return 0;
WARN_ON(!dev->bdev);
- wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
- bio_get_nr_vecs(dev->bdev));
+ wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
wr_ctx->tgtdev = dev;
atomic_set(&wr_ctx->flush_all_writes, 0);
return 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index cd7ef34d2dce..6bad63379a4c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2163,8 +2163,7 @@ static int btrfs_interface_init(void)
static void btrfs_interface_exit(void)
{
- if (misc_deregister(&btrfs_misc) < 0)
- printk(KERN_INFO "BTRFS: misc_deregister failed for control device\n");
+ misc_deregister(&btrfs_misc);
}
static void btrfs_print_info(void)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c0f18e7266b6..f5021fcb154e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -761,7 +761,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (!list_empty(&trans->ordered)) {
spin_lock(&info->trans_lock);
- list_splice(&trans->ordered, &cur_trans->pending_ordered);
+ list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
spin_unlock(&info->trans_lock);
}
@@ -1866,7 +1866,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
}
spin_lock(&root->fs_info->trans_lock);
- list_splice(&trans->ordered, &cur_trans->pending_ordered);
+ list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
spin_unlock(&root->fs_info->trans_lock);
atomic_inc(&cur_trans->use_count);
@@ -2152,7 +2152,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
- if (current != root->fs_info->transaction_kthread)
+ if (current != root->fs_info->transaction_kthread &&
+ current != root->fs_info->cleaner_kthread)
btrfs_run_delayed_iputs(root);
return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1ce80c1c4eb6..9c45431e69ab 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
return 0;
}
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If a xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if a xattr is deleted, the inode is fsynced and a power failure
+ * happens, causing the log to be replayed the next time the fs is mounted,
+ * we want the xattr to not exist anymore (same behaviour as other filesystems
+ * with a journal, ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ struct btrfs_path *path,
+ struct btrfs_path *dst_path)
+{
+ int ret;
+ struct btrfs_key key;
+ const u64 ino = btrfs_ino(inode);
+ int ins_nr = 0;
+ int start_slot = 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_XATTR_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ while (true) {
+ int slot = path->slots[0];
+ struct extent_buffer *leaf = path->nodes[0];
+ int nritems = btrfs_header_nritems(leaf);
+
+ if (slot >= nritems) {
+ if (ins_nr > 0) {
+ u64 last_extent = 0;
+
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, start_slot,
+ ins_nr, 1, 0);
+ /* can't be 1, extent items aren't processed */
+ ASSERT(ret <= 0);
+ if (ret < 0)
+ return ret;
+ ins_nr = 0;
+ }
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+ break;
+
+ if (ins_nr == 0)
+ start_slot = slot;
+ ins_nr++;
+ path->slots[0]++;
+ cond_resched();
+ }
+ if (ins_nr > 0) {
+ u64 last_extent = 0;
+
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, start_slot,
+ ins_nr, 1, 0);
+ /* can't be 1, extent items aren't processed */
+ ASSERT(ret <= 0);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * If the no holes feature is enabled we need to make sure any hole between the
+ * last extent and the i_size of our inode is explicitly marked in the log. This
+ * is to make sure that doing something like:
+ *
+ * 1) create file with 128Kb of data
+ * 2) truncate file to 64Kb
+ * 3) truncate file to 256Kb
+ * 4) fsync file
+ * 5) <crash/power failure>
+ * 6) mount fs and trigger log replay
+ *
+ * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+ * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+ * file correspond to a hole. The presence of explicit holes in a log tree is
+ * what guarantees that log replay will remove/adjust file extent items in the
+ * fs/subvol tree.
+ *
+ * Here we do not need to care about holes between extents, that is already done
+ * by copy_items(). We also only need to do this in the full sync path, where we
+ * lookup for extents from the fs/subvol tree only. In the fast path case, we
+ * lookup the list of modified extent maps and if any represents a hole, we
+ * insert a corresponding extent representing a hole in the log tree.
+ */
+static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ struct btrfs_path *path)
+{
+ int ret;
+ struct btrfs_key key;
+ u64 hole_start;
+ u64 hole_size;
+ struct extent_buffer *leaf;
+ struct btrfs_root *log = root->log_root;
+ const u64 ino = btrfs_ino(inode);
+ const u64 i_size = i_size_read(inode);
+
+ if (!btrfs_fs_incompat(root->fs_info, NO_HOLES))
+ return 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ASSERT(ret != 0);
+ if (ret < 0)
+ return ret;
+
+ ASSERT(path->slots[0] > 0);
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+ /* inode does not have any extents */
+ hole_start = 0;
+ hole_size = i_size;
+ } else {
+ struct btrfs_file_extent_item *extent;
+ u64 len;
+
+ /*
+ * If there's an extent beyond i_size, an explicit hole was
+ * already inserted by copy_items().
+ */
+ if (key.offset >= i_size)
+ return 0;
+
+ extent = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, extent) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ len = btrfs_file_extent_inline_len(leaf,
+ path->slots[0],
+ extent);
+ ASSERT(len == i_size);
+ return 0;
+ }
+
+ len = btrfs_file_extent_num_bytes(leaf, extent);
+ /* Last extent goes beyond i_size, no need to log a hole. */
+ if (key.offset + len > i_size)
+ return 0;
+ hole_start = key.offset + len;
+ hole_size = i_size - hole_start;
+ }
+ btrfs_release_path(path);
+
+ /* Last extent ends at i_size. */
+ if (hole_size == 0)
+ return 0;
+
+ hole_size = ALIGN(hole_size, root->sectorsize);
+ ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+ hole_size, 0, hole_size, 0, 0, 0);
+ return ret;
+}
+
/* log a single inode in the tree log.
* At least one parent directory for this inode must exist in the tree
* or be logged already.
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
u64 logged_isize = 0;
+ bool need_log_inode_item = true;
path = btrfs_alloc_path();
if (!path)
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
} else {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
- ret = log_inode_item(trans, log, dst_path, inode);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
goto log_extents;
}
@@ -4290,6 +4467,28 @@ again:
if (min_key.type > max_key.type)
break;
+ if (min_key.type == BTRFS_INODE_ITEM_KEY)
+ need_log_inode_item = false;
+
+ /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+ if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+ if (ins_nr == 0)
+ goto next_slot;
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, ins_start_slot,
+ ins_nr, inode_only, logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+ ins_nr = 0;
+ if (ret) {
+ btrfs_release_path(path);
+ continue;
+ }
+ goto next_slot;
+ }
+
src = path->nodes[0];
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
@@ -4357,9 +4556,26 @@ next_slot:
ins_nr = 0;
}
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
+ err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+ if (err)
+ goto out_unlock;
+ if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
+ err = btrfs_log_trailing_hole(trans, root, inode, path);
+ if (err)
+ goto out_unlock;
+ }
log_extents:
btrfs_release_path(path);
btrfs_release_path(dst_path);
+ if (need_log_inode_item) {
+ err = log_inode_item(trans, log, dst_path, inode);
+ if (err)
+ goto out_unlock;
+ }
if (fast_search) {
/*
* Some ordered extents started by fsync might have completed
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4b438b4c8c91..762476f5f08d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
root = root->fs_info->chunk_root;
extent_root = root->fs_info->extent_root;
+ /*
+ * Prevent races with automatic removal of unused block groups.
+ * After we relocate and before we remove the chunk with offset
+ * chunk_offset, automatic removal of the block group can kick in,
+ * resulting in a failure when calling btrfs_remove_chunk() below.
+ *
+ * Make sure to acquire this mutex before doing a tree search (dev
+ * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+ * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+ * we release the path used to search the chunk/dev tree and before
+ * the current task acquires this mutex and calls us.
+ */
+ ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
+
ret = btrfs_can_relocate(extent_root, chunk_offset);
if (ret)
return -ENOSPC;
@@ -2814,13 +2828,18 @@ again:
key.type = BTRFS_CHUNK_ITEM_KEY;
while (1) {
+ mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
goto error;
+ }
BUG_ON(ret == 0); /* Corruption */
ret = btrfs_previous_item(chunk_root, path, key.objectid,
key.type);
+ if (ret)
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (ret < 0)
goto error;
if (ret > 0)
@@ -2843,6 +2862,7 @@ again:
else
BUG_ON(ret);
}
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (found_key.offset == 0)
break;
@@ -3299,9 +3319,12 @@ again:
goto error;
}
+ mutex_lock(&fs_info->delete_unused_bgs_mutex);
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
+ }
/*
* this shouldn't happen, it means the last relocate
@@ -3313,6 +3336,7 @@ again:
ret = btrfs_previous_item(chunk_root, path, 0,
BTRFS_CHUNK_ITEM_KEY);
if (ret) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
ret = 0;
break;
}
@@ -3321,8 +3345,10 @@ again:
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &found_key, slot);
- if (found_key.objectid != key.objectid)
+ if (found_key.objectid != key.objectid) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
break;
+ }
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -3335,10 +3361,13 @@ again:
ret = should_balance_chunk(chunk_root, leaf, chunk,
found_key.offset);
btrfs_release_path(path);
- if (!ret)
+ if (!ret) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto loop;
+ }
if (counting) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
spin_lock(&fs_info->balance_lock);
bctl->stat.expected++;
spin_unlock(&fs_info->balance_lock);
@@ -3348,6 +3377,7 @@ again:
ret = btrfs_relocate_chunk(chunk_root,
found_key.objectid,
found_key.offset);
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
if (ret && ret != -ENOSPC)
goto error;
if (ret == -ENOSPC) {
@@ -4087,11 +4117,16 @@ again:
key.type = BTRFS_DEV_EXTENT_KEY;
do {
+ mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
goto done;
+ }
ret = btrfs_previous_item(root, path, 0, key.type);
+ if (ret)
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (ret < 0)
goto done;
if (ret) {
@@ -4105,6 +4140,7 @@ again:
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
if (key.objectid != device->devid) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
btrfs_release_path(path);
break;
}
@@ -4113,6 +4149,7 @@ again:
length = btrfs_dev_extent_length(l, dev_extent);
if (key.offset + length <= new_size) {
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
btrfs_release_path(path);
break;
}
@@ -4122,6 +4159,7 @@ again:
btrfs_release_path(path);
ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
+ mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
if (ret && ret != -ENOSPC)
goto done;
if (ret == -ENOSPC)
@@ -5703,26 +5741,26 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
-static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
+static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
{
bio->bi_private = bbio->private;
bio->bi_end_io = bbio->end_io;
- bio_endio(bio, err);
+ bio_endio(bio);
btrfs_put_bbio(bbio);
}
-static void btrfs_end_bio(struct bio *bio, int err)
+static void btrfs_end_bio(struct bio *bio)
{
struct btrfs_bio *bbio = bio->bi_private;
- struct btrfs_device *dev = bbio->stripes[0].dev;
int is_orig_bio = 0;
- if (err) {
+ if (bio->bi_error) {
atomic_inc(&bbio->error);
- if (err == -EIO || err == -EREMOTEIO) {
+ if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
unsigned int stripe_index =
btrfs_io_bio(bio)->stripe_index;
+ struct btrfs_device *dev;
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
@@ -5757,17 +5795,16 @@ static void btrfs_end_bio(struct bio *bio, int err)
* beyond the tolerance of the btrfs bio
*/
if (atomic_read(&bbio->error) > bbio->max_errors) {
- err = -EIO;
+ bio->bi_error = -EIO;
} else {
/*
* this bio is actually up to date, we didn't
* go over the max number of errors
*/
- set_bit(BIO_UPTODATE, &bio->bi_flags);
- err = 0;
+ bio->bi_error = 0;
}
- btrfs_end_bbio(bbio, bio, err);
+ btrfs_end_bbio(bbio, bio);
} else if (!is_orig_bio) {
bio_put(bio);
}
@@ -5788,7 +5825,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
struct btrfs_pending_bios *pending_bios;
if (device->missing || !device->bdev) {
- bio_endio(bio, -EIO);
+ bio_io_error(bio);
return;
}
@@ -5833,34 +5870,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
&device->work);
}
-static int bio_size_ok(struct block_device *bdev, struct bio *bio,
- sector_t sector)
-{
- struct bio_vec *prev;
- struct request_queue *q = bdev_get_queue(bdev);
- unsigned int max_sectors = queue_max_sectors(q);
- struct bvec_merge_data bvm = {
- .bi_bdev = bdev,
- .bi_sector = sector,
- .bi_rw = bio->bi_rw,
- };
-
- if (WARN_ON(bio->bi_vcnt == 0))
- return 1;
-
- prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if (bio_sectors(bio) > max_sectors)
- return 0;
-
- if (!q->merge_bvec_fn)
- return 1;
-
- bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len;
- if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
- return 0;
- return 1;
-}
-
static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
struct bio *bio, u64 physical, int dev_nr,
int rw, int async)
@@ -5894,38 +5903,6 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
btrfsic_submit_bio(rw, bio);
}
-static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
- struct bio *first_bio, struct btrfs_device *dev,
- int dev_nr, int rw, int async)
-{
- struct bio_vec *bvec = first_bio->bi_io_vec;
- struct bio *bio;
- int nr_vecs = bio_get_nr_vecs(dev->bdev);
- u64 physical = bbio->stripes[dev_nr].physical;
-
-again:
- bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
- if (!bio)
- return -ENOMEM;
-
- while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
- if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
- bvec->bv_offset) < bvec->bv_len) {
- u64 len = bio->bi_iter.bi_size;
-
- atomic_inc(&bbio->stripes_pending);
- submit_stripe_bio(root, bbio, bio, physical, dev_nr,
- rw, async);
- physical += len;
- goto again;
- }
- bvec++;
- }
-
- submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
- return 0;
-}
-
static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
{
atomic_inc(&bbio->error);
@@ -5935,8 +5912,8 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_iter.bi_sector = logical >> 9;
-
- btrfs_end_bbio(bbio, bio, -EIO);
+ bio->bi_error = -EIO;
+ btrfs_end_bbio(bbio, bio);
}
}
@@ -5998,18 +5975,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
continue;
}
- /*
- * Check and see if we're ok with this bio based on it's size
- * and offset with the given device.
- */
- if (!bio_size_ok(dev->bdev, first_bio,
- bbio->stripes[dev_nr].physical >> 9)) {
- ret = breakup_stripe_bio(root, bbio, first_bio, dev,
- dev_nr, rw, async_submit);
- BUG_ON(ret);
- continue;
- }
-
if (dev_nr < total_devs - 1) {
bio = btrfs_bio_clone(first_bio, GFP_NOFS);
BUG_ON(!bio); /* -ENOMEM */
OpenPOWER on IntegriCloud