diff options
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 170 |
1 files changed, 103 insertions, 67 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 44fe66b53c8b..ee40384c394d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -20,6 +20,7 @@ #include "locking.h" #include "rcu-string.h" #include "backref.h" +#include "transaction.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -74,8 +75,7 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); - printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu " - "refs %d\n", + pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); @@ -460,8 +460,7 @@ static int insert_state(struct extent_io_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk(KERN_ERR "BTRFS: found node %llu %llu on insert of " - "%llu %llu\n", + pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n", found->start, found->end, start, end); return -EEXIST; } @@ -572,9 +571,8 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - btrfs_panic(tree_fs_info(tree), err, "Locking error: " - "Extent tree was modified by another " - "thread while locked."); + btrfs_panic(tree_fs_info(tree), err, + "Locking error: Extent tree was modified by another thread while locked."); } /* @@ -1729,7 +1727,7 @@ out_failed: } void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, - struct page *locked_page, + u64 delalloc_end, struct page *locked_page, unsigned clear_bits, unsigned long page_ops) { @@ -2122,8 +2120,9 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, if (failrec->in_validation) { /* there was no real error, just free the record */ - pr_debug("clean_io_failure: freeing dummy error at %llu\n", - failrec->start); + btrfs_debug(fs_info, + "clean_io_failure: freeing dummy error at %llu", + failrec->start); goto out; } if (fs_info->sb->s_flags & MS_RDONLY) @@ -2189,6 +2188,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct io_failure_record *failrec; struct extent_map *em; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; @@ -2236,8 +2236,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, em->compress_type); } - pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n", - logical, start, failrec->len); + btrfs_debug(fs_info, + "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu", + logical, start, failrec->len); failrec->logical = logical; free_extent_map(em); @@ -2255,9 +2256,10 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, return ret; } } else { - pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", - failrec->logical, failrec->start, failrec->len, - failrec->in_validation); + btrfs_debug(fs_info, + "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d", + failrec->logical, failrec->start, failrec->len, + failrec->in_validation); /* * when data can be on disk more than twice, add to failrec here * (e.g. with a list for failed_mirror) to make @@ -2273,18 +2275,19 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec, int failed_mirror) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int num_copies; - num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, - failrec->logical, failrec->len); + num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies == 1) { /* * we only have a single copy of the data, so don't bother with * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. */ - pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -2323,8 +2326,9 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, } if (failrec->this_mirror > num_copies) { - pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -2415,8 +2419,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, } bio_set_op_attrs(bio, REQ_OP_READ, read_mode); - pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n", - read_mode, failrec->this_mirror, failrec->in_validation); + btrfs_debug(btrfs_sb(inode->i_sb), + "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", + read_mode, failrec->this_mirror, failrec->in_validation); ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); @@ -2484,8 +2489,7 @@ static void end_bio_extent_writepage(struct bio *bio) bvec->bv_offset, bvec->bv_len); else btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, - "incomplete page write in btrfs with offset %u and " - "length %u", + "incomplete page write in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); } @@ -2541,10 +2545,12 @@ static void end_bio_extent_readpage(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " - "mirror=%u\n", (u64)bio->bi_iter.bi_sector, - bio->bi_error, io_bio->mirror_num); + btrfs_debug(fs_info, + "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", + (u64)bio->bi_iter.bi_sector, bio->bi_error, + io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; /* We always issue full-page reads, but if some block @@ -2554,13 +2560,12 @@ static void end_bio_extent_readpage(struct bio *bio) * if they don't add up to a full page. */ if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) { if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE) - btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info, - "partial page read in btrfs with offset %u and length %u", + btrfs_err(fs_info, + "partial page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); else - btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, - "incomplete page read in btrfs with offset %u and " - "length %u", + btrfs_info(fs_info, + "incomplete page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); } @@ -3624,7 +3629,6 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; - struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode); SetPageError(page); if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) @@ -3670,13 +3674,13 @@ static void set_btree_ioerr(struct page *page) */ switch (eb->log_index) { case -1: - set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags); break; case 0: - set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags); break; case 1: - set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags); break; default: BUG(); /* unexpected, logic error */ @@ -3721,8 +3725,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, struct block_device *bdev = fs_info->fs_devices->latest_bdev; struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; u64 offset = eb->start; + u32 nritems; unsigned long i, num_pages; unsigned long bio_flags = 0; + unsigned long start, end; int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META; int ret = 0; @@ -3732,6 +3738,23 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) bio_flags = EXTENT_BIO_TREE_LOG; + /* set btree blocks beyond nritems with 0 to avoid stale content. */ + nritems = btrfs_header_nritems(eb); + if (btrfs_header_level(eb) > 0) { + end = btrfs_node_key_ptr_offset(nritems); + + memset_extent_buffer(eb, 0, end, eb->len - end); + } else { + /* + * leaf: + * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 + */ + start = btrfs_item_nr_offset(nritems); + end = btrfs_leaf_data(eb) + + leaf_data_end(fs_info->tree_root, eb); + memset_extent_buffer(eb, 0, start, end - start); + } + for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; @@ -4487,21 +4510,36 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= (FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); } else if (fieinfo->fi_extents_max) { + struct btrfs_trans_handle *trans; + u64 bytenr = em->block_start - (em->start - em->orig_start); disko = em->block_start + offset_in_extent; /* + * We need a trans handle to get delayed refs + */ + trans = btrfs_join_transaction(root); + /* + * It's OK if we can't start a trans we can still check + * from commit_root + */ + if (IS_ERR(trans)) + trans = NULL; + + /* * As btrfs supports shared space, this information * can be exported to userspace tools via * flag FIEMAP_EXTENT_SHARED. If fi_extents_max == 0 * then we're just getting a count and we can skip the * lookup stuff. */ - ret = btrfs_check_shared(NULL, root->fs_info, + ret = btrfs_check_shared(trans, root->fs_info, root->objectid, btrfs_ino(inode), bytenr); + if (trans) + btrfs_end_transaction(trans, root); if (ret < 0) goto out_free; if (ret) @@ -5173,11 +5211,10 @@ int extent_buffer_uptodate(struct extent_buffer *eb) } int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, u64 start, int wait, + struct extent_buffer *eb, int wait, get_extent_t *get_extent, int mirror_num) { unsigned long i; - unsigned long start_i; struct page *page; int err; int ret = 0; @@ -5191,16 +5228,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_SHIFT) - - (eb->start >> PAGE_SHIFT); - } else { - start_i = 0; - } - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; if (wait == WAIT_NONE) { if (!trylock_page(page)) @@ -5209,21 +5238,29 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } locked_pages++; + } + /* + * We need to firstly lock all pages to make sure that + * the uptodate bit of our pages won't be affected by + * clear_extent_buffer_uptodate(). + */ + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; if (!PageUptodate(page)) { num_reads++; all_uptodate = 0; } } + if (all_uptodate) { - if (start_i == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); goto unlock_exit; } clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; if (!PageUptodate(page)) { @@ -5264,7 +5301,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (ret || wait != WAIT_COMPLETE) return ret; - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; wait_on_page_locked(page); if (!PageUptodate(page)) @@ -5274,12 +5311,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, return ret; unlock_exit: - i = start_i; while (locked_pages > 0) { - page = eb->pages[i]; - i++; - unlock_page(page); locked_pages--; + page = eb->pages[locked_pages]; + unlock_page(page); } return ret; } @@ -5382,8 +5417,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, } if (start + min_len > eb->len) { - WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " - "wanted %lu %lu\n", + WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); return -EINVAL; } @@ -5713,14 +5747,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, if (src_offset + len > dst->len) { btrfs_err(dst->fs_info, - "memmove bogus src_offset %lu move " - "len %lu dst len %lu", src_offset, len, dst->len); + "memmove bogus src_offset %lu move len %lu dst len %lu", + src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { btrfs_err(dst->fs_info, - "memmove bogus dst_offset %lu move " - "len %lu dst len %lu", dst_offset, len, dst->len); + "memmove bogus dst_offset %lu move len %lu dst len %lu", + dst_offset, len, dst->len); BUG_ON(1); } @@ -5760,13 +5794,15 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move " - "len %lu len %lu", src_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus src_offset %lu move len %lu len %lu", + src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move " - "len %lu len %lu", dst_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus dst_offset %lu move len %lu len %lu", + dst_offset, len, dst->len); BUG_ON(1); } if (dst_offset < src_offset) { |