From 07d400a6df4767a90d49a153fdb7f4cfa1e3f23e Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 6 Jan 2009 11:42:00 -0500 Subject: Btrfs: tree logging checksum fixes This patch contains the following things. 1) Limit the max size of btrfs_ordered_sum structure to PAGE_SIZE. This struct is kmalloced so we want to keep it reasonable. 2) Replace copy_extent_csums by btrfs_lookup_csums_range. This was duplicated code in tree-log.c. 3) Remove replay_one_csum. csum items are replayed at the same time as replaying file extents. This guarantees we only replay useful csums. 4) nbytes accounting fix. Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file-item.c | 62 ++++++----- fs/btrfs/inode.c | 5 +- fs/btrfs/tree-log.c | 293 +++++++++++++++---------------------------------- 4 files changed, 130 insertions(+), 232 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 171ca30a3755..293da650873f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5579,7 +5579,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) BUG_ON(ordered->file_offset != file_pos || ordered->len != len); disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; - ret = btrfs_lookup_csums_range(root, disk_bytenr, + ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, disk_bytenr + len - 1, &list); while (!list_empty(&list)) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b11abfad81a5..964652435fd1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -27,6 +27,12 @@ #define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ size) - 1)) + +#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ + sizeof(struct btrfs_ordered_sum)) / \ + sizeof(struct btrfs_sector_sum) * \ + (r)->sectorsize - (r)->sectorsize) + int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, @@ -259,8 +265,7 @@ 
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, key.offset = start; key.type = BTRFS_EXTENT_CSUM_KEY; - ret = btrfs_search_slot(NULL, root->fs_info->csum_root, - &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto fail; if (ret > 0 && path->slots[0] > 0) { @@ -279,7 +284,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, while (start <= end) { leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root->fs_info->csum_root, path); + ret = btrfs_next_leaf(root, path); if (ret < 0) goto fail; if (ret > 0) @@ -306,33 +311,38 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, continue; } - size = min(csum_end, end + 1) - start; - sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS); - BUG_ON(!sums); + csum_end = min(csum_end, end + 1); + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_csum_item); + while (start < csum_end) { + size = min_t(size_t, csum_end - start, + MAX_ORDERED_SUM_BYTES(root)); + sums = kzalloc(btrfs_ordered_sum_size(root, size), + GFP_NOFS); + BUG_ON(!sums); - sector_sum = sums->sums; - sums->bytenr = start; - sums->len = size; + sector_sum = sums->sums; + sums->bytenr = start; + sums->len = size; - offset = (start - key.offset) >> - root->fs_info->sb->s_blocksize_bits; - offset *= csum_size; + offset = (start - key.offset) >> + root->fs_info->sb->s_blocksize_bits; + offset *= csum_size; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_csum_item); - while (size > 0) { - read_extent_buffer(path->nodes[0], &sector_sum->sum, - ((unsigned long)item) + offset, - csum_size); - sector_sum->bytenr = start; - - size -= root->sectorsize; - start += root->sectorsize; - offset += csum_size; - sector_sum++; + while (size > 0) { + read_extent_buffer(path->nodes[0], + &sector_sum->sum, + ((unsigned long)item) + + offset, csum_size); + sector_sum->bytenr = 
start; + + size -= root->sectorsize; + start += root->sectorsize; + offset += csum_size; + sector_sum++; + } + list_add_tail(&sums->list, list); } - list_add_tail(&sums->list, list); - path->slots[0]++; } ret = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0ca9c3723c0..4e57fe68e4b9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -157,7 +157,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = start; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - inode_add_bytes(inode, size); datasize = btrfs_file_extent_calc_inline_size(cur_size); inode_add_bytes(inode, size); @@ -920,8 +919,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, struct btrfs_ordered_sum *sums; LIST_HEAD(list); - ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1, - &list); + ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, + bytenr + num_bytes - 1, &list); if (ret == 0 && list_empty(&list)) return 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3a72a1b6c247..332ec35d2c08 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -433,49 +433,6 @@ insert: trans->transid); } } - - if (overwrite_root && - key->type == BTRFS_EXTENT_DATA_KEY) { - int extent_type; - struct btrfs_file_extent_item *fi; - - fi = (struct btrfs_file_extent_item *)dst_ptr; - extent_type = btrfs_file_extent_type(path->nodes[0], fi); - if (extent_type == BTRFS_FILE_EXTENT_REG || - extent_type == BTRFS_FILE_EXTENT_PREALLOC) { - struct btrfs_key ins; - ins.objectid = btrfs_file_extent_disk_bytenr( - path->nodes[0], fi); - ins.offset = btrfs_file_extent_disk_num_bytes( - path->nodes[0], fi); - ins.type = BTRFS_EXTENT_ITEM_KEY; - - /* - * is this extent already allocated in the extent - * allocation tree? 
If so, just add a reference - */ - ret = btrfs_lookup_extent(root, ins.objectid, - ins.offset); - if (ret == 0) { - ret = btrfs_inc_extent_ref(trans, root, - ins.objectid, ins.offset, - path->nodes[0]->start, - root->root_key.objectid, - trans->transid, key->objectid); - } else { - /* - * insert the extent pointer in the extent - * allocation tree - */ - ret = btrfs_alloc_logged_extent(trans, root, - path->nodes[0]->start, - root->root_key.objectid, - trans->transid, key->objectid, - &ins); - BUG_ON(ret); - } - } - } no_copy: btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); @@ -530,6 +487,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, u64 extent_end; u64 alloc_hint; u64 start = key->offset; + u64 saved_nbytes; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -591,17 +549,95 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(root, path); + saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, start, extent_end, start, &alloc_hint); BUG_ON(ret); - /* insert the extent */ - ret = overwrite_item(trans, root, path, eb, slot, key); - BUG_ON(ret); + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + unsigned long dest_offset; + struct btrfs_key ins; + + ret = btrfs_insert_empty_item(trans, root, path, key, + sizeof(*item)); + BUG_ON(ret); + dest_offset = btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + copy_extent_buffer(path->nodes[0], eb, dest_offset, + (unsigned long)item, sizeof(*item)); + + ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); + ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); + ins.type = BTRFS_EXTENT_ITEM_KEY; + + if (ins.objectid > 0) { + u64 csum_start; + u64 csum_end; + LIST_HEAD(ordered_sums); + /* + * is this extent already allocated in the extent + * allocation tree? 
If so, just add a reference + */ + ret = btrfs_lookup_extent(root, ins.objectid, + ins.offset); + if (ret == 0) { + ret = btrfs_inc_extent_ref(trans, root, + ins.objectid, ins.offset, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, key->objectid); + } else { + /* + * insert the extent pointer in the extent + * allocation tree + */ + ret = btrfs_alloc_logged_extent(trans, root, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, key->objectid, + &ins); + BUG_ON(ret); + } + btrfs_release_path(root, path); + + if (btrfs_file_extent_compression(eb, item)) { + csum_start = ins.objectid; + csum_end = csum_start + ins.offset; + } else { + csum_start = ins.objectid + + btrfs_file_extent_offset(eb, item); + csum_end = csum_start + + btrfs_file_extent_num_bytes(eb, item); + } + + ret = btrfs_lookup_csums_range(root->log_root, + csum_start, csum_end - 1, + &ordered_sums); + BUG_ON(ret); + while (!list_empty(&ordered_sums)) { + struct btrfs_ordered_sum *sums; + sums = list_entry(ordered_sums.next, + struct btrfs_ordered_sum, + list); + ret = btrfs_csum_file_blocks(trans, + root->fs_info->csum_root, + sums); + BUG_ON(ret); + list_del(&sums->list); + kfree(sums); + } + } else { + btrfs_release_path(root, path); + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + /* inline extents are easy, we just overwrite them */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + } - /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */ - inode_add_bytes(inode, extent_end - start); + inode_set_bytes(inode, saved_nbytes); btrfs_update_inode(trans, root, inode); out: if (inode) @@ -902,70 +938,6 @@ out_nowrite: return 0; } -/* - * replay one csum item from the log tree into the subvolume 'root' - * eb, slot and key all refer to the log tree - * path is for temp use by this function and should be released on return - * - * This copies the checksums out of the log tree and inserts them into - * the subvolume. 
Any existing checksums for this range in the file - * are overwritten, and new items are added where required. - * - * We keep this simple by reusing the btrfs_ordered_sum code from - * the data=ordered mode. This basically means making a copy - * of all the checksums in ram, which we have to do anyway for kmap - * rules. - * - * The copy is then sent down to btrfs_csum_file_blocks, which - * does all the hard work of finding existing items in the file - * or adding new ones. - */ -static noinline int replay_one_csum(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct extent_buffer *eb, int slot, - struct btrfs_key *key) -{ - int ret; - u32 item_size = btrfs_item_size_nr(eb, slot); - u64 cur_offset; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); - unsigned long file_bytes; - struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; - unsigned long ptr; - - file_bytes = (item_size / csum_size) * root->sectorsize; - sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); - if (!sums) - return -ENOMEM; - - INIT_LIST_HEAD(&sums->list); - sums->len = file_bytes; - sums->bytenr = key->offset; - - /* - * copy all the sums into the ordered sum struct - */ - sector_sum = sums->sums; - cur_offset = key->offset; - ptr = btrfs_item_ptr_offset(eb, slot); - while (item_size > 0) { - sector_sum->bytenr = cur_offset; - read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size); - sector_sum++; - item_size -= csum_size; - ptr += csum_size; - cur_offset += root->sectorsize; - } - - /* let btrfs_csum_file_blocks add them into the file */ - ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums); - BUG_ON(ret); - kfree(sums); - return 0; -} /* * There are a few corners where the link count of the file can't * be properly maintained during replay. 
So, instead of adding @@ -1659,10 +1631,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, ret = replay_one_extent(wc->trans, root, path, eb, i, &key); BUG_ON(ret); - } else if (key.type == BTRFS_EXTENT_CSUM_KEY) { - ret = replay_one_csum(wc->trans, root, path, - eb, i, &key); - BUG_ON(ret); } else if (key.type == BTRFS_DIR_ITEM_KEY || key.type == BTRFS_DIR_INDEX_KEY) { ret = replay_one_dir_item(wc->trans, root, path, @@ -2021,7 +1989,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) .process_func = process_one_buffer }; - if (!root->log_root) + if (!root->log_root || root->fs_info->log_root_recovering) return 0; log = root->log_root; @@ -2453,86 +2421,6 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, return 0; } -static noinline int copy_extent_csums(struct btrfs_trans_handle *trans, - struct list_head *list, - struct btrfs_root *root, - u64 disk_bytenr, u64 len) -{ - struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; - int ret; - struct btrfs_path *path; - struct btrfs_csum_item *item = NULL; - u64 end = disk_bytenr + len; - u64 item_start_offset = 0; - u64 item_last_offset = 0; - u32 diff; - u32 sum; - u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); - - sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS); - - sector_sum = sums->sums; - sums->bytenr = disk_bytenr; - sums->len = len; - list_add_tail(&sums->list, list); - - path = btrfs_alloc_path(); - while (disk_bytenr < end) { - if (!item || disk_bytenr < item_start_offset || - disk_bytenr >= item_last_offset) { - struct btrfs_key found_key; - u32 item_size; - - if (item) - btrfs_release_path(root, path); - item = btrfs_lookup_csum(NULL, root, path, - disk_bytenr, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - if (ret == -ENOENT || ret == -EFBIG) - ret = 0; - sum = 0; - printk(KERN_INFO "log no csum found for " - "byte %llu\n", - (unsigned long long)disk_bytenr); - item = NULL; 
- btrfs_release_path(root, path); - goto found; - } - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - path->slots[0]); - - item_start_offset = found_key.offset; - item_size = btrfs_item_size_nr(path->nodes[0], - path->slots[0]); - item_last_offset = item_start_offset + - (item_size / csum_size) * - root->sectorsize; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_csum_item); - } - /* - * this byte range must be able to fit inside - * a single leaf so it will also fit inside a u32 - */ - diff = disk_bytenr - item_start_offset; - diff = diff / root->sectorsize; - diff = diff * csum_size; - - read_extent_buffer(path->nodes[0], &sum, - ((unsigned long)item) + diff, - csum_size); -found: - sector_sum->bytenr = disk_bytenr; - sector_sum->sum = sum; - disk_bytenr += root->sectorsize; - sector_sum++; - } - btrfs_free_path(path); - return 0; -} - static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_root *log, struct btrfs_path *dst_path, @@ -2622,10 +2510,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, trans->transid, ins_keys[i].objectid); BUG_ON(ret); - ret = copy_extent_csums(trans, - &ordered_sums, - log->fs_info->csum_root, - ds + cs, cl); + ret = btrfs_lookup_csums_range( + log->fs_info->csum_root, + ds + cs, ds + cs + cl - 1, + &ordered_sums); BUG_ON(ret); } } @@ -2942,9 +2830,9 @@ again: tmp_key.offset = (u64)-1; wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); - BUG_ON(!wc.replay_dest); + wc.replay_dest->log_root = log; btrfs_record_root_in_trans(wc.replay_dest); ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); @@ -2961,6 +2849,7 @@ again: } key.offset = found_key.offset - 1; + wc.replay_dest->log_root = NULL; free_extent_buffer(log->node); kfree(log); -- cgit v1.2.1