author | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600
---|---|---
committer | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600
commit | cc9b94029e9ef51787af908e9856b1eed314bc00 (patch)
tree | 9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /fs/btrfs
parent | 13bed58ce8748d430a26e353a09b89f9d613a71f (diff)
parent | 1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff)
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'fs/btrfs')
65 files changed, 5016 insertions, 3114 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 67a607709d4f..247b8dfaf6e5 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -55,8 +55,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	}
 	if (size > 0) {
 		acl = posix_acl_from_xattr(&init_user_ns, value, size);
-	} else if (size == -ENOENT || size == -ENODATA || size == 0) {
-		/* FIXME, who returns -ENOENT? I think nobody */
+	} else if (size == -ERANGE || size == -ENODATA || size == 0) {
 		acl = NULL;
 	} else {
 		acl = ERR_PTR(-EIO);
@@ -80,11 +79,9 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
 	case ACL_TYPE_ACCESS:
 		name = XATTR_NAME_POSIX_ACL_ACCESS;
 		if (acl) {
-			ret = posix_acl_equiv_mode(acl, &inode->i_mode);
-			if (ret < 0)
+			ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+			if (ret)
 				return ret;
-			if (ret == 0)
-				acl = NULL;
 		}
 		ret = 0;
 		break;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5fb60ea7eee2..e0f071f6b5a7 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -34,6 +34,10 @@
 struct __btrfs_workqueue {
 	struct workqueue_struct *normal_wq;
+
+	/* File system this workqueue services */
+	struct btrfs_fs_info *fs_info;
+
 	/* List head pointing to ordered work list */
 	struct list_head ordered_list;
@@ -70,6 +74,18 @@ void btrfs_##name(struct work_struct *arg)	\
 	normal_work_helper(work);		\
 }
+
+struct btrfs_fs_info *
+btrfs_workqueue_owner(struct __btrfs_workqueue *wq)
+{
+	return wq->fs_info;
+}
+
+struct btrfs_fs_info *
+btrfs_work_owner(struct btrfs_work *work)
+{
+	return work->wq->fs_info;
+}
+
 BTRFS_WORK_HELPER(worker_helper);
 BTRFS_WORK_HELPER(delalloc_helper);
 BTRFS_WORK_HELPER(flush_delalloc_helper);
@@ -94,14 +110,15 @@ BTRFS_WORK_HELPER(scrubnc_helper);
 BTRFS_WORK_HELPER(scrubparity_helper);
 
 static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
-			int thresh)
+__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
+			unsigned int flags, int limit_active, int thresh)
 {
 	struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
 
 	if (!ret)
 		return NULL;
 
+	ret->fs_info = fs_info;
 	ret->limit_active = limit_active;
 	atomic_set(&ret->pending, 0);
 	if (thresh == 0)
@@ -143,7 +160,8 @@ __btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
 static inline void
 __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
 
-struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+					      const char *name,
 					      unsigned int flags,
 					      int limit_active,
 					      int thresh)
@@ -153,7 +171,8 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
 	if (!ret)
 		return NULL;
 
-	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
+	ret->normal = __btrfs_alloc_workqueue(fs_info, name,
+					      flags & ~WQ_HIGHPRI,
 					      limit_active, thresh);
 	if (!ret->normal) {
 		kfree(ret);
@@ -161,8 +180,8 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
 	}
 
 	if (flags & WQ_HIGHPRI) {
-		ret->high = __btrfs_alloc_workqueue(name, flags, limit_active,
-						    thresh);
+		ret->high = __btrfs_alloc_workqueue(fs_info, name, flags,
+						    limit_active, thresh);
 		if (!ret->high) {
 			__btrfs_destroy_workqueue(ret->normal);
 			kfree(ret);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index ad4d0647d1a6..8e52484cd461 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -21,6 +21,7 @@
 #define __BTRFS_ASYNC_THREAD_
 #include <linux/workqueue.h>
 
+struct btrfs_fs_info;
 struct btrfs_workqueue;
 /* Internal use only */
 struct __btrfs_workqueue;
@@ -67,7 +68,8 @@ BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
 BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
 
-struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+					      const char *name,
 					      unsigned int flags,
 					      int limit_active,
 					      int thresh);
@@ -80,4 +82,6 @@ void btrfs_queue_work(struct btrfs_workqueue *wq,
 void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
 void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
 void btrfs_set_work_high_priority(struct btrfs_work *work);
+struct btrfs_fs_info *btrfs_work_owner(struct btrfs_work *work);
+struct btrfs_fs_info *btrfs_workqueue_owner(struct __btrfs_workqueue *wq);
 #endif
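The new btrfs_workqueue_owner()/btrfs_work_owner() accessors above hand back the fs_info recorded at queue-allocation time, so code holding only a work item can find the filesystem it belongs to. A minimal hedged sketch of a caller, assuming only the accessors added in this hunk (the helper itself is illustrative, not part of this commit):

    /* Hypothetical sketch: report which filesystem a work item serves. */
    static void demo_report_owner(struct btrfs_work *work)
    {
    	struct btrfs_fs_info *fs_info = btrfs_work_owner(work);

    	/* fs_info enables fs-scoped messages instead of bare printk */
    	btrfs_warn(fs_info, "work %p belongs to this filesystem", work);
    }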
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8bb3509099e8..85dc7ab8f89e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/vmalloc.h>
+#include <linux/rbtree.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "backref.h"
@@ -34,6 +35,265 @@ struct extent_inode_elem {
 	struct extent_inode_elem *next;
 };
 
+/*
+ * ref_root is used as the root of the ref tree that holds a collection
+ * of unique references.
+ */
+struct ref_root {
+	struct rb_root rb_root;
+
+	/*
+	 * unique_refs represents the number of ref_nodes with a positive
+	 * count stored in the tree. Even if a ref_node (whose count is
+	 * greater than one) is added, unique_refs will only increase by one.
+	 */
+	unsigned int unique_refs;
+};
+
+/* ref_node is used to store a unique reference in the ref tree. */
+struct ref_node {
+	struct rb_node rb_node;
+
+	/* For NORMAL_REF, otherwise all these fields should be set to 0 */
+	u64 root_id;
+	u64 object_id;
+	u64 offset;
+
+	/* For SHARED_REF, otherwise parent field should be set to 0 */
+	u64 parent;
+
+	/* Ref to the ref_mod of btrfs_delayed_ref_node */
+	int ref_mod;
+};
+
+/* Dynamically allocate and initialize a ref_root */
+static struct ref_root *ref_root_alloc(void)
+{
+	struct ref_root *ref_tree;
+
+	ref_tree = kmalloc(sizeof(*ref_tree), GFP_NOFS);
+	if (!ref_tree)
+		return NULL;
+
+	ref_tree->rb_root = RB_ROOT;
+	ref_tree->unique_refs = 0;
+
+	return ref_tree;
+}
+
+/* Free all nodes in the ref tree, and reinit ref_root */
+static void ref_root_fini(struct ref_root *ref_tree)
+{
+	struct ref_node *node;
+	struct rb_node *next;
+
+	while ((next = rb_first(&ref_tree->rb_root)) != NULL) {
+		node = rb_entry(next, struct ref_node, rb_node);
+		rb_erase(next, &ref_tree->rb_root);
+		kfree(node);
+	}
+
+	ref_tree->rb_root = RB_ROOT;
+	ref_tree->unique_refs = 0;
+}
+
+static void ref_root_free(struct ref_root *ref_tree)
+{
+	if (!ref_tree)
+		return;
+
+	ref_root_fini(ref_tree);
+	kfree(ref_tree);
+}
+/*
+ * Compare ref_node with (root_id, object_id, offset, parent)
+ *
+ * The function compares two ref_nodes a and b. It returns an integer less
+ * than, equal to, or greater than zero if a is found, respectively, to be
+ * less than, to match, or to be greater than b.
+ */
+static int ref_node_cmp(struct ref_node *a, struct ref_node *b)
+{
+	if (a->root_id < b->root_id)
+		return -1;
+	else if (a->root_id > b->root_id)
+		return 1;
+
+	if (a->object_id < b->object_id)
+		return -1;
+	else if (a->object_id > b->object_id)
+		return 1;
+
+	if (a->offset < b->offset)
+		return -1;
+	else if (a->offset > b->offset)
+		return 1;
+
+	if (a->parent < b->parent)
+		return -1;
+	else if (a->parent > b->parent)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Search for a ref_node with (root_id, object_id, offset, parent) in the tree
+ *
+ * if found, the pointer of the ref_node will be returned;
+ * if not found, NULL will be returned and pos will point to the rb_node for
+ * insert, pos_parent will point to pos's parent for insert;
+ */
+static struct ref_node *__ref_tree_search(struct ref_root *ref_tree,
+					  struct rb_node ***pos,
+					  struct rb_node **pos_parent,
+					  u64 root_id, u64 object_id,
+					  u64 offset, u64 parent)
+{
+	struct ref_node *cur = NULL;
+	struct ref_node entry;
+	int ret;
+
+	entry.root_id = root_id;
+	entry.object_id = object_id;
+	entry.offset = offset;
+	entry.parent = parent;
+
+	*pos = &ref_tree->rb_root.rb_node;
+
+	while (**pos) {
+		*pos_parent = **pos;
+		cur = rb_entry(*pos_parent, struct ref_node, rb_node);
+
+		ret = ref_node_cmp(cur, &entry);
+		if (ret > 0)
+			*pos = &(**pos)->rb_left;
+		else if (ret < 0)
+			*pos = &(**pos)->rb_right;
+		else
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Insert a ref_node into the ref tree
+ * @pos used to specify the position to insert
+ * @pos_parent used to specify pos's parent
+ *
+ * success, return 0;
+ * ref_node already exists, return -EEXIST;
+ */
+static int ref_tree_insert(struct ref_root *ref_tree, struct rb_node **pos,
+			   struct rb_node *pos_parent, struct ref_node *ins)
+{
+	struct rb_node **p = NULL;
+	struct rb_node *parent = NULL;
+	struct ref_node *cur = NULL;
+
+	if (!pos) {
+		cur = __ref_tree_search(ref_tree, &p, &parent, ins->root_id,
+					ins->object_id, ins->offset,
+					ins->parent);
+		if (cur)
+			return -EEXIST;
+	} else {
+		p = pos;
+		parent = pos_parent;
+	}
+
+	rb_link_node(&ins->rb_node, parent, p);
+	rb_insert_color(&ins->rb_node, &ref_tree->rb_root);
+
+	return 0;
+}
+
+/* Erase and free ref_node, caller should update ref_root->unique_refs */
+static void ref_tree_remove(struct ref_root *ref_tree, struct ref_node *node)
+{
+	rb_erase(&node->rb_node, &ref_tree->rb_root);
+	kfree(node);
+}
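The search/insert split above is the stock kernel rb-tree idiom: one walk down the tree both finds an existing node and remembers the link site, so a following insert needs no second walk. A hedged usage sketch of the two helpers just added (names and values are illustrative; unique_refs accounting is omitted here because ref_tree_add() below shows the full flow):

    /* Hypothetical sketch of the search-then-insert idiom above. */
    static int demo_tree_add(struct ref_root *ref_tree, u64 root_id)
    {
    	struct rb_node **pos = NULL;
    	struct rb_node *pos_parent = NULL;
    	struct ref_node *node;

    	/* single walk; pos/pos_parent remember the link position */
    	node = __ref_tree_search(ref_tree, &pos, &pos_parent,
    				 root_id, 0, 0, 0);
    	if (node)
    		return -EEXIST;		/* already present */

    	node = kmalloc(sizeof(*node), GFP_NOFS);
    	if (!node)
    		return -ENOMEM;
    	node->root_id = root_id;
    	node->object_id = 0;
    	node->offset = 0;
    	node->parent = 0;
    	node->ref_mod = 1;

    	/* reuses the remembered position: no second tree walk */
    	return ref_tree_insert(ref_tree, pos, pos_parent, node);
    }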
+/*
+ * Update ref_root->unique_refs
+ *
+ * Call __ref_tree_search
+ * 1. if ref_node doesn't exist, ref_tree_insert this node, and update
+ *    ref_root->unique_refs:
+ *    if ref_node->ref_mod > 0, ref_root->unique_refs++;
+ *    if ref_node->ref_mod < 0, do nothing;
+ *
+ * 2. if ref_node is found, then get origin ref_node->ref_mod, and update
+ *    ref_node->ref_mod.
+ *    if ref_node->ref_mod is equal to 0, then call ref_tree_remove
+ *
+ *    according to origin_mod and new_mod, update ref_root->unique_refs
+ *    +----------------+--------------+-------------+
+ *    |                |new_count <= 0|new_count > 0|
+ *    +----------------+--------------+-------------+
+ *    |origin_count < 0|       0      |       1     |
+ *    +----------------+--------------+-------------+
+ *    |origin_count > 0|      -1      |       0     |
+ *    +----------------+--------------+-------------+
+ *
+ * In case of allocation failure, -ENOMEM is returned and the ref_tree stays
+ * unaltered.
+ * Success, return 0
+ */
+static int ref_tree_add(struct ref_root *ref_tree, u64 root_id, u64 object_id,
+			u64 offset, u64 parent, int count)
+{
+	struct ref_node *node = NULL;
+	struct rb_node **pos = NULL;
+	struct rb_node *pos_parent = NULL;
+	int origin_count;
+	int ret;
+
+	if (!count)
+		return 0;
+
+	node = __ref_tree_search(ref_tree, &pos, &pos_parent, root_id,
+				 object_id, offset, parent);
+	if (node == NULL) {
+		node = kmalloc(sizeof(*node), GFP_NOFS);
+		if (!node)
+			return -ENOMEM;
+
+		node->root_id = root_id;
+		node->object_id = object_id;
+		node->offset = offset;
+		node->parent = parent;
+		node->ref_mod = count;
+
+		ret = ref_tree_insert(ref_tree, pos, pos_parent, node);
+		ASSERT(!ret);
+		if (ret) {
+			kfree(node);
+			return ret;
+		}
+
+		ref_tree->unique_refs += node->ref_mod > 0 ? 1 : 0;
+
+		return 0;
+	}
+
+	origin_count = node->ref_mod;
+	node->ref_mod += count;
+
+	if (node->ref_mod > 0)
+		ref_tree->unique_refs += origin_count > 0 ? 0 : 1;
+	else if (node->ref_mod <= 0)
+		ref_tree->unique_refs += origin_count > 0 ? -1 : 0;
+
+	if (!node->ref_mod)
+		ref_tree_remove(ref_tree, node);
+
+	return 0;
+}
+
 static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
 			      struct btrfs_file_extent_item *fi,
 			      u64 extent_item_pos,
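The transition table in the comment above is the heart of the sharedness test: unique_refs counts how many distinct references currently have a positive count. A hedged sketch of how a caller could drive ref_tree_add() and read the result (the root and object IDs below are made-up illustration values, not from this commit):

    /* Hypothetical sketch, assuming the ref-tree helpers added above. */
    static bool demo_extent_is_shared(struct ref_root *ref_tree)
    {
    	/* two different roots each add one ref to the same extent */
    	if (ref_tree_add(ref_tree, 5, 257, 0, 0, 1))
    		return false;	/* -ENOMEM: a real caller would bail out */
    	if (ref_tree_add(ref_tree, 6, 257, 0, 0, 1))
    		return false;

    	/* two ref_nodes with positive ref_mod -> extent is shared */
    	return ref_tree->unique_refs > 1;
    }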
@@ -139,7 +399,7 @@ int __init btrfs_prelim_ref_init(void)
 	btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
 					sizeof(struct __prelim_ref),
 					0,
-					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					SLAB_MEM_SPREAD,
 					NULL);
 	if (!btrfs_prelim_ref_cache)
 		return -ENOMEM;
@@ -361,7 +621,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 	}
 
-	if (btrfs_test_is_dummy_root(root)) {
+	if (btrfs_is_testing(fs_info)) {
 		srcu_read_unlock(&fs_info->subvol_srcu, index);
 		ret = -ENOENT;
 		goto out;
@@ -390,8 +650,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	/* root node has been locked, we can release @subvol_srcu safely here */
 	srcu_read_unlock(&fs_info->subvol_srcu, index);
 
-	pr_debug("search slot in root %llu (level %d, ref count %d) returned "
-		 "%d for key (%llu %u %llu)\n",
+	btrfs_debug(fs_info,
+		"search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
 		 ref->root_id, level, ref->count, ret,
 		 ref->key_for_search.objectid, ref->key_for_search.type,
 		 ref->key_for_search.offset);
@@ -589,6 +849,7 @@ static void __merge_refs(struct list_head *head, int mode)
 
 			list_del(&ref2->list);
 			kmem_cache_free(btrfs_prelim_ref_cache, ref2);
+			cond_resched();
 		}
 
 	}
@@ -699,6 +960,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 			     struct btrfs_path *path, u64 bytenr,
 			     int *info_level, struct list_head *prefs,
+			     struct ref_root *ref_tree,
 			     u64 *total_refs, u64 inum)
 {
 	int ret = 0;
@@ -766,6 +1028,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 			count = btrfs_shared_data_ref_count(leaf, sdref);
 			ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
 					       bytenr, count, GFP_NOFS);
+			if (ref_tree) {
+				if (!ret)
+					ret = ref_tree_add(ref_tree, 0, 0, 0,
+							   bytenr, count);
+				if (!ret && ref_tree->unique_refs > 1)
+					ret = BACKREF_FOUND_SHARED;
+			}
 			break;
 		}
 		case BTRFS_TREE_BLOCK_REF_KEY:
@@ -793,6 +1062,15 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 			root = btrfs_extent_data_ref_root(leaf, dref);
 			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
 					       bytenr, count, GFP_NOFS);
+			if (ref_tree) {
+				if (!ret)
+					ret = ref_tree_add(ref_tree, root,
+							   key.objectid,
+							   key.offset, 0,
+							   count);
+				if (!ret && ref_tree->unique_refs > 1)
+					ret = BACKREF_FOUND_SHARED;
+			}
 			break;
 		}
 		default:
@@ -811,7 +1089,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
  */
 static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 			    struct btrfs_path *path, u64 bytenr,
-			    int info_level, struct list_head *prefs, u64 inum)
+			    int info_level, struct list_head *prefs,
+			    struct ref_root *ref_tree, u64 inum)
 {
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	int ret;
@@ -854,6 +1133,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 			count = btrfs_shared_data_ref_count(leaf, sdref);
 			ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
 					       bytenr, count, GFP_NOFS);
+			if (ref_tree) {
+				if (!ret)
+					ret = ref_tree_add(ref_tree, 0, 0, 0,
+							   bytenr, count);
+				if (!ret && ref_tree->unique_refs > 1)
+					ret = BACKREF_FOUND_SHARED;
+			}
 			break;
 		}
 		case BTRFS_TREE_BLOCK_REF_KEY:
@@ -882,6 +1168,15 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 			root = btrfs_extent_data_ref_root(leaf, dref);
 			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
 					       bytenr, count, GFP_NOFS);
+			if (ref_tree) {
+				if (!ret)
+					ret = ref_tree_add(ref_tree, root,
+							   key.objectid,
+							   key.offset, 0,
+							   count);
+				if (!ret && ref_tree->unique_refs > 1)
+					ret = BACKREF_FOUND_SHARED;
+			}
 			break;
 		}
 		default:
@@ -908,13 +1203,16 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  * commit root.
  * The special case is for qgroup to search roots in commit_transaction().
  *
+ * If check_shared is set to 1 and any extent has more than one ref item,
+ * BACKREF_FOUND_SHARED will be returned immediately.
+ *
  * FIXME some caching might speed things up
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
 			     u64 time_seq, struct ulist *refs,
 			     struct ulist *roots, const u64 *extent_item_pos,
-			     u64 root_objectid, u64 inum)
+			     u64 root_objectid, u64 inum, int check_shared)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -926,6 +1224,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct list_head prefs;
 	struct __prelim_ref *ref;
 	struct extent_inode_elem *eie = NULL;
+	struct ref_root *ref_tree = NULL;
 	u64 total_refs = 0;
 
 	INIT_LIST_HEAD(&prefs);
@@ -957,6 +1256,18 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 again:
 	head = NULL;
 
+	if (check_shared) {
+		if (!ref_tree) {
+			ref_tree = ref_root_alloc();
+			if (!ref_tree) {
+				ret = -ENOMEM;
+				goto out;
+			}
+		} else {
+			ref_root_fini(ref_tree);
+		}
+	}
+
 	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
@@ -1001,6 +1312,36 @@ again:
 		} else {
 			spin_unlock(&delayed_refs->lock);
 		}
+
+		if (check_shared && !list_empty(&prefs_delayed)) {
+			/*
+			 * Add all delay_ref to the ref_tree and check if there
+			 * are multiple ref items added.
+			 */
+			list_for_each_entry(ref, &prefs_delayed, list) {
+				if (ref->key_for_search.type) {
+					ret = ref_tree_add(ref_tree,
+						ref->root_id,
+						ref->key_for_search.objectid,
+						ref->key_for_search.offset,
+						0, ref->count);
+					if (ret)
+						goto out;
+				} else {
+					ret = ref_tree_add(ref_tree, 0, 0, 0,
+						     ref->parent, ref->count);
+					if (ret)
+						goto out;
+				}
+
+			}
+
+			if (ref_tree->unique_refs > 1) {
+				ret = BACKREF_FOUND_SHARED;
+				goto out;
+			}
+
+		}
 	}
 
 	if (path->slots[0]) {
@@ -1016,11 +1357,13 @@ again:
 		    key.type == BTRFS_METADATA_ITEM_KEY)) {
 			ret = __add_inline_refs(fs_info, path, bytenr,
 						&info_level, &prefs,
-						&total_refs, inum);
+						ref_tree, &total_refs,
+						inum);
 			if (ret)
 				goto out;
 			ret = __add_keyed_refs(fs_info, path, bytenr,
-					       info_level, &prefs, inum);
+					       info_level, &prefs,
+					       ref_tree, inum);
 			if (ret)
 				goto out;
 		}
@@ -1105,6 +1448,7 @@ again:
 
 out:
 	btrfs_free_path(path);
+	ref_root_free(ref_tree);
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
 		list_del(&ref->list);
@@ -1158,8 +1502,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 	if (!*leafs)
 		return -ENOMEM;
 
-	ret = find_parent_nodes(trans, fs_info, bytenr,
-				time_seq, *leafs, NULL, extent_item_pos, 0, 0);
+	ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
+				*leafs, NULL, extent_item_pos, 0, 0, 0);
 	if (ret < 0 && ret != -ENOENT) {
 		free_leaf_list(*leafs);
 		return ret;
@@ -1201,8 +1545,8 @@ static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
-		ret = find_parent_nodes(trans, fs_info, bytenr,
-					time_seq, tmp, *roots, NULL, 0, 0);
+		ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
+					tmp, *roots, NULL, 0, 0, 0);
 		if (ret < 0 && ret != -ENOENT) {
 			ulist_free(tmp);
 			ulist_free(*roots);
@@ -1272,7 +1616,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
 		ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
-					roots, NULL, root_objectid, inum);
+					roots, NULL, root_objectid, inum, 1);
 		if (ret == BACKREF_FOUND_SHARED) {
 			/* this is the only condition under which we return 1 */
 			ret = 1;
@@ -1491,7 +1835,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 
 	if (found_key->objectid > logical ||
 	    found_key->objectid + size <= logical) {
-		pr_debug("logical %llu is not within any extent\n", logical);
+		btrfs_debug(fs_info,
+			"logical %llu is not within any extent", logical);
 		return -ENOENT;
 	}
 
@@ -1502,8 +1847,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
 	flags = btrfs_extent_flags(eb, ei);
 
-	pr_debug("logical %llu is at position %llu within the extent (%llu "
-		 "EXTENT_ITEM %llu) flags %#llx size %u\n",
+	btrfs_debug(fs_info,
+		"logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u",
 		 logical, logical - found_key->objectid, found_key->objectid,
 		 found_key->offset, flags, item_size);
@@ -1624,21 +1969,24 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 	return 0;
 }
 
-static int iterate_leaf_refs(struct extent_inode_elem *inode_list,
-			     u64 root, u64 extent_item_objectid,
-			     iterate_extent_inodes_t *iterate, void *ctx)
+static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
+			     struct extent_inode_elem *inode_list,
+			     u64 root, u64 extent_item_objectid,
+			     iterate_extent_inodes_t *iterate, void *ctx)
 {
 	struct extent_inode_elem *eie;
 	int ret = 0;
 
 	for (eie = inode_list; eie; eie = eie->next) {
-		pr_debug("ref for
 %llu resolved, key (%llu EXTEND_DATA %llu), "
-			 "root %llu\n", extent_item_objectid,
-			 eie->inum, eie->offset, root);
+		btrfs_debug(fs_info,
+			    "ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu",
+			    extent_item_objectid, eie->inum,
+			    eie->offset, root);
 		ret = iterate(eie->inum, eie->offset, root, ctx);
 		if (ret) {
-			pr_debug("stopping iteration for %llu due to ret=%d\n",
-				 extent_item_objectid, ret);
+			btrfs_debug(fs_info,
+				    "stopping iteration for %llu due to ret=%d",
+				    extent_item_objectid, ret);
 			break;
 		}
 	}
@@ -1666,7 +2014,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	struct ulist_iterator ref_uiter;
 	struct ulist_iterator root_uiter;
 
-	pr_debug("resolving all inodes for extent %llu\n",
+	btrfs_debug(fs_info, "resolving all inodes for extent %llu",
 			extent_item_objectid);
 
 	if (!search_commit_root) {
@@ -1692,10 +2040,12 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 			break;
 		ULIST_ITER_INIT(&root_uiter);
 		while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
-			pr_debug("root %llu references leaf %llu, data list "
-				 "%#llx\n", root_node->val, ref_node->val,
-				 ref_node->aux);
-			ret = iterate_leaf_refs((struct extent_inode_elem *)
+			btrfs_debug(fs_info,
+				    "root %llu references leaf %llu, data list %#llx",
+				    root_node->val, ref_node->val,
+				    ref_node->aux);
+			ret = iterate_leaf_refs(fs_info,
+						(struct extent_inode_elem *)
 						(uintptr_t)ref_node->aux,
 						root_node->val,
 						extent_item_objectid,
@@ -1791,9 +2141,9 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 		for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
 			name_len = btrfs_inode_ref_name_len(eb, iref);
 			/* path must be released before calling iterate()! */
-			pr_debug("following ref at offset %u for inode %llu in "
-				 "tree %llu\n", cur, found_key.objectid,
-				 fs_root->objectid);
+			btrfs_debug(fs_root->fs_info,
+				"following ref at offset %u for inode %llu in tree %llu",
+				cur, found_key.objectid, fs_root->objectid);
 			ret = iterate(parent, name_len,
 				      (unsigned long)(iref + 1), eb, ctx);
 			if (ret)
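With check_shared plumbed through find_parent_nodes(), btrfs_check_shared() can now bail out as soon as a second unique reference shows up instead of resolving every root. A hedged sketch of how fiemap-style code might consume it (only btrfs_check_shared() comes from this file; the wrapper and its use of FIEMAP_EXTENT_SHARED are illustrative):

    /* Hypothetical caller: flag an extent as shared for fiemap reporting. */
    static int demo_extent_flags(struct btrfs_trans_handle *trans,
    				 struct btrfs_root *root, u64 inum,
    				 u64 bytenr, u32 *flags)
    {
    	int ret;

    	ret = btrfs_check_shared(trans, root->fs_info, root->objectid,
    				 inum, bytenr);
    	if (ret < 0)
    		return ret;		/* hard error */
    	if (ret == 1)
    		*flags |= FIEMAP_EXTENT_SHARED;	/* at least two owners */
    	return 0;
    }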
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4919aedb5fc1..1a8fa46ff87e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,17 +44,6 @@
 #define BTRFS_INODE_IN_DELALLOC_LIST		9
 #define BTRFS_INODE_READDIO_NEED_LOCK		10
 #define BTRFS_INODE_HAS_PROPS			11
-/*
- * The following 3 bits are meant only for the btree inode.
- * When any of them is set, it means an error happened while writing an
- * extent buffer belonging to:
- * 1) a non-log btree
- * 2) a log btree and first log sub-transaction
- * 3) a log btree and second log sub-transaction
- */
-#define BTRFS_INODE_BTREE_ERR			12
-#define BTRFS_INODE_BTREE_LOG1_ERR		13
-#define BTRFS_INODE_BTREE_LOG2_ERR		14
 
 /* in memory btrfs inode */
 struct btrfs_inode {
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b677a6ea6001..8e99251650b3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -656,7 +656,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 	BUG_ON(NULL == state);
 	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
 	if (NULL == selected_super) {
-		printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+		pr_info("btrfsic: error, kmalloc failed!\n");
 		return -ENOMEM;
 	}
 
@@ -681,7 +681,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 	}
 
 	if (NULL == state->latest_superblock) {
-		printk(KERN_INFO "btrfsic: no superblock found!\n");
+		pr_info("btrfsic: no superblock found!\n");
 		kfree(selected_super);
 		return -1;
 	}
@@ -698,13 +698,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 			next_bytenr = btrfs_super_root(selected_super);
 			if (state->print_mask &
 			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-				printk(KERN_INFO "root@%llu\n", next_bytenr);
+				pr_info("root@%llu\n", next_bytenr);
 			break;
 		case 1:
 			next_bytenr = btrfs_super_chunk_root(selected_super);
 			if (state->print_mask &
 			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-				printk(KERN_INFO "chunk@%llu\n", next_bytenr);
+				pr_info("chunk@%llu\n", next_bytenr);
 			break;
 		case 2:
 			next_bytenr = btrfs_super_log_root(selected_super);
@@ -712,7 +712,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 				continue;
 			if (state->print_mask &
 			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-				printk(KERN_INFO "log@%llu\n", next_bytenr);
+				pr_info("log@%llu\n", next_bytenr);
 			break;
 		}
 
@@ -720,7 +720,7 @@
 		    btrfs_num_copies(state->root->fs_info,
 				     next_bytenr, state->metablock_size);
 		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
-			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+			pr_info("num_copies(log_bytenr=%llu) = %d\n",
 			       next_bytenr, num_copies);
 
 		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
@@ -733,9 +733,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 						&tmp_next_block_ctx,
 						mirror_num);
 			if (ret) {
-				printk(KERN_INFO "btrfsic:"
-				       " btrfsic_map_block(root @%llu,"
-				       " mirror %d) failed!\n",
+				pr_info("btrfsic: btrfsic_map_block(root @%llu, mirror %d) failed!\n",
 				       next_bytenr, mirror_num);
 				kfree(selected_super);
 				return -1;
@@ -758,8 +756,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
 			if (ret < (int)PAGE_SIZE) {
-				printk(KERN_INFO
-				       "btrfsic: read @logical %llu failed!\n",
+				pr_info("btrfsic: read @logical %llu failed!\n",
 				       tmp_next_block_ctx.start);
 				btrfsic_release_block_ctx(&tmp_next_block_ctx);
 				kfree(selected_super);
@@ -820,7 +817,7 @@ static int btrfsic_process_superblock_dev_mirror(
 	if (NULL == superblock_tmp) {
 		superblock_tmp = btrfsic_block_alloc();
 		if (NULL == superblock_tmp) {
-			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+			pr_info("btrfsic: error, kmalloc failed!\n");
 			brelse(bh);
 			return -1;
 		}
@@ -894,7 +891,7 @@ static int btrfsic_process_superblock_dev_mirror(
btrfs_num_copies(state->root->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) - printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + pr_info("num_copies(log_bytenr=%llu) = %d\n", next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block *next_block; @@ -905,8 +902,7 @@ static int btrfsic_process_superblock_dev_mirror( state->metablock_size, &tmp_next_block_ctx, mirror_num)) { - printk(KERN_INFO "btrfsic: btrfsic_map_block(" - "bytenr @%llu, mirror %d) failed!\n", + pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n", next_bytenr, mirror_num); brelse(bh); return -1; @@ -948,7 +944,7 @@ static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void) sf = kzalloc(sizeof(*sf), GFP_NOFS); if (NULL == sf) - printk(KERN_INFO "btrfsic: alloc memory failed!\n"); + pr_info("btrfsic: alloc memory failed!\n"); else sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER; return sf; @@ -994,9 +990,7 @@ continue_with_new_stack_frame: sf->nr = btrfs_stack_header_nritems(&leafhdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "leaf %llu items %d generation %llu" - " owner %llu\n", + pr_info("leaf %llu items %d generation %llu owner %llu\n", sf->block_ctx->start, sf->nr, btrfs_stack_header_generation( &leafhdr->header), @@ -1023,8 +1017,7 @@ continue_with_current_leaf_stack_frame: if (disk_item_offset + sizeof(struct btrfs_item) > sf->block_ctx->len) { leaf_item_out_of_bounce_error: - printk(KERN_INFO - "btrfsic: leaf item out of bounce at logical %llu, dev %s\n", + pr_info("btrfsic: leaf item out of bounce at logical %llu, dev %s\n", sf->block_ctx->start, sf->block_ctx->dev->name); goto one_stack_frame_backwards; @@ -1120,8 +1113,7 @@ leaf_item_out_of_bounce_error: sf->nr = btrfs_stack_header_nritems(&nodehdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO "node %llu level %d items %d" - " generation %llu owner %llu\n", + pr_info("node %llu level %d items %d generation %llu owner %llu\n", sf->block_ctx->start, nodehdr->header.level, sf->nr, btrfs_stack_header_generation( @@ -1145,8 +1137,7 @@ continue_with_current_node_stack_frame: (uintptr_t)nodehdr; if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > sf->block_ctx->len) { - printk(KERN_INFO - "btrfsic: node item out of bounce at logical %llu, dev %s\n", + pr_info("btrfsic: node item out of bounce at logical %llu, dev %s\n", sf->block_ctx->start, sf->block_ctx->dev->name); goto one_stack_frame_backwards; @@ -1275,7 +1266,7 @@ static int btrfsic_create_link_to_next_block( btrfs_num_copies(state->root->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) - printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + pr_info("num_copies(log_bytenr=%llu) = %d\n", next_bytenr, *num_copiesp); *mirror_nump = 1; } @@ -1284,15 +1275,13 @@ static int btrfsic_create_link_to_next_block( return 0; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "btrfsic_create_link_to_next_block(mirror_num=%d)\n", + pr_info("btrfsic_create_link_to_next_block(mirror_num=%d)\n", *mirror_nump); ret = btrfsic_map_block(state, next_bytenr, state->metablock_size, next_block_ctx, *mirror_nump); if (ret) { - printk(KERN_INFO - "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", + pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", next_bytenr, *mirror_nump); btrfsic_release_block_ctx(next_block_ctx); 
*next_blockp = NULL; @@ -1318,16 +1307,14 @@ static int btrfsic_create_link_to_next_block( if (next_block->logical_bytenr != next_bytenr && !(!next_block->is_metadata && 0 == next_block->logical_bytenr)) - printk(KERN_INFO - "Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", + pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", next_bytenr, next_block_ctx->dev->name, next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block), next_block->logical_bytenr); else - printk(KERN_INFO - "Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n", + pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n", next_bytenr, next_block_ctx->dev->name, next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, @@ -1348,7 +1335,7 @@ static int btrfsic_create_link_to_next_block( if (NULL == l) { l = btrfsic_block_link_alloc(); if (NULL == l) { - printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + pr_info("btrfsic: error, kmalloc failed!\n"); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; return -1; @@ -1381,8 +1368,7 @@ static int btrfsic_create_link_to_next_block( if (limit_nesting > 0 && did_alloc_block_link) { ret = btrfsic_read_block(state, next_block_ctx); if (ret < (int)next_block_ctx->len) { - printk(KERN_INFO - "btrfsic: read block @logical %llu failed!\n", + pr_info("btrfsic: read block @logical %llu failed!\n", next_bytenr); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; @@ -1417,8 +1403,7 @@ static int btrfsic_handle_extent_data( if (file_extent_item_offset + offsetof(struct btrfs_file_extent_item, disk_num_bytes) > block_ctx->len) { - printk(KERN_INFO - "btrfsic: file item out of bounce at logical %llu, dev %s\n", + pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n", block_ctx->start, block_ctx->dev->name); return -1; } @@ -1429,7 +1414,7 @@ static int btrfsic_handle_extent_data( if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) - printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", + pr_info("extent_data: type %u, disk_bytenr = %llu\n", file_extent_item.type, btrfs_stack_file_extent_disk_bytenr( &file_extent_item)); @@ -1438,8 +1423,7 @@ static int btrfsic_handle_extent_data( if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > block_ctx->len) { - printk(KERN_INFO - "btrfsic: file item out of bounce at logical %llu, dev %s\n", + pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n", block_ctx->start, block_ctx->dev->name); return -1; } @@ -1457,8 +1441,7 @@ static int btrfsic_handle_extent_data( generation = btrfs_stack_file_extent_generation(&file_extent_item); if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) - printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," - " offset = %llu, num_bytes = %llu\n", + pr_info("extent_data: type %u, disk_bytenr = %llu, offset = %llu, num_bytes = %llu\n", file_extent_item.type, btrfs_stack_file_extent_disk_bytenr(&file_extent_item), btrfs_stack_file_extent_offset(&file_extent_item), @@ -1477,7 +1460,7 @@ static int btrfsic_handle_extent_data( btrfs_num_copies(state->root->fs_info, next_bytenr, state->datablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) - printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + 
pr_info("num_copies(log_bytenr=%llu) = %d\n", next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block_data_ctx next_block_ctx; @@ -1485,19 +1468,16 @@ static int btrfsic_handle_extent_data( int block_was_created; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO "btrfsic_handle_extent_data(" - "mirror_num=%d)\n", mirror_num); + pr_info("btrfsic_handle_extent_data(mirror_num=%d)\n", + mirror_num); if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) - printk(KERN_INFO - "\tdisk_bytenr = %llu, num_bytes %u\n", + pr_info("\tdisk_bytenr = %llu, num_bytes %u\n", next_bytenr, chunk_len); ret = btrfsic_map_block(state, next_bytenr, chunk_len, &next_block_ctx, mirror_num); if (ret) { - printk(KERN_INFO - "btrfsic: btrfsic_map_block(@%llu," - " mirror=%d) failed!\n", + pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", next_bytenr, mirror_num); return -1; } @@ -1512,8 +1492,7 @@ static int btrfsic_handle_extent_data( mirror_num, &block_was_created); if (NULL == next_block) { - printk(KERN_INFO - "btrfsic: error, kmalloc failed!\n"); + pr_info("btrfsic: error, kmalloc failed!\n"); btrfsic_release_block_ctx(&next_block_ctx); return -1; } @@ -1523,12 +1502,7 @@ static int btrfsic_handle_extent_data( next_block->logical_bytenr != next_bytenr && !(!next_block->is_metadata && 0 == next_block->logical_bytenr)) { - printk(KERN_INFO - "Referenced block" - " @%llu (%s/%llu/%d)" - " found in hash table, D," - " bytenr mismatch" - " (!= stored %llu).\n", + pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n", next_bytenr, next_block_ctx.dev->name, next_block_ctx.dev_bytenr, @@ -1592,7 +1566,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, kfree(multi); if (NULL == block_ctx_out->dev) { ret = -ENXIO; - printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); + pr_info("btrfsic: error, cannot lookup dev (#1)!\n"); } return ret; @@ -1638,8 +1612,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, BUG_ON(block_ctx->pagev); BUG_ON(block_ctx->mem_to_free); if (block_ctx->dev_bytenr & ((u64)PAGE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: read_block() with unaligned bytenr %llu\n", + pr_info("btrfsic: read_block() with unaligned bytenr %llu\n", block_ctx->dev_bytenr); return -1; } @@ -1666,13 +1639,13 @@ static int btrfsic_read_block(struct btrfsic_state *state, bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); if (!bio) { - printk(KERN_INFO - "btrfsic: bio_alloc() for %u pages failed!\n", + pr_info("btrfsic: bio_alloc() for %u pages failed!\n", num_pages - i); return -1; } bio->bi_bdev = block_ctx->dev->bdev; bio->bi_iter.bi_sector = dev_bytenr >> 9; + bio_set_op_attrs(bio, REQ_OP_READ, 0); for (j = i; j < num_pages; j++) { ret = bio_add_page(bio, block_ctx->pagev[j], @@ -1681,13 +1654,11 @@ static int btrfsic_read_block(struct btrfsic_state *state, break; } if (j == i) { - printk(KERN_INFO - "btrfsic: error, failed to add a single page!\n"); + pr_info("btrfsic: error, failed to add a single page!\n"); return -1; } - if (submit_bio_wait(READ, bio)) { - printk(KERN_INFO - "btrfsic: read error at logical %llu dev %s!\n", + if (submit_bio_wait(bio)) { + pr_info("btrfsic: read error at logical %llu dev %s!\n", block_ctx->start, block_ctx->dev->name); bio_put(bio); return -1; @@ -1699,7 +1670,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, for (i = 0; i < num_pages; i++) { block_ctx->datav[i] = 
kmap(block_ctx->pagev[i]); if (!block_ctx->datav[i]) { - printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", + pr_info("btrfsic: kmap() failed (dev %s)!\n", block_ctx->dev->name); return -1; } @@ -1714,19 +1685,17 @@ static void btrfsic_dump_database(struct btrfsic_state *state) BUG_ON(NULL == state); - printk(KERN_INFO "all_blocks_list:\n"); + pr_info("all_blocks_list:\n"); list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) { const struct btrfsic_block_link *l; - printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", + pr_info("%c-block @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), b_all->logical_bytenr, b_all->dev_state->name, b_all->dev_bytenr, b_all->mirror_num); list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) { - printk(KERN_INFO " %c @%llu (%s/%llu/%d)" - " refers %u* to" - " %c @%llu (%s/%llu/%d)\n", + pr_info(" %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), b_all->logical_bytenr, b_all->dev_state->name, b_all->dev_bytenr, b_all->mirror_num, @@ -1739,9 +1708,7 @@ static void btrfsic_dump_database(struct btrfsic_state *state) } list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) { - printk(KERN_INFO " %c @%llu (%s/%llu/%d)" - " is ref %u* from" - " %c @%llu (%s/%llu/%d)\n", + pr_info(" %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), b_all->logical_bytenr, b_all->dev_state->name, b_all->dev_bytenr, b_all->mirror_num, @@ -1753,7 +1720,7 @@ static void btrfsic_dump_database(struct btrfsic_state *state) l->block_ref_from->mirror_num); } - printk(KERN_INFO "\n"); + pr_info("\n"); } } @@ -1828,8 +1795,7 @@ again: mapped_datav[0]); if (num_pages * PAGE_SIZE < BTRFS_SUPER_INFO_SIZE) { - printk(KERN_INFO - "btrfsic: cannot work with too short bios!\n"); + pr_info("btrfsic: cannot work with too short bios!\n"); return; } is_metadata = 1; @@ -1837,8 +1803,7 @@ again: processed_len = BTRFS_SUPER_INFO_SIZE; if (state->print_mask & BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { - printk(KERN_INFO - "[before new superblock is written]:\n"); + pr_info("[before new superblock is written]:\n"); btrfsic_dump_tree_sub(state, block, 0); } } @@ -1846,8 +1811,7 @@ again: if (!block->is_superblock) { if (num_pages * PAGE_SIZE < state->metablock_size) { - printk(KERN_INFO - "btrfsic: cannot work with too short bios!\n"); + pr_info("btrfsic: cannot work with too short bios!\n"); return; } processed_len = state->metablock_size; @@ -1862,8 +1826,7 @@ again: if (block->logical_bytenr != bytenr && !(!block->is_metadata && block->logical_bytenr == 0)) - printk(KERN_INFO - "Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", + pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", bytenr, dev_state->name, dev_bytenr, block->mirror_num, @@ -1871,8 +1834,7 @@ again: block), block->logical_bytenr); else - printk(KERN_INFO - "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n", + pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n", bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, @@ -1882,33 +1844,24 @@ again: } else { if (num_pages * PAGE_SIZE < state->datablock_size) { - printk(KERN_INFO - "btrfsic: cannot work with too short bios!\n"); + pr_info("btrfsic: cannot work with too short bios!\n"); return; } processed_len = state->datablock_size; bytenr = block->logical_bytenr; if (state->print_mask & 
BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "Written block @%llu (%s/%llu/%d)" - " found in hash table, %c.\n", + pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n", bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "ref_to_list: %cE, ref_from_list: %cE\n", + pr_info("ref_to_list: %cE, ref_from_list: %cE\n", list_empty(&block->ref_to_list) ? ' ' : '!', list_empty(&block->ref_from_list) ? ' ' : '!'); if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { - printk(KERN_INFO "btrfs: attempt to overwrite %c-block" - " @%llu (%s/%llu/%d), old(gen=%llu," - " objectid=%llu, type=%d, offset=%llu)," - " new(gen=%llu)," - " which is referenced by most recent superblock" - " (superblockgen=%llu)!\n", + pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n", btrfsic_get_block_type(state, block), bytenr, dev_state->name, dev_bytenr, block->mirror_num, block->generation, @@ -1922,9 +1875,7 @@ again: } if (!block->is_iodone && !block->never_written) { - printk(KERN_INFO "btrfs: attempt to overwrite %c-block" - " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," - " which is not yet iodone!\n", + pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n", btrfsic_get_block_type(state, block), bytenr, dev_state->name, dev_bytenr, block->mirror_num, block->generation, @@ -2022,8 +1973,7 @@ again: mapped_datav[0]); if (state->print_mask & BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { - printk(KERN_INFO - "[after new superblock is written]:\n"); + pr_info("[after new superblock is written]:\n"); btrfsic_dump_tree_sub(state, block, 0); } } else { @@ -2035,9 +1985,7 @@ again: 0, 0); } if (ret) - printk(KERN_INFO - "btrfsic: btrfsic_process_metablock" - "(root @%llu) failed!\n", + pr_info("btrfsic: btrfsic_process_metablock(root @%llu) failed!\n", dev_bytenr); } else { block->is_metadata = 0; @@ -2064,8 +2012,7 @@ again: if (!is_metadata) { processed_len = state->datablock_size; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO "Written block (%s/%llu/?)" - " !found in hash table, D.\n", + pr_info("Written block (%s/%llu/?) !found in hash table, D.\n", dev_state->name, dev_bytenr); if (!state->include_extent_data) { /* ignore that written D block */ @@ -2083,9 +2030,7 @@ again: btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "Written block @%llu (%s/%llu/?)" - " !found in hash table, M.\n", + pr_info("Written block @%llu (%s/%llu/?) !found in hash table, M.\n", bytenr, dev_state->name, dev_bytenr); } @@ -2099,7 +2044,7 @@ again: block = btrfsic_block_alloc(); if (NULL == block) { - printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + pr_info("btrfsic: error, kmalloc failed!\n"); btrfsic_release_block_ctx(&block_ctx); goto continue_loop; } @@ -2149,8 +2094,7 @@ again: block->next_in_same_bio = NULL; } if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "New written %c-block @%llu (%s/%llu/%d)\n", + pr_info("New written %c-block @%llu (%s/%llu/%d)\n", is_metadata ? 
'M' : 'D', block->logical_bytenr, block->dev_state->name, block->dev_bytenr, block->mirror_num); @@ -2161,9 +2105,7 @@ again: ret = btrfsic_process_metablock(state, block, &block_ctx, 0, 0); if (ret) - printk(KERN_INFO - "btrfsic: process_metablock(root @%llu)" - " failed!\n", + pr_info("btrfsic: process_metablock(root @%llu) failed!\n", dev_bytenr); } btrfsic_release_block_ctx(&block_ctx); @@ -2198,20 +2140,18 @@ static void btrfsic_bio_end_io(struct bio *bp) if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) - printk(KERN_INFO - "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", + pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", bp->bi_error, btrfsic_get_block_type(dev_state->state, block), block->logical_bytenr, dev_state->name, block->dev_bytenr, block->mirror_num); next_block = block->next_in_same_bio; block->iodone_w_error = iodone_w_error; - if (block->submit_bio_bh_rw & REQ_FLUSH) { + if (block->submit_bio_bh_rw & REQ_PREFLUSH) { dev_state->last_flush_gen++; if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) - printk(KERN_INFO - "bio_end_io() new %s flush_gen=%llu\n", + pr_info("bio_end_io() new %s flush_gen=%llu\n", dev_state->name, dev_state->last_flush_gen); } @@ -2234,20 +2174,18 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) BUG_ON(NULL == block); dev_state = block->dev_state; if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) - printk(KERN_INFO - "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", + pr_info("bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", iodone_w_error, btrfsic_get_block_type(dev_state->state, block), block->logical_bytenr, block->dev_state->name, block->dev_bytenr, block->mirror_num); block->iodone_w_error = iodone_w_error; - if (block->submit_bio_bh_rw & REQ_FLUSH) { + if (block->submit_bio_bh_rw & REQ_PREFLUSH) { dev_state->last_flush_gen++; if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) - printk(KERN_INFO - "bh_end_io() new %s flush_gen=%llu\n", + pr_info("bh_end_io() new %s flush_gen=%llu\n", dev_state->name, dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) @@ -2270,9 +2208,7 @@ static int btrfsic_process_written_superblock( if (!(superblock->generation > state->max_superblock_generation || 0 == state->max_superblock_generation)) { if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) - printk(KERN_INFO - "btrfsic: superblock @%llu (%s/%llu/%d)" - " with old gen %llu <= %llu\n", + pr_info("btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n", superblock->logical_bytenr, superblock->dev_state->name, superblock->dev_bytenr, superblock->mirror_num, @@ -2280,9 +2216,7 @@ static int btrfsic_process_written_superblock( state->max_superblock_generation); } else { if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) - printk(KERN_INFO - "btrfsic: got new superblock @%llu (%s/%llu/%d)" - " with new gen %llu > %llu\n", + pr_info("btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n", superblock->logical_bytenr, superblock->dev_state->name, superblock->dev_bytenr, superblock->mirror_num, @@ -2317,7 +2251,7 @@ static int btrfsic_process_written_superblock( next_bytenr = btrfs_super_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "root@%llu\n", next_bytenr); + pr_info("root@%llu\n", next_bytenr); break; case 1: btrfs_set_disk_key_objectid(&tmp_disk_key, @@ -2326,7 +2260,7 @@ static int btrfsic_process_written_superblock( 
next_bytenr = btrfs_super_chunk_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "chunk@%llu\n", next_bytenr); + pr_info("chunk@%llu\n", next_bytenr); break; case 2: btrfs_set_disk_key_objectid(&tmp_disk_key, @@ -2337,7 +2271,7 @@ static int btrfsic_process_written_superblock( continue; if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "log@%llu\n", next_bytenr); + pr_info("log@%llu\n", next_bytenr); break; } @@ -2345,23 +2279,19 @@ static int btrfsic_process_written_superblock( btrfs_num_copies(state->root->fs_info, next_bytenr, BTRFS_SUPER_INFO_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) - printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + pr_info("num_copies(log_bytenr=%llu) = %d\n", next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { int was_created; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "btrfsic_process_written_superblock(" - "mirror_num=%d)\n", mirror_num); + pr_info("btrfsic_process_written_superblock(mirror_num=%d)\n", mirror_num); ret = btrfsic_map_block(state, next_bytenr, BTRFS_SUPER_INFO_SIZE, &tmp_next_block_ctx, mirror_num); if (ret) { - printk(KERN_INFO - "btrfsic: btrfsic_map_block(@%llu," - " mirror=%d) failed!\n", + pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", next_bytenr, mirror_num); return -1; } @@ -2374,8 +2304,7 @@ static int btrfsic_process_written_superblock( mirror_num, &was_created); if (NULL == next_block) { - printk(KERN_INFO - "btrfsic: error, kmalloc failed!\n"); + pr_info("btrfsic: error, kmalloc failed!\n"); btrfsic_release_block_ctx(&tmp_next_block_ctx); return -1; } @@ -2424,8 +2353,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, * by the most recent super block. 
*/ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "btrfsic: abort cyclic linkage (case 1).\n"); + pr_info("btrfsic: abort cyclic linkage (case 1).\n"); return ret; } @@ -2436,9 +2364,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, */ list_for_each_entry(l, &block->ref_to_list, node_ref_to) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "rl=%d, %c @%llu (%s/%llu/%d)" - " %u* refers to %c @%llu (%s/%llu/%d)\n", + pr_info("rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), block->logical_bytenr, block->dev_state->name, @@ -2450,9 +2376,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); if (l->block_ref_to->never_written) { - printk(KERN_INFO "btrfs: attempt to write superblock" - " which references block %c @%llu (%s/%llu/%d)" - " which is never written!\n", + pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is never written!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, @@ -2460,9 +2384,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, l->block_ref_to->mirror_num); ret = -1; } else if (!l->block_ref_to->is_iodone) { - printk(KERN_INFO "btrfs: attempt to write superblock" - " which references block %c @%llu (%s/%llu/%d)" - " which is not yet iodone!\n", + pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, @@ -2470,9 +2392,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, l->block_ref_to->mirror_num); ret = -1; } else if (l->block_ref_to->iodone_w_error) { - printk(KERN_INFO "btrfs: attempt to write superblock" - " which references block %c @%llu (%s/%llu/%d)" - " which has write error!\n", + pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, @@ -2485,10 +2405,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, l->parent_generation && BTRFSIC_GENERATION_UNKNOWN != l->block_ref_to->generation) { - printk(KERN_INFO "btrfs: attempt to write superblock" - " which references block %c @%llu (%s/%llu/%d)" - " with generation %llu !=" - " parent generation %llu!\n", + pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, @@ -2499,11 +2416,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, ret = -1; } else if (l->block_ref_to->flush_gen > l->block_ref_to->dev_state->last_flush_gen) { - printk(KERN_INFO "btrfs: attempt to write superblock" - " which references block %c @%llu (%s/%llu/%d)" - " which is not flushed out of disk's write cache" - " (block flush_gen=%llu," - " dev->flush_gen=%llu)!\n", + pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n", btrfsic_get_block_type(state, 
l->block_ref_to), l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, @@ -2532,8 +2445,7 @@ static int btrfsic_is_block_ref_by_superblock( if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { /* refer to comment at "abort cyclic linkage (case 1)" */ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "btrfsic: abort cyclic linkage (case 2).\n"); + pr_info("btrfsic: abort cyclic linkage (case 2).\n"); return 0; } @@ -2544,9 +2456,7 @@ static int btrfsic_is_block_ref_by_superblock( */ list_for_each_entry(l, &block->ref_from_list, node_ref_from) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "rl=%d, %c @%llu (%s/%llu/%d)" - " is ref %u* from %c @%llu (%s/%llu/%d)\n", + pr_info("rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), block->logical_bytenr, block->dev_state->name, @@ -2576,9 +2486,7 @@ static int btrfsic_is_block_ref_by_superblock( static void btrfsic_print_add_link(const struct btrfsic_state *state, const struct btrfsic_block_link *l) { - printk(KERN_INFO - "Add %u* link from %c @%llu (%s/%llu/%d)" - " to %c @%llu (%s/%llu/%d).\n", + pr_info("Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), l->block_ref_from->logical_bytenr, @@ -2593,9 +2501,7 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state, static void btrfsic_print_rem_link(const struct btrfsic_state *state, const struct btrfsic_block_link *l) { - printk(KERN_INFO - "Rem %u* link from %c @%llu (%s/%llu/%d)" - " to %c @%llu (%s/%llu/%d).\n", + pr_info("Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), l->block_ref_from->logical_bytenr, @@ -2645,7 +2551,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, * This algorithm is recursive because the amount of used stack space * is very small and the max recursion depth is limited. 
*/ - indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", + indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)", btrfsic_get_block_type(state, block), block->logical_bytenr, block->dev_state->name, block->dev_bytenr, block->mirror_num); @@ -2707,8 +2613,7 @@ static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( if (NULL == l) { l = btrfsic_block_link_alloc(); if (NULL == l) { - printk(KERN_INFO - "btrfsic: error, kmalloc" " failed!\n"); + pr_info("btrfsic: error, kmalloc failed!\n"); return NULL; } @@ -2755,13 +2660,12 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( block = btrfsic_block_alloc(); if (NULL == block) { - printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + pr_info("btrfsic: error, kmalloc failed!\n"); return NULL; } dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev); if (NULL == dev_state) { - printk(KERN_INFO - "btrfsic: error, lookup dev_state failed!\n"); + pr_info("btrfsic: error, lookup dev_state failed!\n"); btrfsic_block_free(block); return NULL; } @@ -2773,8 +2677,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( block->never_written = never_written; block->mirror_num = mirror_num; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - "New %s%c-block @%llu (%s/%llu/%d)\n", + pr_info("New %s%c-block @%llu (%s/%llu/%d)\n", additional_string, btrfsic_get_block_type(state, block), block->logical_bytenr, dev_state->name, @@ -2809,9 +2712,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, ret = btrfsic_map_block(state, bytenr, state->metablock_size, &block_ctx, mirror_num); if (ret) { - printk(KERN_INFO "btrfsic:" - " btrfsic_map_block(logical @%llu," - " mirror %d) failed!\n", + pr_info("btrfsic: btrfsic_map_block(logical @%llu, mirror %d) failed!\n", bytenr, mirror_num); continue; } @@ -2826,9 +2727,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, } if (WARN_ON(!match)) { - printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," - " buffer->log_bytenr=%llu, submit_bio(bdev=%s," - " phys_bytenr=%llu)!\n", + pr_info("btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n", bytenr, dev_state->name, dev_bytenr); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { ret = btrfsic_map_block(state, bytenr, @@ -2837,8 +2736,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, if (ret) continue; - printk(KERN_INFO "Read logical bytenr @%llu maps to" - " (%s/%llu/%d)\n", + pr_info("Read logical bytenr @%llu maps to (%s/%llu/%d)\n", bytenr, block_ctx.dev->name, block_ctx.dev_bytenr, mirror_num); } @@ -2848,19 +2746,16 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, static struct btrfsic_dev_state *btrfsic_dev_state_lookup( struct block_device *bdev) { - struct btrfsic_dev_state *ds; - - ds = btrfsic_dev_state_hashtable_lookup(bdev, - &btrfsic_dev_state_hashtable); - return ds; + return btrfsic_dev_state_hashtable_lookup(bdev, + &btrfsic_dev_state_hashtable); } -int btrfsic_submit_bh(int rw, struct buffer_head *bh) +int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh) { struct btrfsic_dev_state *dev_state; if (!btrfsic_is_initialized) - return submit_bh(rw, bh); + return submit_bh(op, op_flags, bh); mutex_lock(&btrfsic_mutex); /* since btrfsic_submit_bh() might also be called before @@ 
-2869,34 +2764,28 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) /* Only called to write the superblock (incl. FLUSH/FUA) */ if (NULL != dev_state && - (rw & WRITE) && bh->b_size > 0) { + (op == REQ_OP_WRITE) && bh->b_size > 0) { u64 dev_bytenr; dev_bytenr = 4096 * bh->b_blocknr; if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) - printk(KERN_INFO - "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu)," - " size=%zu, data=%p, bdev=%p)\n", - rw, (unsigned long long)bh->b_blocknr, + pr_info("submit_bh(op=0x%x,0x%x, blocknr=%llu (bytenr %llu), size=%zu, data=%p, bdev=%p)\n", + op, op_flags, (unsigned long long)bh->b_blocknr, dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, &bh->b_data, 1, NULL, - NULL, bh, rw); - } else if (NULL != dev_state && (rw & REQ_FLUSH)) { + NULL, bh, op_flags); + } else if (NULL != dev_state && (op_flags & REQ_PREFLUSH)) { if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) - printk(KERN_INFO - "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", - rw, bh->b_bdev); + pr_info("submit_bh(op=0x%x,0x%x FLUSH, bdev=%p)\n", + op, op_flags, bh->b_bdev); if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { if ((dev_state->state->print_mask & (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | BTRFSIC_PRINT_MASK_VERBOSE))) - printk(KERN_INFO - "btrfsic_submit_bh(%s) with FLUSH" - " but dummy block already in use" - " (ignored)!\n", + pr_info("btrfsic_submit_bh(%s) with FLUSH but dummy block already in use (ignored)!\n", dev_state->name); } else { struct btrfsic_block *const block = @@ -2906,7 +2795,7 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) block->never_written = 0; block->iodone_w_error = 0; block->flush_gen = dev_state->last_flush_gen + 1; - block->submit_bio_bh_rw = rw; + block->submit_bio_bh_rw = op_flags; block->orig_bio_bh_private = bh->b_private; block->orig_bio_bh_end_io.bh = bh->b_end_io; block->next_in_same_bio = NULL; @@ -2915,10 +2804,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) } } mutex_unlock(&btrfsic_mutex); - return submit_bh(rw, bh); + return submit_bh(op, op_flags, bh); } -static void __btrfsic_submit_bio(int rw, struct bio *bio) +static void __btrfsic_submit_bio(struct bio *bio) { struct btrfsic_dev_state *dev_state; @@ -2930,7 +2819,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) * btrfsic_mount(), this might return NULL */ dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); if (NULL != dev_state && - (rw & WRITE) && NULL != bio->bi_io_vec) { + (bio_op(bio) == REQ_OP_WRITE) && NULL != bio->bi_io_vec) { unsigned int i; u64 dev_bytenr; u64 cur_bytenr; @@ -2941,10 +2830,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) bio_is_patched = 0; if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) - printk(KERN_INFO - "submit_bio(rw=0x%x, bi_vcnt=%u," - " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", - rw, bio->bi_vcnt, + pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", + bio_op(bio), bio->bi_opf, bio->bi_vcnt, (unsigned long long)bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev); @@ -2966,8 +2853,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) } if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) - printk(KERN_INFO - "#%u: bytenr=%llu, len=%u, offset=%u\n", + pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n", i, cur_bytenr, bio->bi_io_vec[i].bv_len, bio->bi_io_vec[i].bv_offset); cur_bytenr += bio->bi_io_vec[i].bv_len; @@ -2975,26 +2861,22 @@ 
static void __btrfsic_submit_bio(int rw, struct bio *bio) btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, bio->bi_vcnt, bio, &bio_is_patched, - NULL, rw); + NULL, bio->bi_opf); while (i > 0) { i--; kunmap(bio->bi_io_vec[i].bv_page); } kfree(mapped_datav); - } else if (NULL != dev_state && (rw & REQ_FLUSH)) { + } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) { if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) - printk(KERN_INFO - "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", - rw, bio->bi_bdev); + pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n", + bio_op(bio), bio->bi_opf, bio->bi_bdev); if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { if ((dev_state->state->print_mask & (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | BTRFSIC_PRINT_MASK_VERBOSE))) - printk(KERN_INFO - "btrfsic_submit_bio(%s) with FLUSH" - " but dummy block already in use" - " (ignored)!\n", + pr_info("btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n", dev_state->name); } else { struct btrfsic_block *const block = @@ -3004,7 +2886,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) block->never_written = 0; block->iodone_w_error = 0; block->flush_gen = dev_state->last_flush_gen + 1; - block->submit_bio_bh_rw = rw; + block->submit_bio_bh_rw = bio->bi_opf; block->orig_bio_bh_private = bio->bi_private; block->orig_bio_bh_end_io.bio = bio->bi_end_io; block->next_in_same_bio = NULL; @@ -3016,16 +2898,16 @@ leave: mutex_unlock(&btrfsic_mutex); } -void btrfsic_submit_bio(int rw, struct bio *bio) +void btrfsic_submit_bio(struct bio *bio) { - __btrfsic_submit_bio(rw, bio); - submit_bio(rw, bio); + __btrfsic_submit_bio(bio); + submit_bio(bio); } -int btrfsic_submit_bio_wait(int rw, struct bio *bio) +int btrfsic_submit_bio_wait(struct bio *bio) { - __btrfsic_submit_bio(rw, bio); - return submit_bio_wait(rw, bio); + __btrfsic_submit_bio(bio); + return submit_bio_wait(bio); } int btrfsic_mount(struct btrfs_root *root, @@ -3038,14 +2920,12 @@ int btrfsic_mount(struct btrfs_root *root, struct btrfs_device *device; if (root->nodesize & ((u64)PAGE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n", + pr_info("btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n", root->nodesize, PAGE_SIZE); return -1; } if (root->sectorsize & ((u64)PAGE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n", + pr_info("btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n", root->sectorsize, PAGE_SIZE); return -1; } @@ -3053,7 +2933,7 @@ int btrfsic_mount(struct btrfs_root *root, if (!state) { state = vzalloc(sizeof(*state)); if (!state) { - printk(KERN_INFO "btrfs check-integrity: vzalloc() failed!\n"); + pr_info("btrfs check-integrity: vzalloc() failed!\n"); return -1; } } @@ -3085,8 +2965,7 @@ int btrfsic_mount(struct btrfs_root *root, ds = btrfsic_dev_state_alloc(); if (NULL == ds) { - printk(KERN_INFO - "btrfs check-integrity: kmalloc() failed!\n"); + pr_info("btrfs check-integrity: kmalloc() failed!\n"); mutex_unlock(&btrfsic_mutex); return -1; } @@ -3147,9 +3026,7 @@ void btrfsic_unmount(struct btrfs_root *root, } if (NULL == state) { - printk(KERN_INFO - "btrfsic: error, cannot find state information" - " on umount!\n"); + pr_info("btrfsic: error, cannot find state information on umount!\n"); mutex_unlock(&btrfsic_mutex); return; } @@ -3176,9 +3053,7 @@ void btrfsic_unmount(struct btrfs_root *root, if 
(b_all->is_iodone || b_all->never_written) btrfsic_block_free(b_all); else - printk(KERN_INFO "btrfs: attempt to free %c-block" - " @%llu (%s/%llu/%d) on umount which is" - " not yet iodone!\n", + pr_info("btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n", btrfsic_get_block_type(state, b_all), b_all->logical_bytenr, b_all->dev_state->name, b_all->dev_bytenr, b_all->mirror_num); diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h index 13b8566c97ab..f78dff1c7e86 100644 --- a/fs/btrfs/check-integrity.h +++ b/fs/btrfs/check-integrity.h @@ -20,9 +20,9 @@ #define __BTRFS_CHECK_INTEGRITY__ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY -int btrfsic_submit_bh(int rw, struct buffer_head *bh); -void btrfsic_submit_bio(int rw, struct bio *bio); -int btrfsic_submit_bio_wait(int rw, struct bio *bio); +int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh); +void btrfsic_submit_bio(struct bio *bio); +int btrfsic_submit_bio_wait(struct bio *bio); #else #define btrfsic_submit_bh submit_bh #define btrfsic_submit_bio submit_bio diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 658c39b70fba..ccc70d96958d 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -363,6 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, kfree(cb); return -ENOMEM; } + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; atomic_inc(&cb->pending_bios); @@ -373,7 +374,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, page = compressed_pages[pg_index]; page->mapping = inode->i_mapping; if (bio->bi_iter.bi_size) - ret = io_tree->ops->merge_bio_hook(WRITE, page, 0, + ret = io_tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, bio, 0); else @@ -401,13 +402,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, BUG_ON(ret); /* -ENOMEM */ } - ret = btrfs_map_bio(root, WRITE, bio, 0, 1); - BUG_ON(ret); /* -ENOMEM */ + ret = btrfs_map_bio(root, bio, 0, 1); + if (ret) { + bio->bi_error = ret; + bio_endio(bio); + } bio_put(bio); bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); BUG_ON(!bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; bio_add_page(bio, page, PAGE_SIZE, 0); @@ -431,8 +436,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, BUG_ON(ret); /* -ENOMEM */ } - ret = btrfs_map_bio(root, WRITE, bio, 0, 1); - BUG_ON(ret); /* -ENOMEM */ + ret = btrfs_map_bio(root, bio, 0, 1); + if (ret) { + bio->bi_error = ret; + bio_endio(bio); + } bio_put(bio); return 0; @@ -646,6 +654,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); if (!comp_bio) goto fail2; + bio_set_op_attrs (comp_bio, REQ_OP_READ, 0); comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; atomic_inc(&cb->pending_bios); @@ -656,7 +665,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) - ret = tree->ops->merge_bio_hook(READ, page, 0, + ret = tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, comp_bio, 0); else @@ -687,8 +696,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size, root->sectorsize); - ret = btrfs_map_bio(root, READ, comp_bio, - mirror_num, 0); + ret = btrfs_map_bio(root, comp_bio, mirror_num, 0); if (ret) { bio->bi_error 
= ret; bio_endio(comp_bio); @@ -699,6 +707,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); BUG_ON(!comp_bio); + bio_set_op_attrs(comp_bio, REQ_OP_READ, 0); comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; @@ -717,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, BUG_ON(ret); /* -ENOMEM */ } - ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); + ret = btrfs_map_bio(root, comp_bio, mirror_num, 0); if (ret) { bio->bi_error = ret; bio_endio(comp_bio); @@ -774,8 +783,7 @@ void __init btrfs_init_compress(void) */ workspace = btrfs_compress_op[i]->alloc_workspace(); if (IS_ERR(workspace)) { - printk(KERN_WARNING - "BTRFS: cannot preallocate compression workspace, will try later"); + pr_warn("BTRFS: cannot preallocate compression workspace, will try later\n"); } else { atomic_set(&btrfs_comp_ws[i].total_ws, 1); btrfs_comp_ws[i].free_ws = 1; @@ -845,8 +853,7 @@ again: /* no burst */ 1); if (__ratelimit(&_rs)) { - printk(KERN_WARNING - "no compression workspaces, low memory, retrying"); + pr_warn("BTRFS: no compression workspaces, low memory, retrying\n"); } } goto again; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 427c36b430a6..f6ba165d3f81 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -45,9 +45,7 @@ static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct btrfs_path *btrfs_alloc_path(void) { - struct btrfs_path *path; - path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); - return path; + return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); } /* @@ -1102,7 +1100,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, int level, ret; int last_ref = 0; int unlock_orig = 0; - u64 parent_start; + u64 parent_start = 0; if (*cow_ret == buf) unlock_orig = 1; @@ -1121,13 +1119,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_node_key(buf, &disk_key, 0); - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - if (parent) - parent_start = parent->start; - else - parent_start = 0; - } else - parent_start = 0; + if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) + parent_start = parent->start; cow = btrfs_alloc_tree_block(trans, root, parent_start, root->root_key.objectid, &disk_key, level, @@ -1153,14 +1146,14 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { ret = btrfs_reloc_cow_block(trans, root, buf, cow); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } } @@ -1170,8 +1163,6 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) parent_start = buf->start; - else - parent_start = 0; extent_buffer_get(cow); tree_mod_log_set_root_pointer(root, cow, 1); @@ -1182,11 +1173,6 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, free_extent_buffer(buf); add_root_to_dirty_list(root); } else { - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) - parent_start = parent->start; - else - parent_start = 0; - WARN_ON(trans->transid != btrfs_header_generation(parent)); 
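/*
 * Illustrative sketch (names prefixed "example_" are stand-ins, not part
 * of the patch): the btrfs_abort_transaction() calls converted in these
 * hunks drop the root argument because the transaction handle now carries
 * its fs_info, whose fs_state receives the BTRFS_FS_STATE_TRANS_ABORTED
 * bit (value 2 in the ctree.h hunk later in this patch).
 */
#include <linux/bitops.h>

struct example_fs_info { unsigned long fs_state; };
struct example_trans_handle { struct example_fs_info *fs_info; };

static inline void example_abort_transaction(struct example_trans_handle *trans,
					     int errno)
{
	/* everything the abort path needs is reachable via the handle */
	set_bit(2 /* BTRFS_FS_STATE_TRANS_ABORTED */,
		&trans->fs_info->fs_state);
	/* recording and reporting of errno omitted for brevity */
}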
tree_mod_log_insert_key(root->fs_info, parent, parent_slot, MOD_LOG_KEY_REPLACE, GFP_NOFS); @@ -1198,7 +1184,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (last_ref) { ret = tree_mod_log_free_eb(root->fs_info, buf); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } } @@ -1373,7 +1359,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); - eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); + eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start, + eb->len); if (!eb_rewin) { btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); @@ -1454,7 +1441,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } else if (old_root) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - eb = alloc_dummy_extent_buffer(root->fs_info, logical); + eb = alloc_dummy_extent_buffer(root->fs_info, logical, + root->nodesize); } else { btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); @@ -1503,7 +1491,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { - if (btrfs_test_is_dummy_root(root)) + if (btrfs_is_testing(root->fs_info)) return 0; /* ensure we can see the force_cow */ @@ -1552,6 +1540,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { + trans->dirty = true; *cow_ret = buf; return 0; } @@ -1726,20 +1715,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, return err; } -/* - * The leaf data grows from end-to-front in the node. - * this returns the address of the start of the last item, - * which is the stop of the leaf data stack - */ -static inline unsigned int leaf_data_end(struct btrfs_root *root, - struct extent_buffer *leaf) -{ - u32 nr = btrfs_header_nritems(leaf); - if (nr == 0) - return BTRFS_LEAF_DATA_SIZE(root); - return btrfs_item_offset_nr(leaf, nr - 1); -} - /* * search for key in the extent_buffer. The items start at offset p, @@ -1768,6 +1743,14 @@ static noinline int generic_bin_search(struct extent_buffer *eb, unsigned long map_len = 0; int err; + if (low > high) { + btrfs_err(eb->fs_info, + "%s: low (%d) > high (%d) eb %llu owner %llu level %d", + __func__, low, high, eb->start, + btrfs_header_owner(eb), btrfs_header_level(eb)); + return -EINVAL; + } + while (low < high) { mid = (low + high) / 2; offset = p + mid * item_size; @@ -1783,10 +1766,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb, if (!err) { tmp = (struct btrfs_disk_key *)(kaddr + offset - map_start); - } else { + } else if (err == 1) { read_extent_buffer(eb, &unaligned, offset, sizeof(unaligned)); tmp = &unaligned; + } else { + return err; } } else { @@ -1853,7 +1838,6 @@ static void root_sub_used(struct btrfs_root *root, u32 size) /* given a node and slot number, this reads the blocks it points to. The * extent buffer is returned with a reference taken (but unlocked). - * NULL is returned on error. 
*/ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) @@ -1861,19 +1845,16 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, int level = btrfs_header_level(parent); struct extent_buffer *eb; - if (slot < 0) - return NULL; - if (slot >= btrfs_header_nritems(parent)) - return NULL; + if (slot < 0 || slot >= btrfs_header_nritems(parent)) + return ERR_PTR(-ENOENT); BUG_ON(level == 0); eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), btrfs_node_ptr_generation(parent, slot)); - if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) { - if (!IS_ERR(eb)) - free_extent_buffer(eb); - eb = NULL; + if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + eb = ERR_PTR(-EIO); } return eb; @@ -1926,8 +1907,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); - if (!child) { - ret = -EROFS; + if (IS_ERR(child)) { + ret = PTR_ERR(child); btrfs_handle_fs_error(root->fs_info, ret, NULL); goto enospc; } @@ -1965,6 +1946,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, return 0; left = read_node_slot(root, parent, pslot - 1); + if (IS_ERR(left)) + left = NULL; + if (left) { btrfs_tree_lock(left); btrfs_set_lock_blocking(left); @@ -1975,7 +1959,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } } + right = read_node_slot(root, parent, pslot + 1); + if (IS_ERR(right)) + right = NULL; + if (right) { btrfs_tree_lock(right); btrfs_set_lock_blocking(right); @@ -2130,6 +2118,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, return 1; left = read_node_slot(root, parent, pslot - 1); + if (IS_ERR(left)) + left = NULL; /* first, try to make some room in the middle buffer */ if (left) { @@ -2180,6 +2170,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, free_extent_buffer(left); } right = read_node_slot(root, parent, pslot + 1); + if (IS_ERR(right)) + right = NULL; /* * then try to empty the right most buffer into the middle @@ -2248,7 +2240,6 @@ static void reada_for_search(struct btrfs_root *root, u64 search; u64 target; u64 nread = 0; - u64 gen; struct extent_buffer *eb; u32 nr; u32 blocksize; @@ -2293,7 +2284,6 @@ static void reada_for_search(struct btrfs_root *root, search = btrfs_node_blockptr(node, nr); if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { - gen = btrfs_node_ptr_generation(node, nr); readahead_tree_block(root, search); nread += blocksize; } @@ -2510,6 +2500,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, if (!btrfs_buffer_uptodate(tmp, 0, 0)) ret = -EIO; free_extent_buffer(tmp); + } else { + ret = PTR_ERR(tmp); } return ret; } @@ -2773,8 +2765,10 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) + if (!should_cow_block(trans, root, b)) { + trans->dirty = true; goto cow_done; + } /* * must have write locks on this node and the @@ -2823,6 +2817,8 @@ cow_done: } ret = key_search(b, key, level, &prev_cmp, &slot); + if (ret < 0) + goto done; if (level != 0) { int dec = 0; @@ -3229,7 +3225,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, push_items); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } 
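/*
 * Sketch of the error-pointer convention read_node_slot() switches to
 * above, built on the stock <linux/err.h> helpers: an out-of-range slot
 * now yields -ENOENT and a failed read -EIO, where both used to collapse
 * into NULL. The helper below is illustrative, not from the patch.
 */
#include <linux/err.h>

struct extent_buffer;

static inline struct extent_buffer *
example_check_child(struct extent_buffer *eb, int uptodate)
{
	if (!eb)
		return ERR_PTR(-ENOENT);	/* nothing at that slot */
	if (!uptodate)
		return ERR_PTR(-EIO);		/* read completed with errors */
	return eb;
}
/* caller pattern: if (IS_ERR(eb)) return PTR_ERR(eb); */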
copy_extent_buffer(dst, src, @@ -3304,7 +3300,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0, src_nritems - push_items, push_items); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } copy_extent_buffer(dst, src, @@ -3508,7 +3504,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } copy_extent_buffer(split, c, @@ -3762,7 +3758,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); right = read_node_slot(root, upper, slot + 1); - if (right == NULL) + /* + * slot + 1 is not valid or we fail to read the right node, + * no big deal, just return. + */ + if (IS_ERR(right)) return 1; btrfs_tree_lock(right); @@ -3992,7 +3992,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); left = read_node_slot(root, path->nodes[1], slot - 1); - if (left == NULL) + /* + * slot - 1 is not valid or we fail to read the left node, + * no big deal, just return. + */ + if (IS_ERR(left)) return 1; btrfs_tree_lock(left); @@ -4307,7 +4311,11 @@ again: if (path->slots[1] == 0) fixup_low_keys(fs_info, path, &disk_key, 1); } - btrfs_mark_buffer_dirty(right); + /* + * We create a new leaf 'right' for the required ins_len and + * we'll do btrfs_mark_buffer_dirty() on this leaf after copying + * the content of ins_len to 'right'. + */ return ret; } @@ -4738,8 +4746,9 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, if (btrfs_leaf_free_space(root, leaf) < total_size) { btrfs_print_leaf(root, leaf); - btrfs_crit(root->fs_info, "not enough freespace need %u have %d", - total_size, btrfs_leaf_free_space(root, leaf)); + btrfs_crit(root->fs_info, + "not enough freespace need %u have %d", + total_size, btrfs_leaf_free_space(root, leaf)); BUG(); } @@ -4748,8 +4757,9 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, if (old_data < data_end) { btrfs_print_leaf(root, leaf); - btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d", - slot, old_data, data_end); + btrfs_crit(root->fs_info, + "slot %d old_data %d data_end %d", + slot, old_data, data_end); BUG_ON(1); } /* @@ -4759,7 +4769,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr( i); + item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, ioff - total_data, &token); @@ -5199,7 +5209,10 @@ find_next_key: } btrfs_set_path_blocking(path); cur = read_node_slot(root, cur, slot); - BUG_ON(!cur); /* -ENOMEM */ + if (IS_ERR(cur)) { + ret = PTR_ERR(cur); + goto out; + } btrfs_tree_read_lock(cur); @@ -5218,15 +5231,21 @@ out: return ret; } -static void tree_move_down(struct btrfs_root *root, +static int tree_move_down(struct btrfs_root *root, struct btrfs_path *path, int *level, int root_level) { + struct extent_buffer *eb; + BUG_ON(*level == 0); - path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], - path->slots[*level]); + eb = read_node_slot(root, path->nodes[*level], path->slots[*level]); + if (IS_ERR(eb)) + return PTR_ERR(eb); + + path->nodes[*level - 1] = eb; 
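/*
 * Note on the control flow here: with tree_move_down() returning int, the
 * PTR_ERR() value from read_node_slot() propagates through tree_advance()
 * into btrfs_compare_trees(), which below starts telling ret == -1 (end
 * of tree reached) apart from other negative returns, real errors that
 * jump to the out label instead of being treated as end-of-tree.
 */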
path->slots[*level - 1] = 0; (*level)--; + return 0; } static int tree_move_next_or_upnext(struct btrfs_root *root, @@ -5271,8 +5290,7 @@ static int tree_advance(struct btrfs_root *root, if (*level == 0 || !allow_down) { ret = tree_move_next_or_upnext(root, path, level, root_level); } else { - tree_move_down(root, path, level, root_level); - ret = 0; + ret = tree_move_down(root, path, level, root_level); } if (ret >= 0) { if (*level == 0) @@ -5446,8 +5464,10 @@ int btrfs_compare_trees(struct btrfs_root *left_root, left_root_level, advance_left != ADVANCE_ONLY_NEXT, &left_key); - if (ret < 0) + if (ret == -1) left_end_reached = ADVANCE; + else if (ret < 0) + goto out; advance_left = 0; } if (advance_right && !right_end_reached) { @@ -5455,8 +5475,10 @@ int btrfs_compare_trees(struct btrfs_root *left_root, right_root_level, advance_right != ADVANCE_ONLY_NEXT, &right_key); - if (ret < 0) + if (ret == -1) right_end_reached = ADVANCE; + else if (ret < 0) + goto out; advance_right = 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 101c3cfd3f7c..0b8ce2b9f7d0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -37,6 +37,7 @@ #include <linux/workqueue.h> #include <linux/security.h> #include <linux/sizes.h> +#include <linux/dynamic_debug.h> #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -117,6 +118,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) #define BTRFS_FS_STATE_REMOUNTING 1 #define BTRFS_FS_STATE_TRANS_ABORTED 2 #define BTRFS_FS_STATE_DEV_REPLACING 3 +#define BTRFS_FS_STATE_DUMMY_FS_INFO 4 #define BTRFS_BACKREF_REV_MAX 256 #define BTRFS_BACKREF_REV_SHIFT 56 @@ -144,21 +146,6 @@ struct btrfs_header { u8 level; } __attribute__ ((__packed__)); -#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ - sizeof(struct btrfs_header)) / \ - sizeof(struct btrfs_key_ptr)) -#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->nodesize)) -#define BTRFS_FILE_EXTENT_INLINE_DATA_START \ - (offsetof(struct btrfs_file_extent_item, disk_bytenr)) -#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ - sizeof(struct btrfs_item) - \ - BTRFS_FILE_EXTENT_INLINE_DATA_START) -#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ - sizeof(struct btrfs_item) -\ - sizeof(struct btrfs_dir_item)) - - /* * this is a very generous portion of the super block, giving us * room to translate 14 chunks with 3 stripes each. @@ -265,7 +252,8 @@ struct btrfs_super_block { #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP \ - (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE) + (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \ + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID) #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL @@ -439,6 +427,9 @@ struct btrfs_space_info { struct list_head list; /* Protected by the spinlock 'lock'. 
*/ struct list_head ro_bgs; + struct list_head priority_tickets; + struct list_head tickets; + u64 tickets_id; struct rw_semaphore groups_sem; /* for block groups in our same type */ @@ -687,9 +678,25 @@ struct btrfs_device; struct btrfs_fs_devices; struct btrfs_balance_control; struct btrfs_delayed_root; + +#define BTRFS_FS_BARRIER 1 +#define BTRFS_FS_CLOSING_START 2 +#define BTRFS_FS_CLOSING_DONE 3 +#define BTRFS_FS_LOG_RECOVERING 4 +#define BTRFS_FS_OPEN 5 +#define BTRFS_FS_QUOTA_ENABLED 6 +#define BTRFS_FS_QUOTA_ENABLING 7 +#define BTRFS_FS_QUOTA_DISABLING 8 +#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9 +#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10 +#define BTRFS_FS_BTREE_ERR 11 +#define BTRFS_FS_LOG1_ERR 12 +#define BTRFS_FS_LOG2_ERR 13 + struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + unsigned long flags; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *chunk_root; @@ -918,10 +925,6 @@ struct btrfs_fs_info { int thread_pool_size; struct kobject *space_info_kobj; - int do_barriers; - int closing; - int log_root_recovering; - int open; u64 total_pinned; @@ -998,17 +1001,6 @@ struct btrfs_fs_info { #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY u32 check_integrity_print_mask; #endif - /* - * quota information - */ - unsigned int quota_enabled:1; - - /* - * quota_enabled only changes state after a commit. This holds the - * next state. - */ - unsigned int pending_quota_state:1; - /* is qgroup tracking in a consistent state? */ u64 qgroup_flags; @@ -1040,6 +1032,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *qgroup_rescan_workers; struct completion qgroup_rescan_completion; struct btrfs_work qgroup_rescan_work; + bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */ /* filesystem state */ unsigned long fs_state; @@ -1071,7 +1064,6 @@ struct btrfs_fs_info { wait_queue_head_t replace_wait; struct semaphore uuid_tree_rescan_sem; - unsigned int update_uuid_tree_gen:1; /* Used to reclaim the metadata space in the background. */ struct work_struct async_reclaim_work; @@ -1090,7 +1082,8 @@ struct btrfs_fs_info { */ struct list_head pinned_chunks; - int creating_free_space_tree; + /* Used to record internally whether fs has been frozen */ + int fs_frozen; }; struct btrfs_subvolume_writers { @@ -1112,12 +1105,11 @@ struct btrfs_subvolume_writers { #define BTRFS_ROOT_REF_COWS 1 #define BTRFS_ROOT_TRACK_DIRTY 2 #define BTRFS_ROOT_IN_RADIX 3 -#define BTRFS_ROOT_DUMMY_ROOT 4 -#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 5 -#define BTRFS_ROOT_DEFRAG_RUNNING 6 -#define BTRFS_ROOT_FORCE_COW 7 -#define BTRFS_ROOT_MULTI_LOG_TASKS 8 -#define BTRFS_ROOT_DIRTY 9 +#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 4 +#define BTRFS_ROOT_DEFRAG_RUNNING 5 +#define BTRFS_ROOT_FORCE_COW 6 +#define BTRFS_ROOT_MULTI_LOG_TASKS 7 +#define BTRFS_ROOT_DIRTY 8 /* * in ram representation of the tree. 
extent_root is used for all allocations @@ -1179,8 +1171,10 @@ struct btrfs_root { u64 highest_objectid; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* only used with CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled */ u64 alloc_bytenr; +#endif u64 defrag_trans_start; struct btrfs_key defrag_progress; @@ -1257,6 +1251,39 @@ struct btrfs_root { atomic_t qgroup_meta_rsv; }; +static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 blocksize) +{ + return blocksize - sizeof(struct btrfs_header); +} + +static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_root *root) +{ + return __BTRFS_LEAF_DATA_SIZE(root->nodesize); +} + +static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_root *root) +{ + return BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); +} + +static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_root *root) +{ + return BTRFS_LEAF_DATA_SIZE(root) / sizeof(struct btrfs_key_ptr); +} + +#define BTRFS_FILE_EXTENT_INLINE_DATA_START \ + (offsetof(struct btrfs_file_extent_item, disk_bytenr)) +static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_root *root) +{ + return BTRFS_MAX_ITEM_SIZE(root) - + BTRFS_FILE_EXTENT_INLINE_DATA_START; +} + +static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_root *root) +{ + return BTRFS_MAX_ITEM_SIZE(root) - sizeof(struct btrfs_dir_item); +} + /* * Flags for mount options. * @@ -1297,21 +1324,21 @@ struct btrfs_root { #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) -#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ +#define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \ BTRFS_MOUNT_##opt) -#define btrfs_set_and_info(root, opt, fmt, args...) \ +#define btrfs_set_and_info(fs_info, opt, fmt, args...) \ { \ - if (!btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_set_opt(root->fs_info->mount_opt, opt); \ + if (!btrfs_test_opt(fs_info, opt)) \ + btrfs_info(fs_info, fmt, ##args); \ + btrfs_set_opt(fs_info->mount_opt, opt); \ } -#define btrfs_clear_and_info(root, opt, fmt, args...) \ +#define btrfs_clear_and_info(fs_info, opt, fmt, args...) 
\ { \ - if (btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_clear_opt(root->fs_info->mount_opt, opt); \ + if (btrfs_test_opt(fs_info, opt)) \ + btrfs_info(fs_info, fmt, ##args); \ + btrfs_clear_opt(fs_info->mount_opt, opt); \ } #ifdef CONFIG_BTRFS_DEBUG @@ -1319,9 +1346,9 @@ static inline int btrfs_should_fragment_free_space(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { - return (btrfs_test_opt(root, FRAGMENT_METADATA) && + return (btrfs_test_opt(root->fs_info, FRAGMENT_METADATA) && block_group->flags & BTRFS_BLOCK_GROUP_METADATA) || - (btrfs_test_opt(root, FRAGMENT_DATA) && + (btrfs_test_opt(root->fs_info, FRAGMENT_DATA) && block_group->flags & BTRFS_BLOCK_GROUP_DATA); } #endif @@ -1409,13 +1436,13 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token) #define cpu_to_le8(v) (v) #define __le8 u8 -#define read_eb_member(eb, ptr, type, member, result) ( \ +#define read_eb_member(eb, ptr, type, member, result) (\ read_extent_buffer(eb, (char *)(result), \ ((unsigned long)(ptr)) + \ offsetof(type, member), \ sizeof(((type *)0)->member))) -#define write_eb_member(eb, ptr, type, member, result) ( \ +#define write_eb_member(eb, ptr, type, member, result) (\ write_extent_buffer(eb, (char *)(result), \ ((unsigned long)(ptr)) + \ offsetof(type, member), \ @@ -2267,6 +2294,21 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) return offsetof(struct btrfs_leaf, items); } +/* + * The leaf data grows from end-to-front in the node. + * this returns the address of the start of the last item, + * which is the stop of the leaf data stack + */ +static inline unsigned int leaf_data_end(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + u32 nr = btrfs_header_nritems(leaf); + + if (nr == 0) + return BTRFS_LEAF_DATA_SIZE(root); + return btrfs_item_offset_nr(leaf, nr - 1); +} + /* struct btrfs_file_extent_item */ BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, @@ -2518,7 +2560,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait); + unsigned long count, u64 transid, int wait); int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, @@ -2556,7 +2598,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins); -int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, +int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data, int delalloc); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -2624,6 +2666,15 @@ enum btrfs_reserve_flush_enum { BTRFS_RESERVE_FLUSH_ALL, }; +enum btrfs_flush_state { + FLUSH_DELAYED_ITEMS_NR = 1, + FLUSH_DELAYED_ITEMS = 2, + FLUSH_DELALLOC = 3, + FLUSH_DELALLOC_WAIT = 4, + ALLOC_CHUNK = 5, + COMMIT_TRANS = 6, +}; + int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len); int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 start, 
u64 len); @@ -2661,8 +2712,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 min_reserved, enum btrfs_reserve_flush_enum flush); int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, - struct btrfs_block_rsv *dst_rsv, - u64 num_bytes); + struct btrfs_block_rsv *dst_rsv, u64 num_bytes, + int update_size); int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *dest, u64 num_bytes, int min_factor); @@ -2832,10 +2883,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) { /* - * Get synced with close_ctree() + * Do it this way so we only ever do one test_bit in the normal case. */ - smp_mb(); - return fs_info->closing; + if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { + if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) + return 2; + return 1; + } + return 0; } /* @@ -2875,9 +2930,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); /* root-item.c */ -int btrfs_find_root_ref(struct btrfs_root *tree_root, - struct btrfs_path *path, - u64 root_id, u64 ref_id); int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, u64 root_id, u64 ref_id, u64 dirid, u64 sequence, @@ -3086,12 +3138,12 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, int nr); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, - struct extent_state **cached_state); + struct extent_state **cached_state, int dedupe); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, struct btrfs_root *new_root, struct btrfs_root *parent_root, u64 new_dirid); -int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); @@ -3129,7 +3181,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, struct btrfs_trans_handle *trans, int mode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint); -int btrfs_inode_check_errors(struct inode *inode); extern const struct dentry_operations btrfs_dentry_operations; #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS void btrfs_test_inode_set_ops(struct inode *inode); @@ -3205,14 +3256,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, unsigned long new_flags); int btrfs_sync_fs(struct super_block *sb, int wait); +static inline __printf(2, 3) +void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) +{ +} + #ifdef CONFIG_PRINTK __printf(2, 3) void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); #else -static inline __printf(2, 3) -void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) -{ -} +#define btrfs_printk(fs_info, fmt, args...) \ + btrfs_no_printk(fs_info, fmt, ##args) #endif #define btrfs_emerg(fs_info, fmt, args...) \ @@ -3283,7 +3337,35 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args) #define btrfs_info_rl(fs_info, fmt, args...) \ btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args) -#ifdef DEBUG + +#if defined(CONFIG_DYNAMIC_DEBUG) +#define btrfs_debug(fs_info, fmt, args...) 
\ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ + btrfs_printk(fs_info, KERN_DEBUG fmt, ##args); \ +} while (0) +#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ + btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args); \ +} while (0) +#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, \ + ##args);\ +} while (0) +#define btrfs_debug_rl(fs_info, fmt, args...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)) \ + btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, \ + ##args); \ +} while (0) +#elif defined(DEBUG) #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_in_rcu(fs_info, fmt, args...) \ @@ -3294,13 +3376,13 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args) #else #define btrfs_debug(fs_info, fmt, args...) \ - no_printk(KERN_DEBUG fmt, ##args) + btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_in_rcu(fs_info, fmt, args...) \ - no_printk(KERN_DEBUG fmt, ##args) + btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ - no_printk(KERN_DEBUG fmt, ##args) + btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl(fs_info, fmt, args...) \ - no_printk(KERN_DEBUG fmt, ##args) + btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) #endif #define btrfs_printk_in_rcu(fs_info, fmt, args...) \ @@ -3331,7 +3413,7 @@ do { \ __cold static inline void assfail(char *expr, char *file, int line) { - pr_err("BTRFS: assertion failed: %s, file: %s, line: %d", + pr_err("assertion failed: %s, file: %s, line: %d\n", expr, file, line); BUG(); } @@ -3351,23 +3433,23 @@ const char *btrfs_decode_error(int errno); __cold void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root, const char *function, + const char *function, unsigned int line, int errno); /* * Call btrfs_abort_transaction as early as possible when an error condition is * detected, that way the exact line number is reported. 
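 * After this change the handle itself supplies the fs_info, so the
 * TRANS_ABORTED state bit is set through (trans)->fs_info->fs_state and
 * no root pointer is needed at the call site, as the macro below shows.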
*/ -#define btrfs_abort_transaction(trans, root, errno) \ +#define btrfs_abort_transaction(trans, errno) \ do { \ /* Report first abort since mount */ \ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ - &((root)->fs_info->fs_state))) { \ + &((trans)->fs_info->fs_state))) { \ WARN(1, KERN_DEBUG \ "BTRFS: Transaction aborted (error %d)\n", \ (errno)); \ } \ - __btrfs_abort_transaction((trans), (root), __func__, \ + __btrfs_abort_transaction((trans), __func__, \ __LINE__, (errno)); \ } while (0) @@ -3599,13 +3681,13 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) void btrfs_test_destroy_inode(struct inode *inode); #endif -static inline int btrfs_test_is_dummy_root(struct btrfs_root *root) +static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) { #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + if (unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, + &fs_info->fs_state))) return 1; #endif return 0; } - #endif diff --git a/fs/btrfs/dedupe.h b/fs/btrfs/dedupe.h new file mode 100644 index 000000000000..83ebfe28da9e --- /dev/null +++ b/fs/btrfs/dedupe.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2016 Fujitsu. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef __BTRFS_DEDUPE__ +#define __BTRFS_DEDUPE__ + +/* later in-band dedupe will expand this struct */ +struct btrfs_dedupe_hash; +#endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 61561c2a3f96..0fcf5f25d524 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -34,7 +34,7 @@ int __init btrfs_delayed_inode_init(void) delayed_node_cache = kmem_cache_create("btrfs_delayed_node", sizeof(struct btrfs_delayed_node), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + SLAB_MEM_SPREAD, NULL); if (!delayed_node_cache) return -ENOMEM; @@ -385,11 +385,8 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( struct btrfs_delayed_node *delayed_node, struct btrfs_key *key) { - struct btrfs_delayed_item *item; - - item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key, + return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key, NULL, NULL); - return item; } static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, @@ -553,7 +550,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, dst_rsv = &root->fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(root, 1); - ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); + ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); if (!ret) { trace_btrfs_space_reservation(root->fs_info, "delayed_item", item->key.objectid, @@ -598,6 +595,29 @@ static int btrfs_delayed_inode_reserve_metadata( num_bytes = btrfs_calc_trans_metadata_size(root, 1); /* + * If our block_rsv is the delalloc block reserve then check and see if + * we have our extra reservation for updating the inode. If not fall + * through and try to reserve space quickly. + * + * We used to try and steal from the delalloc block rsv or the global + * reserve, but we'd steal a full reservation, which isn't kind. We are + * here through delalloc which means we've likely just cowed down close + * to the leaf that contains the inode, so we would steal less just + * doing the fallback inode update, so if we do end up having to steal + * from the global block rsv we hopefully only steal one or two blocks + * worth which is less likely to hurt us. + */ + if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { + spin_lock(&BTRFS_I(inode)->lock); + if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, + &BTRFS_I(inode)->runtime_flags)) + release = true; + else + src_rsv = NULL; + spin_unlock(&BTRFS_I(inode)->lock); + } + + /* * btrfs_dirty_inode will update the inode under btrfs_join_transaction * which doesn't reserve space for speed. This is a problem since we * still need to reserve space for this update, so try to reserve the @@ -626,51 +646,10 @@ static int btrfs_delayed_inode_reserve_metadata( num_bytes, 1); } return ret; - } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { - spin_lock(&BTRFS_I(inode)->lock); - if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) { - spin_unlock(&BTRFS_I(inode)->lock); - release = true; - goto migrate; - } - spin_unlock(&BTRFS_I(inode)->lock); - - /* Ok we didn't have space pre-reserved. This shouldn't happen - * too often but it can happen if we do delalloc to an existing - * inode which gets dirtied because of the time update, and then - * isn't touched again until after the transaction commits and - * then we try to write out the data. First try to be nice and - * reserve something strictly for us. 
If not be a pain and try - * to steal from the delalloc block rsv. - */ - ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, - BTRFS_RESERVE_NO_FLUSH); - if (!ret) - goto out; - - ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); - if (!ret) - goto out; - - if (btrfs_test_opt(root, ENOSPC_DEBUG)) { - btrfs_debug(root->fs_info, - "block rsv migrate returned %d", ret); - WARN_ON(1); - } - /* - * Ok this is a problem, let's just steal from the global rsv - * since this really shouldn't happen that often. - */ - ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv, - dst_rsv, num_bytes); - goto out; } -migrate: - ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); + ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); -out: /* * Migrate only takes a reservation, it doesn't touch the size of the * block_rsv. This is to simplify people who don't normally have things @@ -1188,7 +1167,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, if (ret) { btrfs_release_delayed_node(curr_node); curr_node = NULL; - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); break; } @@ -1499,11 +1478,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&delayed_node->mutex); ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); if (unlikely(ret)) { - btrfs_err(root->fs_info, "err add delayed dir index item(name: %.*s) " - "into the insertion tree of the delayed node" - "(root id: %llu, inode id: %llu, errno: %d)", - name_len, name, delayed_node->root->objectid, - delayed_node->inode_id, ret); + btrfs_err(root->fs_info, + "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", + name_len, name, delayed_node->root->objectid, + delayed_node->inode_id, ret); BUG(); } mutex_unlock(&delayed_node->mutex); @@ -1571,11 +1549,9 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&node->mutex); ret = __btrfs_add_delayed_deletion_item(node, item); if (unlikely(ret)) { - btrfs_err(root->fs_info, "err add delayed dir index item(index: %llu) " - "into the deletion tree of the delayed node" - "(root id: %llu, inode id: %llu, errno: %d)", - index, node->root->objectid, node->inode_id, - ret); + btrfs_err(root->fs_info, + "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", + index, node->root->objectid, node->inode_id, ret); BUG(); } mutex_unlock(&node->mutex); @@ -1606,15 +1582,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) return 0; } -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list) +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_item *item; delayed_node = btrfs_get_delayed_node(inode); if (!delayed_node) - return; + return false; + + /* + * We can only do one readdir with delayed items at a time because of + * item->readdir_list. + */ + inode_unlock_shared(inode); + inode_lock(inode); mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); @@ -1641,10 +1625,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, * requeue or dequeue this delayed node. 
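 * (The shared-to-exclusive i_rwsem switch at the top of this function is
 * what guarantees a single delayed-item readdir at a time; the matching
 * downgrade_write() happens in btrfs_readdir_put_delayed_items() once the
 * insertion and deletion lists have been consumed.)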
*/ atomic_dec(&delayed_node->refs); + + return true; } -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list) +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_item *curr, *next; @@ -1659,6 +1646,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list, if (atomic_dec_and_test(&curr->refs)) kfree(curr); } + + /* + * The VFS is going to do up_read(), so we need to downgrade back to a + * read lock. + */ + downgrade_write(&inode->i_rwsem); } int btrfs_should_delete_dir_index(struct list_head *del_list, @@ -1875,7 +1868,8 @@ int btrfs_delayed_delete_inode_ref(struct inode *inode) * leads to enospc problems. This means we also can't do * delayed inode refs */ - if (BTRFS_I(inode)->root->fs_info->log_root_recovering) + if (test_bit(BTRFS_FS_LOG_RECOVERING, + &BTRFS_I(inode)->root->fs_info->flags)) return -EAGAIN; delayed_node = btrfs_get_or_create_delayed_node(inode); diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0167853c84ae..2495b3d4075f 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); void btrfs_destroy_delayed_inodes(struct btrfs_root *root); /* Used for readdir() */ -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list); -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list); +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 430b3689b112..8d93854a4b4f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -322,10 +322,11 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, elem = list_first_entry(&fs_info->tree_mod_seq_list, struct seq_list, list); if (seq >= elem->seq) { - pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n", - (u32)(seq >> 32), (u32)seq, - (u32)(elem->seq >> 32), (u32)elem->seq, - delayed_refs); + btrfs_debug(fs_info, + "holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)", + (u32)(seq >> 32), (u32)seq, + (u32)(elem->seq >> 32), (u32)elem->seq, + delayed_refs); ret = 1; } } @@ -541,7 +542,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_qgroup_extent_record *qexisting; int count_mod = 1; int must_insert_reserved = 0; @@ -606,16 +606,15 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, qrecord->num_bytes = num_bytes; qrecord->old_roots = NULL; - qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs, - qrecord); - if (qexisting) + if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info, + delayed_refs, qrecord)) kfree(qrecord); } spin_lock_init(&head_ref->lock); mutex_init(&head_ref->mutex); - trace_add_delayed_ref_head(ref, head_ref, action); + trace_add_delayed_ref_head(fs_info, ref, head_ref, action); existing = htree_insert(&delayed_refs->href_root, &head_ref->href_node); @@ -682,7 +681,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 
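/*
 * Sketch of the flag-word conversion running through these files: int
 * fields such as fs_info->log_root_recovering and ->quota_enabled become
 * numbered bits in fs_info->flags, queried with the atomic bitops. The
 * "example_" names are stand-ins; the bit value matches the
 * BTRFS_FS_LOG_RECOVERING define in the ctree.h hunk above.
 */
#include <linux/bitops.h>

#define EXAMPLE_FS_LOG_RECOVERING 4

static inline int example_log_recovering(const unsigned long *flags)
{
	return test_bit(EXAMPLE_FS_LOG_RECOVERING, flags);
}
/* writers flip the same bits with set_bit()/clear_bit() */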
ref->type = BTRFS_TREE_BLOCK_REF_KEY; full_ref->level = level; - trace_add_delayed_tree_ref(ref, full_ref, action); + trace_add_delayed_tree_ref(fs_info, ref, full_ref, action); ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); @@ -739,7 +738,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, full_ref->objectid = owner; full_ref->offset = offset; - trace_add_delayed_data_ref(ref, full_ref, action); + trace_add_delayed_data_ref(fs_info, ref, full_ref, action); ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); @@ -772,7 +771,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, if (!head_ref) goto free_ref; - if (fs_info->quota_enabled && is_fstree(ref_root)) { + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && + is_fstree(ref_root)) { record = kmalloc(sizeof(*record), GFP_NOFS); if (!record) goto free_head_ref; @@ -830,7 +830,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, return -ENOMEM; } - if (fs_info->quota_enabled && is_fstree(ref_root)) { + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && + is_fstree(ref_root)) { record = kmalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); @@ -861,33 +862,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 ref_root, u64 bytenr, u64 num_bytes) -{ - struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_delayed_ref_head *ref_head; - int ret = 0; - - if (!fs_info->quota_enabled || !is_fstree(ref_root)) - return 0; - - delayed_refs = &trans->transaction->delayed_refs; - - spin_lock(&delayed_refs->lock); - ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0); - if (!ref_head) { - ret = -ENOENT; - goto out; - } - WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root); - ref_head->qgroup_ref_root = ref_root; - ref_head->qgroup_reserved = num_bytes; -out: - spin_unlock(&delayed_refs->lock); - return ret; -} - int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, @@ -940,28 +914,28 @@ int btrfs_delayed_ref_init(void) btrfs_delayed_ref_head_cachep = kmem_cache_create( "btrfs_delayed_ref_head", sizeof(struct btrfs_delayed_ref_head), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_delayed_ref_head_cachep) goto fail; btrfs_delayed_tree_ref_cachep = kmem_cache_create( "btrfs_delayed_tree_ref", sizeof(struct btrfs_delayed_tree_ref), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_delayed_tree_ref_cachep) goto fail; btrfs_delayed_data_ref_cachep = kmem_cache_create( "btrfs_delayed_data_ref", sizeof(struct btrfs_delayed_data_ref), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_delayed_data_ref_cachep) goto fail; btrfs_delayed_extent_op_cachep = kmem_cache_create( "btrfs_delayed_extent_op", sizeof(struct btrfs_delayed_extent_op), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_delayed_extent_op_cachep) goto fail; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 5fca9534a271..43f3629760e9 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, u64 parent, u64 ref_root, u64 owner, u64 offset, u64 reserved, int action, struct 
btrfs_delayed_extent_op *extent_op); -int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 ref_root, u64 bytenr, u64 num_bytes); int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 63ef9cdf0144..05169ef30596 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -142,7 +142,7 @@ no_valid_dev_replace_entry_found: * missing */ if (!dev_replace->srcdev && - !btrfs_test_opt(dev_root, DEGRADED)) { + !btrfs_test_opt(dev_root->fs_info, DEGRADED)) { ret = -EIO; btrfs_warn(fs_info, "cannot mount because device replace operation is ongoing and"); @@ -151,7 +151,7 @@ no_valid_dev_replace_entry_found: src_devid); } if (!dev_replace->tgtdev && - !btrfs_test_opt(dev_root, DEGRADED)) { + !btrfs_test_opt(dev_root->fs_info, DEGRADED)) { ret = -EIO; btrfs_warn(fs_info, "cannot mount because device replace operation is ongoing and"); @@ -218,8 +218,9 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, } ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - btrfs_warn(fs_info, "error %d while searching for dev_replace item!", - ret); + btrfs_warn(fs_info, + "error %d while searching for dev_replace item!", + ret); goto out; } @@ -238,8 +239,9 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - btrfs_warn(fs_info, "delete too small dev_replace item failed %d!", - ret); + btrfs_warn(fs_info, + "delete too small dev_replace item failed %d!", + ret); goto out; } ret = 1; @@ -251,8 +253,8 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - btrfs_warn(fs_info, "insert dev_replace item failed %d!", - ret); + btrfs_warn(fs_info, + "insert dev_replace item failed %d!", ret); goto out; } } @@ -383,7 +385,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name, ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); if (ret) - btrfs_err(fs_info, "kobj add dev failed %d\n", ret); + btrfs_err(fs_info, "kobj add dev failed %d", ret); btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1); @@ -772,9 +774,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) break; } if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) { - btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); btrfs_info(fs_info, - "you may cancel the operation after 'mount -o degraded'"); + "cannot continue dev_replace, tgtdev is missing"); + btrfs_info(fs_info, + "you may cancel the operation after 'mount -o degraded'"); btrfs_dev_replace_unlock(dev_replace, 1); return 0; } diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 1752625fb4dd..0dc1a033275e 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -472,9 +472,10 @@ int verify_dir_item(struct btrfs_root *root, /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ if ((btrfs_dir_data_len(leaf, dir_item) + btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) { - btrfs_crit(root->fs_info, "invalid dir item name + data len: %u + %u", - (unsigned)btrfs_dir_name_len(leaf, dir_item), - (unsigned)btrfs_dir_data_len(leaf, dir_item)); + btrfs_crit(root->fs_info, + "invalid dir item name + data len: %u + %u", + (unsigned)btrfs_dir_name_len(leaf, dir_item), + (unsigned)btrfs_dir_data_len(leaf, 
dir_item)); return 1; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6628fca9f4ed..3a57f99d96aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -101,7 +101,7 @@ int __init btrfs_end_io_wq_init(void) btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq", sizeof(struct btrfs_end_io_wq), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + SLAB_MEM_SPREAD, NULL); if (!btrfs_end_io_wq_cache) return -ENOMEM; @@ -124,7 +124,6 @@ struct async_submit_bio { struct list_head list; extent_submit_bio_hook_t *submit_bio_start; extent_submit_bio_hook_t *submit_bio_done; - int rw; int mirror_num; unsigned long bio_flags; /* @@ -327,8 +326,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, read_extent_buffer(buf, &val, 0, csum_size); btrfs_warn_rl(fs_info, - "%s checksum verify failed on %llu wanted %X found %X " - "level %d", + "%s checksum verify failed on %llu wanted %X found %X level %d", fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) @@ -403,7 +401,8 @@ out: * Return 0 if the superblock checksum type matches the checksum value of that * algorithm. Pass the raw disk superblock data. */ -static int btrfs_check_super_csum(char *raw_disk_sb) +static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, + char *raw_disk_sb) { struct btrfs_super_block *disk_sb = (struct btrfs_super_block *)raw_disk_sb; @@ -429,7 +428,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb) } if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { - printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n", + btrfs_err(fs_info, "unsupported checksum algorithm %u", csum_type); ret = 1; } @@ -443,7 +442,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb) */ static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, - u64 start, u64 parent_transid) + u64 parent_transid) { struct extent_io_tree *io_tree; int failed = 0; @@ -455,8 +454,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; while (1) { - ret = read_extent_buffer_pages(io_tree, eb, start, - WAIT_COMPLETE, + ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, btree_get_extent, mirror_num); if (!ret) { if (!verify_parent_transid(io_tree, eb, @@ -548,9 +546,10 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, } #define CORRUPT(reason, eb, root, slot) \ - btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \ - "root=%llu, slot=%d", reason, \ - btrfs_header_bytenr(eb), root->objectid, slot) + btrfs_crit(root->fs_info, "corrupt %s, %s: block=%llu," \ + " root=%llu, slot=%d", \ + btrfs_header_level(eb) == 0 ? "leaf" : "node",\ + reason, btrfs_header_bytenr(eb), root->objectid, slot) static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) @@ -560,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root, u32 nritems = btrfs_header_nritems(leaf); int slot; - if (nritems == 0) + if (nritems == 0) { + struct btrfs_root *check_root; + + key.objectid = btrfs_header_owner(leaf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + check_root = btrfs_get_fs_root(root->fs_info, &key, false); + /* + * The only reason we also check NULL here is that during + * open_ctree() some roots has not yet been set up. 
+ */ + if (!IS_ERR_OR_NULL(check_root)) { + /* if leaf is the root, then it's fine */ + if (leaf->start != + btrfs_root_bytenr(&check_root->root_item)) { + CORRUPT("non-root leaf's nritems is 0", + leaf, root, 0); + return -EIO; + } + } return 0; + } /* Check the 0 item */ if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != @@ -613,6 +633,42 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } +static int check_node(struct btrfs_root *root, struct extent_buffer *node) +{ + unsigned long nr = btrfs_header_nritems(node); + struct btrfs_key key, next_key; + int slot; + u64 bytenr; + int ret = 0; + + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { + btrfs_crit(root->fs_info, + "corrupt node: block %llu root %llu nritems %lu", + node->start, root->objectid, nr); + return -EIO; + } + + for (slot = 0; slot < nr - 1; slot++) { + bytenr = btrfs_node_blockptr(node, slot); + btrfs_node_key_to_cpu(node, &key, slot); + btrfs_node_key_to_cpu(node, &next_key, slot + 1); + + if (!bytenr) { + CORRUPT("invalid item slot", node, root, slot); + ret = -EIO; + goto out; + } + + if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { + CORRUPT("bad key order", node, root, slot); + ret = -EIO; + goto out; + } + } +out: + return ret; +} + static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -683,6 +739,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, ret = -EIO; } + if (found_level > 0 && check_node(root, eb)) + ret = -EIO; + if (!ret) set_extent_buffer_uptodate(eb); err: @@ -727,7 +786,7 @@ static void end_workqueue_bio(struct bio *bio) fs_info = end_io_wq->info; end_io_wq->error = bio->bi_error; - if (bio->bi_rw & REQ_WRITE) { + if (bio_op(bio) == REQ_OP_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { wq = fs_info->endio_meta_write_workers; func = btrfs_endio_meta_write_helper; @@ -797,7 +856,7 @@ static void run_one_async_start(struct btrfs_work *work) int ret; async = container_of(work, struct async_submit_bio, work); - ret = async->submit_bio_start(async->inode, async->rw, async->bio, + ret = async->submit_bio_start(async->inode, async->bio, async->mirror_num, async->bio_flags, async->bio_offset); if (ret) @@ -830,9 +889,8 @@ static void run_one_async_done(struct btrfs_work *work) return; } - async->submit_bio_done(async->inode, async->rw, async->bio, - async->mirror_num, async->bio_flags, - async->bio_offset); + async->submit_bio_done(async->inode, async->bio, async->mirror_num, + async->bio_flags, async->bio_offset); } static void run_one_async_free(struct btrfs_work *work) @@ -844,7 +902,7 @@ static void run_one_async_free(struct btrfs_work *work) } int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - int rw, struct bio *bio, int mirror_num, + struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, extent_submit_bio_hook_t *submit_bio_start, @@ -857,7 +915,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, return -ENOMEM; async->inode = inode; - async->rw = rw; async->bio = bio; async->mirror_num = mirror_num; async->submit_bio_start = submit_bio_start; @@ -873,7 +930,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); - if (rw & REQ_SYNC) + if (bio->bi_opf & REQ_SYNC) btrfs_set_work_high_priority(&async->work); btrfs_queue_work(fs_info->workers, &async->work); @@ -903,9 +960,8 @@ static int 
btree_csum_one_bio(struct bio *bio) return ret; } -static int __btree_submit_bio_start(struct inode *inode, int rw, - struct bio *bio, int mirror_num, - unsigned long bio_flags, +static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags, u64 bio_offset) { /* @@ -915,7 +971,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, return btree_csum_one_bio(bio); } -static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, +static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { @@ -925,7 +981,7 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * when we're called for a write, we're already in the async * submission context. Just jump into btrfs_map_bio */ - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); + ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 1); if (ret) { bio->bi_error = ret; bio_endio(bio); @@ -944,14 +1000,14 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) return 1; } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { int async = check_async_write(inode, bio_flags); int ret; - if (!(rw & REQ_WRITE)) { + if (bio_op(bio) != REQ_OP_WRITE) { /* * called for a read, do the setup so that checksum validation * can happen in the async kernel threads @@ -960,21 +1016,19 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, bio, BTRFS_WQ_ENDIO_METADATA); if (ret) goto out_w_error; - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0); } else if (!async) { ret = btree_csum_one_bio(bio); if (ret) goto out_w_error; - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0); } else { /* * kthread helpers are used to submit writes so that * checksumming can happen in parallel across all CPUs */ ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, mirror_num, 0, + inode, bio, mirror_num, 0, bio_offset, __btree_submit_bio_start, __btree_submit_bio_done); @@ -1098,10 +1152,10 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr) struct inode *btree_inode = root->fs_info->btree_inode; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, WAIT_NONE, btree_get_extent, 0); + buf, WAIT_NONE, btree_get_extent, 0); free_extent_buffer(buf); } @@ -1114,12 +1168,12 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return 0; set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); - ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK, + ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, btree_get_extent, mirror_num); if (ret) { free_extent_buffer(buf); @@ -1146,8 +1200,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr) { - if (btrfs_test_is_dummy_root(root)) - return alloc_test_extent_buffer(root->fs_info, bytenr); + if 
(btrfs_is_testing(root->fs_info)) + return alloc_test_extent_buffer(root->fs_info, bytenr, + root->nodesize); return alloc_extent_buffer(root->fs_info, bytenr); } @@ -1171,10 +1226,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; - ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + ret = btree_read_extent_buffer_pages(root, buf, parent_transid); if (ret) { free_extent_buffer(buf); return ERR_PTR(ret); @@ -1232,6 +1287,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { + bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); root->node = NULL; root->commit_root = NULL; root->sectorsize = sectorsize; @@ -1286,14 +1342,14 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; - if (fs_info) + if (!dummy) extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); - if (fs_info) + if (!dummy) root->defrag_trans_start = fs_info->generation; else root->defrag_trans_start = 0; @@ -1314,15 +1370,20 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ -struct btrfs_root *btrfs_alloc_dummy_root(void) +struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info, + u32 sectorsize, u32 nodesize) { struct btrfs_root *root; - root = btrfs_alloc_root(NULL, GFP_KERNEL); + if (!fs_info) + return ERR_PTR(-EINVAL); + + root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, root, NULL, 1); - set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); + /* We don't use the stripesize in selftest, set it as sectorsize */ + __setup_root(nodesize, sectorsize, sectorsize, root, fs_info, + BTRFS_ROOT_TREE_OBJECTID); root->alloc_bytenr = 0; return root; @@ -1597,14 +1658,14 @@ int btrfs_init_fs_root(struct btrfs_root *root) ret = get_anon_bdev(&root->anon_dev); if (ret) - goto free_writers; + goto fail; mutex_lock(&root->objectid_mutex); ret = btrfs_find_highest_objectid(root, &root->highest_objectid); if (ret) { mutex_unlock(&root->objectid_mutex); - goto free_root_dev; + goto fail; } ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); @@ -1612,19 +1673,13 @@ int btrfs_init_fs_root(struct btrfs_root *root) mutex_unlock(&root->objectid_mutex); return 0; - -free_root_dev: - free_anon_bdev(root->anon_dev); -free_writers: - btrfs_free_subvolume_writers(root->subv_writers); fail: - kfree(root->free_ino_ctl); - kfree(root->free_ino_pinned); + /* the caller is responsible to call free_fs_root */ return ret; } -static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; @@ -1803,6 +1858,13 @@ static int cleaner_kthread(void *arg) if (btrfs_need_cleaner_sleep(root)) goto sleep; + /* + * Do not do anything if we might cause open_ctree() to block + * before we have finished mounting the filesystem. 
+ */ + if (!test_bit(BTRFS_FS_OPEN, &root->fs_info->flags)) + goto sleep; + if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; @@ -2293,9 +2355,8 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) fs_info->qgroup_op_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; - fs_info->quota_enabled = 0; - fs_info->pending_quota_state = 0; fs_info->qgroup_ulist = NULL; + fs_info->qgroup_rescan_running = false; mutex_init(&fs_info->qgroup_rescan_lock); } @@ -2306,17 +2367,19 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; fs_info->workers = - btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, - max_active, 16); + btrfs_alloc_workqueue(fs_info, "worker", + flags | WQ_HIGHPRI, max_active, 16); fs_info->delalloc_workers = - btrfs_alloc_workqueue("delalloc", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "delalloc", + flags, max_active, 2); fs_info->flush_workers = - btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "flush_delalloc", + flags, max_active, 0); fs_info->caching_workers = - btrfs_alloc_workqueue("cache", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0); /* * a higher idle thresh on the submit workers makes it much more @@ -2324,41 +2387,48 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, * devices */ fs_info->submit_workers = - btrfs_alloc_workqueue("submit", flags, + btrfs_alloc_workqueue(fs_info, "submit", flags, min_t(u64, fs_devices->num_devices, max_active), 64); fs_info->fixup_workers = - btrfs_alloc_workqueue("fixup", flags, 1, 0); + btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0); /* * endios are largely parallel and should have a very * low idle thresh */ fs_info->endio_workers = - btrfs_alloc_workqueue("endio", flags, max_active, 4); + btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4); fs_info->endio_meta_workers = - btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); + btrfs_alloc_workqueue(fs_info, "endio-meta", flags, + max_active, 4); fs_info->endio_meta_write_workers = - btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags, + max_active, 2); fs_info->endio_raid56_workers = - btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); + btrfs_alloc_workqueue(fs_info, "endio-raid56", flags, + max_active, 4); fs_info->endio_repair_workers = - btrfs_alloc_workqueue("endio-repair", flags, 1, 0); + btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0); fs_info->rmw_workers = - btrfs_alloc_workqueue("rmw", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2); fs_info->endio_write_workers = - btrfs_alloc_workqueue("endio-write", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "endio-write", flags, + max_active, 2); fs_info->endio_freespace_worker = - btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "freespace-write", flags, + max_active, 0); fs_info->delayed_workers = - btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "delayed-meta", flags, + max_active, 0); fs_info->readahead_workers = - btrfs_alloc_workqueue("readahead", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "readahead", flags, + max_active, 2); fs_info->qgroup_rescan_workers = - btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); + 
btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0); fs_info->extent_workers = - btrfs_alloc_workqueue("extent-refs", flags, + btrfs_alloc_workqueue(fs_info, "extent-refs", flags, min_t(u64, fs_devices->num_devices, max_active), 8); @@ -2469,8 +2539,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info, root = btrfs_read_tree_root(tree_root, &location); if (!IS_ERR(root)) { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); - fs_info->quota_enabled = 1; - fs_info->pending_quota_state = 1; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); fs_info->quota_root = root; } @@ -2517,7 +2586,7 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int max_active; - bool cleaner_mutex_locked = false; + int clear_free_space_tree = 0; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2614,6 +2683,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); + fs_info->fs_frozen = 0; fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; @@ -2661,8 +2731,7 @@ int open_ctree(struct super_block *sb, extent_io_tree_init(&fs_info->freed_extents[1], fs_info->btree_inode->i_mapping); fs_info->pinned_extents = &fs_info->freed_extents[0]; - fs_info->do_barriers = 1; - + set_bit(BTRFS_FS_BARRIER, &fs_info->flags); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); @@ -2713,7 +2782,7 @@ int open_ctree(struct super_block *sb, * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). */ - if (btrfs_check_super_csum(bh->b_data)) { + if (btrfs_check_super_csum(fs_info, bh->b_data)) { btrfs_err(fs_info, "superblock checksum mismatch"); err = -EINVAL; brelse(bh); @@ -2797,7 +2866,7 @@ int open_ctree(struct super_block *sb, nodesize = btrfs_super_nodesize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = btrfs_super_stripesize(disk_super); + stripesize = sectorsize; fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); @@ -2996,13 +3065,6 @@ retry_root_backup: goto fail_sysfs; } - /* - * Hold the cleaner_mutex thread here so that we don't block - * for a long time on btrfs_recover_relocation. cleaner_kthread - * will wait for us to finish mounting the filesystem. 
- */ - mutex_lock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = true; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) @@ -3014,8 +3076,8 @@ retry_root_backup: if (IS_ERR(fs_info->transaction_kthread)) goto fail_cleaner; - if (!btrfs_test_opt(tree_root, SSD) && - !btrfs_test_opt(tree_root, NOSSD) && + if (!btrfs_test_opt(tree_root->fs_info, SSD) && + !btrfs_test_opt(tree_root->fs_info, NOSSD) && !fs_info->fs_devices->rotating) { btrfs_info(fs_info, "detected SSD devices, enabling SSD mode"); btrfs_set_opt(fs_info->mount_opt, SSD); @@ -3028,9 +3090,9 @@ retry_root_backup: btrfs_apply_pending_changes(fs_info); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { + if (btrfs_test_opt(tree_root->fs_info, CHECK_INTEGRITY)) { ret = btrfsic_mount(tree_root, fs_devices, - btrfs_test_opt(tree_root, + btrfs_test_opt(tree_root->fs_info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ? 1 : 0, fs_info->check_integrity_print_mask); @@ -3046,7 +3108,7 @@ retry_root_backup: /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && - !btrfs_test_opt(tree_root, NOLOGREPLAY)) { + !btrfs_test_opt(tree_root->fs_info, NOLOGREPLAY)) { ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3062,8 +3124,10 @@ retry_root_backup: ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto fail_qgroup; - /* We locked cleaner_mutex before creating cleaner_kthread. */ + + mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(tree_root); + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { btrfs_warn(fs_info, "failed to recover relocation: %d", ret); @@ -3071,8 +3135,6 @@ retry_root_backup: goto fail_qgroup; } } - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; @@ -3087,7 +3149,27 @@ retry_root_backup: if (sb->s_flags & MS_RDONLY) return 0; - if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && + if (btrfs_test_opt(fs_info, CLEAR_CACHE) && + btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + clear_free_space_tree = 1; + } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) { + btrfs_warn(fs_info, "free space tree is invalid"); + clear_free_space_tree = 1; + } + + if (clear_free_space_tree) { + btrfs_info(fs_info, "clearing free space tree"); + ret = btrfs_clear_free_space_tree(fs_info); + if (ret) { + btrfs_warn(fs_info, + "failed to clear free space tree: %d", ret); + close_ctree(tree_root); + return ret; + } + } + + if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) && !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { btrfs_info(fs_info, "creating free space tree"); ret = btrfs_create_free_space_tree(fs_info); @@ -3124,18 +3206,6 @@ retry_root_backup: btrfs_qgroup_rescan_resume(fs_info); - if (btrfs_test_opt(tree_root, CLEAR_CACHE) && - btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { - btrfs_info(fs_info, "clearing free space tree"); - ret = btrfs_clear_free_space_tree(fs_info); - if (ret) { - btrfs_warn(fs_info, - "failed to clear free space tree: %d", ret); - close_ctree(tree_root); - return ret; - } - } - if (!fs_info->uuid_root) { btrfs_info(fs_info, "creating UUID tree"); ret = btrfs_create_uuid_tree(fs_info); @@ -3145,7 +3215,7 @@ retry_root_backup: close_ctree(tree_root); return ret; } - } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) || + } else if 
(btrfs_test_opt(tree_root->fs_info, RESCAN_UUID_TREE) || fs_info->generation != btrfs_super_uuid_tree_generation(disk_super)) { btrfs_info(fs_info, "checking UUID tree"); @@ -3157,10 +3227,9 @@ retry_root_backup: return ret; } } else { - fs_info->update_uuid_tree_gen = 1; + set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); } - - fs_info->open = 1; + set_bit(BTRFS_FS_OPEN, &fs_info->flags); /* * backuproot only affect mount behavior, and if open_ctree succeeded, @@ -3186,10 +3255,6 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: - if (cleaner_mutex_locked) { - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; - } btrfs_sysfs_remove_mounted(fs_info); fail_fsdev_sysfs: @@ -3226,7 +3291,7 @@ fail: return err; recovery_tree_root: - if (!btrfs_test_opt(tree_root, USEBACKUPROOT)) + if (!btrfs_test_opt(tree_root->fs_info, USEBACKUPROOT)) goto fail_tree_roots; free_root_pointers(fs_info, 0); @@ -3420,9 +3485,9 @@ static int write_dev_supers(struct btrfs_device *device, * to go down lazy. */ if (i == 0) - ret = btrfsic_submit_bh(WRITE_FUA, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh); else - ret = btrfsic_submit_bh(WRITE_SYNC, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); if (ret) errors++; } @@ -3486,12 +3551,13 @@ static int write_dev_flush(struct btrfs_device *device, int wait) bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; bio_get(bio); - btrfsic_submit_bio(WRITE_FLUSH, bio); + btrfsic_submit_bio(bio); return 0; } @@ -3568,7 +3634,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) } if (min_tolerated == INT_MAX) { - pr_warn("BTRFS: unknown raid flag: %llu\n", flags); + pr_warn("BTRFS: unknown raid flag: %llu", flags); min_tolerated = 0; } @@ -3641,7 +3707,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - do_barriers = !btrfs_test_opt(root, NOBARRIER); + do_barriers = !btrfs_test_opt(root->fs_info, NOBARRIER); backup_super_roots(root->fs_info); sb = root->fs_info->super_for_commit; @@ -3739,8 +3805,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log(NULL, root); + if (root->reloc_root) { + free_extent_buffer(root->reloc_root->node); + free_extent_buffer(root->reloc_root->commit_root); + btrfs_put_fs_root(root->reloc_root); + root->reloc_root = NULL; + } + } if (root->free_ino_pinned) __btrfs_remove_free_space_cache(root->free_ino_pinned); @@ -3847,11 +3920,10 @@ void close_ctree(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; int ret; - fs_info->closing = 1; - smp_mb(); + set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); /* wait for the qgroup rescan worker to stop */ - btrfs_qgroup_wait_for_completion(fs_info); + btrfs_qgroup_wait_for_completion(fs_info, false); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); @@ -3893,8 +3965,7 @@ void close_ctree(struct btrfs_root *root) kthread_stop(fs_info->transaction_kthread); kthread_stop(fs_info->cleaner_kthread); - fs_info->closing = 2; - smp_mb(); + set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); 
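/*
 * Illustrative sketch, not part of the patch: many hunks in this series
 * fold the ad-hoc status integers on btrfs_fs_info (closing, open,
 * do_barriers, quota_enabled, log_root_recovering, update_uuid_tree_gen)
 * into bits of a single flags word manipulated with the atomic bitops,
 * dropping the hand-rolled smp_mb() pairing that the plain integer
 * stores needed. The EX_* names below are hypothetical stand-ins for
 * the real BTRFS_FS_* bit numbers; only the shape of the pattern is
 * the point.
 */
#include <linux/bitops.h>
#include <linux/types.h>

enum {
	EX_FS_OPEN,		/* stand-in for BTRFS_FS_OPEN */
	EX_FS_CLOSING_START,	/* stand-in for BTRFS_FS_CLOSING_START */
	EX_FS_CLOSING_DONE,	/* stand-in for BTRFS_FS_CLOSING_DONE */
};

struct ex_fs_info {
	unsigned long flags;	/* replaces int open; int closing; ... */
};

static void ex_begin_close(struct ex_fs_info *fs)
{
	/* atomic read-modify-write; safe against concurrent testers */
	set_bit(EX_FS_CLOSING_START, &fs->flags);
}

static bool ex_fs_closing(struct ex_fs_info *fs)
{
	return test_bit(EX_FS_CLOSING_START, &fs->flags) ||
	       test_bit(EX_FS_CLOSING_DONE, &fs->flags);
}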
btrfs_free_qgroup_config(fs_info); @@ -3919,13 +3990,13 @@ void close_ctree(struct btrfs_root *root) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); - fs_info->open = 0; + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_test_opt(root, CHECK_INTEGRITY)) + if (btrfs_test_opt(root->fs_info, CHECK_INTEGRITY)) btrfsic_unmount(root, fs_info->fs_devices); #endif @@ -3990,8 +4061,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) root = BTRFS_I(buf->pages[0]->mapping->host)->root; btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) - WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " - "found %llu running %llu\n", + WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n", buf->start, transid, root->fs_info->generation); was_dirty = set_extent_buffer_dirty(buf); if (!was_dirty) @@ -4042,7 +4112,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root) int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; - return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + return btree_read_extent_buffer_pages(root, buf, parent_transid); } static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, @@ -4054,24 +4124,24 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int ret = 0; if (btrfs_super_magic(sb) != BTRFS_MAGIC) { - printk(KERN_ERR "BTRFS: no valid FS found\n"); + btrfs_err(fs_info, "no valid FS found"); ret = -EINVAL; } if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) - printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n", + btrfs_warn(fs_info, "unrecognized super flag: %llu", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n", + btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n", + btrfs_err(fs_info, "chunk_root level too big: %d >= %d", btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n", + btrfs_err(fs_info, "log_root level too big: %d >= %d", btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } @@ -4082,47 +4152,48 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, */ if (!is_power_of_2(sectorsize) || sectorsize < 4096 || sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) { - printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize); + btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize); ret = -EINVAL; } /* Only PAGE SIZE is supported yet */ if (sectorsize != PAGE_SIZE) { - printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n", - sectorsize, PAGE_SIZE); + btrfs_err(fs_info, + "sectorsize %llu not supported yet, only support %lu", + sectorsize, PAGE_SIZE); ret = -EINVAL; } if (!is_power_of_2(nodesize) || nodesize < sectorsize || nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { - printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize); + btrfs_err(fs_info, "invalid nodesize %llu", nodesize); ret = -EINVAL; } if (nodesize != 
le32_to_cpu(sb->__unused_leafsize)) { - printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n", - le32_to_cpu(sb->__unused_leafsize), - nodesize); + btrfs_err(fs_info, "invalid leafsize %u, should be %llu", + le32_to_cpu(sb->__unused_leafsize), nodesize); ret = -EINVAL; } /* Root alignment check */ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) { - printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", - btrfs_super_root(sb)); + btrfs_warn(fs_info, "tree_root block unaligned: %llu", + btrfs_super_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) { - printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n", - btrfs_super_chunk_root(sb)); + btrfs_warn(fs_info, "chunk_root block unaligned: %llu", + btrfs_super_chunk_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) { - printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", - btrfs_super_log_root(sb)); + btrfs_warn(fs_info, "log_root block unaligned: %llu", + btrfs_super_log_root(sb)); ret = -EINVAL; } if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { - printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", - fs_info->fsid, sb->dev_item.fsid); + btrfs_err(fs_info, + "dev_item UUID does not match fsid: %pU != %pU", + fs_info->fsid, sb->dev_item.fsid); ret = -EINVAL; } @@ -4130,17 +4201,27 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + btrfs_err(fs_info, "bytes_used is too small %llu", + btrfs_super_bytes_used(sb)); + ret = -EINVAL; + } + if (!is_power_of_2(btrfs_super_stripesize(sb))) { + btrfs_err(fs_info, "invalid stripesize %u", + btrfs_super_stripesize(sb)); + ret = -EINVAL; + } if (btrfs_super_num_devices(sb) > (1UL << 31)) - printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", - btrfs_super_num_devices(sb)); + btrfs_warn(fs_info, "suspicious number of devices: %llu", + btrfs_super_num_devices(sb)); if (btrfs_super_num_devices(sb) == 0) { - printk(KERN_ERR "BTRFS: number of devices is 0\n"); + btrfs_err(fs_info, "number of devices is 0"); ret = -EINVAL; } if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { - printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", - btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); + btrfs_err(fs_info, "super offset mismatch %llu != %u", + btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); ret = -EINVAL; } @@ -4149,17 +4230,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * and one chunk */ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { - printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n", - btrfs_super_sys_array_size(sb), - BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); + btrfs_err(fs_info, "system chunk array too big %u > %u", + btrfs_super_sys_array_size(sb), + BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); ret = -EINVAL; } if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)) { - printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n", - btrfs_super_sys_array_size(sb), - sizeof(struct btrfs_disk_key) - + sizeof(struct btrfs_chunk)); + btrfs_err(fs_info, "system chunk array too small %u < %zu", + btrfs_super_sys_array_size(sb), + sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)); ret = -EINVAL; } @@ -4168,14 +4249,16 @@ static int btrfs_check_super_valid(struct 
btrfs_fs_info *fs_info, * but it's still possible that it's the one that's wrong. */ if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) - printk(KERN_WARNING - "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", - btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); + btrfs_warn(fs_info, + "suspicious: generation < chunk_root_generation: %llu < %llu", + btrfs_super_generation(sb), + btrfs_super_chunk_root_generation(sb)); if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) && btrfs_super_cache_generation(sb) != (u64)-1) - printk(KERN_WARNING - "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", - btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); + btrfs_warn(fs_info, + "suspicious: generation < cache_generation: %llu < %llu", + btrfs_super_generation(sb), + btrfs_super_cache_generation(sb)); return ret; } @@ -4419,9 +4502,80 @@ again: return 0; } +static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache) +{ + struct inode *inode; + + inode = cache->io_ctl.inode; + if (inode) { + invalidate_inode_pages2(inode->i_mapping); + BTRFS_I(inode)->generation = 0; + cache->io_ctl.inode = NULL; + iput(inode); + } + btrfs_put_block_group(cache); +} + +void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache; + + spin_lock(&cur_trans->dirty_bgs_lock); + while (!list_empty(&cur_trans->dirty_bgs)) { + cache = list_first_entry(&cur_trans->dirty_bgs, + struct btrfs_block_group_cache, + dirty_list); + if (!cache) { + btrfs_err(root->fs_info, + "orphan block group dirty_bgs list"); + spin_unlock(&cur_trans->dirty_bgs_lock); + return; + } + + if (!list_empty(&cache->io_list)) { + spin_unlock(&cur_trans->dirty_bgs_lock); + list_del_init(&cache->io_list); + btrfs_cleanup_bg_io(cache); + spin_lock(&cur_trans->dirty_bgs_lock); + } + + list_del_init(&cache->dirty_list); + spin_lock(&cache->lock); + cache->disk_cache_state = BTRFS_DC_ERROR; + spin_unlock(&cache->lock); + + spin_unlock(&cur_trans->dirty_bgs_lock); + btrfs_put_block_group(cache); + spin_lock(&cur_trans->dirty_bgs_lock); + } + spin_unlock(&cur_trans->dirty_bgs_lock); + + while (!list_empty(&cur_trans->io_bgs)) { + cache = list_first_entry(&cur_trans->io_bgs, + struct btrfs_block_group_cache, + io_list); + if (!cache) { + btrfs_err(root->fs_info, + "orphan block group on io_bgs list"); + return; + } + + list_del_init(&cache->io_list); + spin_lock(&cache->lock); + cache->disk_cache_state = BTRFS_DC_ERROR; + spin_unlock(&cache->lock); + btrfs_cleanup_bg_io(cache); + } +} + void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { + btrfs_cleanup_dirty_bgs(cur_trans, root); + ASSERT(list_empty(&cur_trans->dirty_bgs)); + ASSERT(list_empty(&cur_trans->io_bgs)); + btrfs_destroy_delayed_refs(cur_trans, root); cur_trans->state = TRANS_STATE_COMMIT_START; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8e79d0070bcf..1a3237e5700f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, struct btrfs_key *location); int btrfs_init_fs_root(struct btrfs_root *root); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_roots(struct 
btrfs_fs_info *fs_info); @@ -90,7 +92,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, void btrfs_free_fs_root(struct btrfs_root *root); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -struct btrfs_root *btrfs_alloc_dummy_root(void); +struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info, + u32 sectorsize, u32 nodesize); #endif /* @@ -122,7 +125,7 @@ void btrfs_csum_final(u32 crc, char *result); int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - int rw, struct bio *bio, int mirror_num, + struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, extent_submit_bio_hook_t *submit_bio_start, extent_submit_bio_hook_t *submit_bio_done); @@ -133,6 +136,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); +void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans, + struct btrfs_root *root); void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, struct btrfs_root *root); struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a400951e8678..210c94ac8818 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -60,21 +60,6 @@ enum { CHUNK_ALLOC_FORCE = 2, }; -/* - * Control how reservations are dealt with. - * - * RESERVE_FREE - freeing a reservation. - * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for - * ENOSPC accounting - * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update - * bytes_may_use as the ENOSPC accounting is done elsewhere - */ -enum { - RESERVE_FREE = 0, - RESERVE_ALLOC = 1, - RESERVE_ALLOC_NO_ACCOUNT = 2, -}; - static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -102,15 +87,27 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, int force); static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); -static void dump_space_info(struct btrfs_space_info *info, u64 bytes, +static void dump_space_info(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *info, u64 bytes, int dump_block_groups); -static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, - int delalloc); +static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 ram_bytes, u64 num_bytes, int delalloc); +static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int delalloc); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); int btrfs_pin_extent(struct btrfs_root *root, u64 bytenr, u64 num_bytes, int reserved); +static int __reserve_metadata_bytes(struct btrfs_root *root, + struct btrfs_space_info *space_info, + u64 orig_bytes, + enum btrfs_reserve_flush_enum flush); +static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes); +static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -270,9 +267,8 @@ static int exclude_super_stripes(struct btrfs_root *root, for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 
bytenr = btrfs_sb_offset(i); - ret = btrfs_rmap_block(&root->fs_info->mapping_tree, - cache->key.objectid, bytenr, - 0, &logical, &nr, &stripe_len); + ret = btrfs_rmap_block(root->fs_info, cache->key.objectid, + bytenr, 0, &logical, &nr, &stripe_len); if (ret) return ret; @@ -734,11 +730,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, static struct btrfs_block_group_cache * btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct btrfs_block_group_cache *cache; - - cache = block_group_cache_tree_search(info, bytenr, 0); - - return cache; + return block_group_cache_tree_search(info, bytenr, 0); } /* @@ -748,11 +740,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr) { - struct btrfs_block_group_cache *cache; - - cache = block_group_cache_tree_search(info, bytenr, 1); - - return cache; + return block_group_cache_tree_search(info, bytenr, 1); } static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, @@ -2042,8 +2030,13 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, struct btrfs_bio *bbio = NULL; + /* + * Avoid races with device replace and make sure our bbio has devices + * associated to its stripes that don't go away while we are discarding. + */ + btrfs_bio_counter_inc_blocked(root->fs_info); /* Tell the block device(s) that the sectors can be discarded */ - ret = btrfs_map_block(root->fs_info, REQ_DISCARD, + ret = btrfs_map_block(root->fs_info, REQ_OP_DISCARD, bytenr, &num_bytes, &bbio, 0); /* Error condition is -ENOMEM */ if (!ret) { @@ -2074,6 +2067,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, } btrfs_put_bbio(bbio); } + btrfs_bio_counter_dec(root->fs_info); if (actual_bytes) *actual_bytes = discarded_bytes; @@ -2164,7 +2158,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path, bytenr, parent, root_objectid, owner, offset, refs_to_add); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); out: btrfs_free_path(path); return ret; @@ -2188,7 +2182,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ref = btrfs_delayed_node_to_data_ref(node); - trace_run_delayed_data_ref(node, ref, node->action); + trace_run_delayed_data_ref(root->fs_info, node, ref, node->action); if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; @@ -2343,7 +2337,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, SKINNY_METADATA); ref = btrfs_delayed_node_to_tree_ref(node); - trace_run_delayed_tree_ref(node, ref, node->action); + trace_run_delayed_tree_ref(root->fs_info, node, ref, node->action); if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; @@ -2358,7 +2352,13 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; } - BUG_ON(node->ref_mod != 1); + if (node->ref_mod != 1) { + btrfs_err(root->fs_info, + "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", + node->bytenr, node->ref_mod, node->action, ref_root, + parent); + return -EIO; + } if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { BUG_ON(!extent_op || !extent_op->update_flags); ret = alloc_reserved_tree_block(trans, root, @@ -2407,7 +2407,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, */ BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); - trace_run_delayed_ref_head(node, head, node->action); + 
trace_run_delayed_ref_head(root->fs_info, node, head, + node->action); if (insert_reserved) { btrfs_pin_extent(root, node->bytenr, @@ -2587,7 +2588,9 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (must_insert_reserved) locked_ref->must_insert_reserved = 1; locked_ref->processing = 0; - btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); + btrfs_debug(fs_info, + "run_delayed_extent_op returned %d", + ret); btrfs_delayed_ref_unlock(locked_ref); return ret; } @@ -2647,7 +2650,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, locked_ref->processing = 0; btrfs_delayed_ref_unlock(locked_ref); btrfs_put_delayed_ref(ref); - btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); + btrfs_debug(fs_info, "run_one_delayed_ref returned %d", + ret); return ret; } @@ -2762,7 +2766,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes) u64 num_csums_per_leaf; u64 num_csums; - csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); + csum_size = BTRFS_MAX_ITEM_SIZE(root); num_csums_per_leaf = div64_u64(csum_size, (u64)btrfs_super_csum_size(root->fs_info->super_copy)); num_csums = div64_u64(csum_bytes, root->sectorsize); @@ -2829,6 +2833,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, struct async_delayed_refs { struct btrfs_root *root; + u64 transid; int count; int error; int sync; @@ -2844,6 +2849,10 @@ static void delayed_ref_async_start(struct btrfs_work *work) async = container_of(work, struct async_delayed_refs, work); + /* if the commit is already started, we don't need to wait here */ + if (btrfs_transaction_blocked(async->root->fs_info)) + goto done; + trans = btrfs_join_transaction(async->root); if (IS_ERR(trans)) { async->error = PTR_ERR(trans); @@ -2855,10 +2864,15 @@ static void delayed_ref_async_start(struct btrfs_work *work) * wait on delayed refs */ trans->sync = true; + + /* Don't bother flushing if we got into a different transaction */ + if (trans->transid > async->transid) + goto end; + ret = btrfs_run_delayed_refs(trans, async->root, async->count); if (ret) async->error = ret; - +end: ret = btrfs_end_transaction(trans, async->root); if (ret && !async->error) async->error = ret; @@ -2870,7 +2884,7 @@ done: } int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait) + unsigned long count, u64 transid, int wait) { struct async_delayed_refs *async; int ret; @@ -2882,6 +2896,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->root = root->fs_info->tree_root; async->count = count; async->error = 0; + async->transid = transid; if (wait) async->sync = 1; else @@ -2926,7 +2941,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (trans->aborted) return 0; - if (root->fs_info->creating_free_space_tree) + if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &root->fs_info->flags)) return 0; if (root == root->fs_info->extent_root) @@ -2943,7 +2958,7 @@ again: trans->can_flush_pending_bgs = false; ret = __btrfs_run_delayed_refs(trans, root, count); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -2957,7 +2972,6 @@ again: spin_unlock(&delayed_refs->lock); goto out; } - count = (unsigned long)-1; while (node) { head = rb_entry(node, struct btrfs_delayed_ref_head, @@ -3207,7 +3221,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, u64, u64, u64, u64, u64, u64); - if (btrfs_test_is_dummy_root(root)) + if 
(btrfs_is_testing(root->fs_info)) return 0; ref_root = btrfs_header_owner(buf); @@ -3402,7 +3416,7 @@ again: * transaction, this only happens in really bad situations * anyway. */ - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_put; } WARN_ON(ret); @@ -3420,7 +3434,7 @@ again: spin_lock(&block_group->lock); if (block_group->cached != BTRFS_CACHE_FINISHED || - !btrfs_test_opt(root, SPACE_CACHE)) { + !btrfs_test_opt(root->fs_info, SPACE_CACHE)) { /* * don't bother trying to write stuff out _if_ * a) we're not cached, @@ -3473,7 +3487,6 @@ again: dcs = BTRFS_DC_SETUP; else if (ret == -ENOSPC) set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); - btrfs_free_reserved_data_space(inode, 0, num_pages); out_put: iput(inode); @@ -3497,7 +3510,7 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, struct btrfs_path *path; if (list_empty(&cur_trans->dirty_bgs) || - !btrfs_test_opt(root, SPACE_CACHE)) + !btrfs_test_opt(root->fs_info, SPACE_CACHE)) return 0; path = btrfs_alloc_path(); @@ -3642,7 +3655,7 @@ again: } spin_unlock(&cur_trans->dirty_bgs_lock); } else if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); } } @@ -3681,6 +3694,8 @@ again: goto again; } spin_unlock(&cur_trans->dirty_bgs_lock); + } else if (ret < 0) { + btrfs_cleanup_dirty_bgs(cur_trans, root); } btrfs_free_path(path); @@ -3788,7 +3803,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, cache); } if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); } /* if its not on the io list, we need to put the block group */ @@ -3896,6 +3911,7 @@ static const char *alloc_name(u64 flags) static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, + u64 bytes_readonly, struct btrfs_space_info **space_info) { struct btrfs_space_info *found; @@ -3916,8 +3932,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->disk_total += total_bytes * factor; found->bytes_used += bytes_used; found->disk_used += bytes_used * factor; + found->bytes_readonly += bytes_readonly; if (total_bytes > 0) found->full = 0; + space_info_add_new_bytes(info, found, total_bytes - + bytes_used - bytes_readonly); spin_unlock(&found->lock); *space_info = found; return 0; @@ -3943,7 +3962,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->disk_used = bytes_used * factor; found->bytes_pinned = 0; found->bytes_reserved = 0; - found->bytes_readonly = 0; + found->bytes_readonly = bytes_readonly; found->bytes_may_use = 0; found->full = 0; found->max_extent_size = 0; @@ -3952,6 +3971,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->flush = 0; init_waitqueue_head(&found->wait); INIT_LIST_HEAD(&found->ro_bgs); + INIT_LIST_HEAD(&found->tickets); + INIT_LIST_HEAD(&found->priority_tickets); ret = kobject_init_and_add(&found->kobj, &space_info_ktype, info->space_info_kobj, "%s", @@ -4252,13 +4273,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) if (ret < 0) return ret; - /* - * Use new btrfs_qgroup_reserve_data to reserve precious data space - * - * TODO: Find a good method to avoid reserve data space for NOCOW - * range, but don't impact performance on quota disable case. - */ + /* Use new btrfs_qgroup_reserve_data to reserve precious data space. 
*/ ret = btrfs_qgroup_reserve_data(inode, start, len); + if (ret) + btrfs_free_reserved_data_space_noquota(inode, start, len); return ret; } @@ -4410,10 +4428,10 @@ void check_system_chunk(struct btrfs_trans_handle *trans, thresh = btrfs_calc_trunc_metadata_size(root, num_devs) + btrfs_calc_trans_metadata_size(root, 1); - if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) { + if (left < thresh && btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) { btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu", left, thresh, type); - dump_space_info(info, 0, 0); + dump_space_info(root->fs_info, info, 0, 0); } if (left < thresh) { @@ -4438,6 +4456,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans, } } +/* + * If force is CHUNK_ALLOC_FORCE: + * - return 1 if it successfully allocates a chunk, + * - return errors including -ENOSPC otherwise. + * If force is NOT CHUNK_ALLOC_FORCE: + * - return 0 if it doesn't need to allocate a new chunk, + * - return 1 if it successfully allocates a chunk, + * - return errors including -ENOSPC otherwise. + */ static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 flags, int force) { @@ -4453,7 +4480,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { ret = update_space_info(extent_root->fs_info, flags, - 0, 0, &space_info); + 0, 0, 0, &space_info); BUG_ON(ret); /* -ENOMEM */ } BUG_ON(!space_info); /* Logic error */ @@ -4555,7 +4582,7 @@ out: */ if (trans->can_flush_pending_bgs && trans->chunk_bytes_reserved >= (u64)SZ_2M) { - btrfs_create_pending_block_groups(trans, trans->root); + btrfs_create_pending_block_groups(trans, extent_root); btrfs_trans_release_chunk_metadata(trans); } return ret; @@ -4565,12 +4592,19 @@ static int can_overcommit(struct btrfs_root *root, struct btrfs_space_info *space_info, u64 bytes, enum btrfs_reserve_flush_enum flush) { - struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; - u64 profile = btrfs_get_alloc_profile(root, 0); + struct btrfs_block_rsv *global_rsv; + u64 profile; u64 space_size; u64 avail; u64 used; + /* Don't overcommit when in mixed mode. 
*/ + if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) + return 0; + + BUG_ON(root->fs_info == NULL); + global_rsv = &root->fs_info->global_block_rsv; + profile = btrfs_get_alloc_profile(root, 0); used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly; @@ -4722,6 +4756,11 @@ skip_async: spin_unlock(&space_info->lock); break; } + if (list_empty(&space_info->tickets) && + list_empty(&space_info->priority_tickets)) { + spin_unlock(&space_info->lock); + break; + } spin_unlock(&space_info->lock); loops++; @@ -4790,13 +4829,11 @@ commit: return btrfs_commit_transaction(trans, root); } -enum flush_state { - FLUSH_DELAYED_ITEMS_NR = 1, - FLUSH_DELAYED_ITEMS = 2, - FLUSH_DELALLOC = 3, - FLUSH_DELALLOC_WAIT = 4, - ALLOC_CHUNK = 5, - COMMIT_TRANS = 6, +struct reserve_ticket { + u64 bytes; + int error; + struct list_head list; + wait_queue_head_t wait; }; static int flush_space(struct btrfs_root *root, @@ -4838,7 +4875,7 @@ static int flush_space(struct btrfs_root *root, btrfs_get_alloc_profile(root, 0), CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans, root); - if (ret == -ENOSPC) + if (ret > 0 || ret == -ENOSPC) ret = 0; break; case COMMIT_TRANS: @@ -4849,6 +4886,8 @@ static int flush_space(struct btrfs_root *root, break; } + trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes, + orig_bytes, state, ret); return ret; } @@ -4856,17 +4895,22 @@ static inline u64 btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, struct btrfs_space_info *space_info) { + struct reserve_ticket *ticket; u64 used; u64 expected; - u64 to_reclaim; + u64 to_reclaim = 0; + + list_for_each_entry(ticket, &space_info->tickets, list) + to_reclaim += ticket->bytes; + list_for_each_entry(ticket, &space_info->priority_tickets, list) + to_reclaim += ticket->bytes; + if (to_reclaim) + return to_reclaim; to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); - spin_lock(&space_info->lock); if (can_overcommit(root, space_info, to_reclaim, - BTRFS_RESERVE_FLUSH_ALL)) { - to_reclaim = 0; - goto out; - } + BTRFS_RESERVE_FLUSH_ALL)) + return 0; used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly + @@ -4882,14 +4926,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, to_reclaim = 0; to_reclaim = min(to_reclaim, space_info->bytes_may_use + space_info->bytes_reserved); -out: - spin_unlock(&space_info->lock); - return to_reclaim; } static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, - struct btrfs_fs_info *fs_info, u64 used) + struct btrfs_root *root, u64 used) { u64 thresh = div_factor_fine(space_info->total_bytes, 98); @@ -4897,73 +4938,176 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) return 0; - return (used >= thresh && !btrfs_fs_closing(fs_info) && - !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); + if (!btrfs_calc_reclaim_metadata_size(root, space_info)) + return 0; + + return (used >= thresh && !btrfs_fs_closing(root->fs_info) && + !test_bit(BTRFS_FS_STATE_REMOUNTING, + &root->fs_info->fs_state)); } -static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, - struct btrfs_fs_info *fs_info, - int flush_state) +static void wake_all_tickets(struct list_head *head) { - u64 used; + struct reserve_ticket *ticket; - spin_lock(&space_info->lock); - /* - * We run out of space and have not got any free space via flush_space, - * so 
don't bother doing async reclaim. - */ - if (flush_state > COMMIT_TRANS && space_info->full) { - spin_unlock(&space_info->lock); - return 0; - } - - used = space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - space_info->bytes_may_use; - if (need_do_async_reclaim(space_info, fs_info, used)) { - spin_unlock(&space_info->lock); - return 1; + while (!list_empty(head)) { + ticket = list_first_entry(head, struct reserve_ticket, list); + list_del_init(&ticket->list); + ticket->error = -ENOSPC; + wake_up(&ticket->wait); } - spin_unlock(&space_info->lock); - - return 0; } +/* + * This is for normal flushers, we can wait all goddamned day if we want to. We + * will loop and continuously try to flush as long as we are making progress. + * We count progress as clearing off tickets each time we have to loop. + */ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) { struct btrfs_fs_info *fs_info; struct btrfs_space_info *space_info; u64 to_reclaim; int flush_state; + int commit_cycles = 0; + u64 last_tickets_id; fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); + spin_lock(&space_info->lock); to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, space_info); - if (!to_reclaim) + if (!to_reclaim) { + space_info->flush = 0; + spin_unlock(&space_info->lock); return; + } + last_tickets_id = space_info->tickets_id; + spin_unlock(&space_info->lock); flush_state = FLUSH_DELAYED_ITEMS_NR; do { + struct reserve_ticket *ticket; + int ret; + + ret = flush_space(fs_info->fs_root, space_info, to_reclaim, + to_reclaim, flush_state); + spin_lock(&space_info->lock); + if (list_empty(&space_info->tickets)) { + space_info->flush = 0; + spin_unlock(&space_info->lock); + return; + } + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, + space_info); + ticket = list_first_entry(&space_info->tickets, + struct reserve_ticket, list); + if (last_tickets_id == space_info->tickets_id) { + flush_state++; + } else { + last_tickets_id = space_info->tickets_id; + flush_state = FLUSH_DELAYED_ITEMS_NR; + if (commit_cycles) + commit_cycles--; + } + + if (flush_state > COMMIT_TRANS) { + commit_cycles++; + if (commit_cycles > 2) { + wake_all_tickets(&space_info->tickets); + space_info->flush = 0; + } else { + flush_state = FLUSH_DELAYED_ITEMS_NR; + } + } + spin_unlock(&space_info->lock); + } while (flush_state <= COMMIT_TRANS); +} + +void btrfs_init_async_reclaim_work(struct work_struct *work) +{ + INIT_WORK(work, btrfs_async_reclaim_metadata_space); +} + +static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + struct reserve_ticket *ticket) +{ + u64 to_reclaim; + int flush_state = FLUSH_DELAYED_ITEMS_NR; + + spin_lock(&space_info->lock); + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, + space_info); + if (!to_reclaim) { + spin_unlock(&space_info->lock); + return; + } + spin_unlock(&space_info->lock); + + do { flush_space(fs_info->fs_root, space_info, to_reclaim, to_reclaim, flush_state); flush_state++; - if (!btrfs_need_do_async_reclaim(space_info, fs_info, - flush_state)) + spin_lock(&space_info->lock); + if (ticket->bytes == 0) { + spin_unlock(&space_info->lock); return; + } + spin_unlock(&space_info->lock); + + /* + * Priority flushers can't wait on delalloc without + * deadlocking. 
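 *
 * With the skip below, a priority flusher therefore walks the states
 *
 *	FLUSH_DELAYED_ITEMS_NR -> FLUSH_DELAYED_ITEMS -> ALLOC_CHUNK
 *
 * and never reaches COMMIT_TRANS, while the async worker above is
 * still free to step through every state up to and including the
 * transaction commit.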
+ */ + if (flush_state == FLUSH_DELALLOC || + flush_state == FLUSH_DELALLOC_WAIT) + flush_state = ALLOC_CHUNK; } while (flush_state < COMMIT_TRANS); } -void btrfs_init_async_reclaim_work(struct work_struct *work) +static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + struct reserve_ticket *ticket, u64 orig_bytes) + { - INIT_WORK(work, btrfs_async_reclaim_metadata_space); + DEFINE_WAIT(wait); + int ret = 0; + + spin_lock(&space_info->lock); + while (ticket->bytes > 0 && ticket->error == 0) { + ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); + if (ret) { + ret = -EINTR; + break; + } + spin_unlock(&space_info->lock); + + schedule(); + + finish_wait(&ticket->wait, &wait); + spin_lock(&space_info->lock); + } + if (!ret) + ret = ticket->error; + if (!list_empty(&ticket->list)) + list_del_init(&ticket->list); + if (ticket->bytes && ticket->bytes < orig_bytes) { + u64 num_bytes = orig_bytes - ticket->bytes; + space_info->bytes_may_use -= num_bytes; + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, num_bytes, 0); + } + spin_unlock(&space_info->lock); + + return ret; } /** * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * @root - the root we're allocating for - * @block_rsv - the block_rsv we're allocating for + * @space_info - the space info we want to allocate from * @orig_bytes - the number of bytes we want * @flush - whether or not we can flush to make our reservation * @@ -4974,81 +5118,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work) * regain reservations will be made and this will fail if there is not enough * space already. */ -static int reserve_metadata_bytes(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - u64 orig_bytes, - enum btrfs_reserve_flush_enum flush) +static int __reserve_metadata_bytes(struct btrfs_root *root, + struct btrfs_space_info *space_info, + u64 orig_bytes, + enum btrfs_reserve_flush_enum flush) { - struct btrfs_space_info *space_info = block_rsv->space_info; + struct reserve_ticket ticket; u64 used; - u64 num_bytes = orig_bytes; - int flush_state = FLUSH_DELAYED_ITEMS_NR; int ret = 0; - bool flushing = false; -again: - ret = 0; - spin_lock(&space_info->lock); - /* - * We only want to wait if somebody other than us is flushing and we - * are actually allowed to flush all things. - */ - while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing && - space_info->flush) { - spin_unlock(&space_info->lock); - /* - * If we have a trans handle we can't wait because the flusher - * may have to commit the transaction, which would mean we would - * deadlock since we are waiting for the flusher to finish, but - * hold the current transaction open. - */ - if (current->journal_info) - return -EAGAIN; - ret = wait_event_killable(space_info->wait, !space_info->flush); - /* Must have been killed, return */ - if (ret) - return -EINTR; - - spin_lock(&space_info->lock); - } + ASSERT(orig_bytes); + ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); + spin_lock(&space_info->lock); ret = -ENOSPC; used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_may_use; /* - * The idea here is that we've not already over-reserved the block group - * then we can go ahead and save our reservation first and then start - * flushing if we need to. 
Otherwise if we've already overcommitted - * lets start flushing stuff first and then come back and try to make - * our reservation. + * If we have enough space then hooray, make our reservation and carry + * on. If not see if we can overcommit, and if we can, hooray carry on. + * If not things get more complicated. */ - if (used <= space_info->total_bytes) { - if (used + orig_bytes <= space_info->total_bytes) { - space_info->bytes_may_use += orig_bytes; - trace_btrfs_space_reservation(root->fs_info, - "space_info", space_info->flags, orig_bytes, 1); - ret = 0; - } else { - /* - * Ok set num_bytes to orig_bytes since we aren't - * overocmmitted, this way we only try and reclaim what - * we need. - */ - num_bytes = orig_bytes; - } - } else { - /* - * Ok we're over committed, set num_bytes to the overcommitted - * amount plus the amount of bytes that we need for this - * reservation. - */ - num_bytes = used - space_info->total_bytes + - (orig_bytes * 2); - } - - if (ret && can_overcommit(root, space_info, orig_bytes, flush)) { + if (used + orig_bytes <= space_info->total_bytes) { + space_info->bytes_may_use += orig_bytes; + trace_btrfs_space_reservation(root->fs_info, "space_info", + space_info->flags, orig_bytes, + 1); + ret = 0; + } else if (can_overcommit(root, space_info, orig_bytes, flush)) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", space_info->flags, orig_bytes, @@ -5057,16 +5156,31 @@ again: } /* - * Couldn't make our reservation, save our place so while we're trying - * to reclaim space we can actually use it instead of somebody else - * stealing it from us. + * If we couldn't make a reservation then setup our reservation ticket + * and kick the async worker if it's not already running. * - * We make the other tasks wait for the flush only when we can flush - * all things. + * If we are a priority flusher then we just need to add our ticket to + * the list and we will do our own flushing further down. */ if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { - flushing = true; - space_info->flush = 1; + ticket.bytes = orig_bytes; + ticket.error = 0; + init_waitqueue_head(&ticket.wait); + if (flush == BTRFS_RESERVE_FLUSH_ALL) { + list_add_tail(&ticket.list, &space_info->tickets); + if (!space_info->flush) { + space_info->flush = 1; + trace_btrfs_trigger_flush(root->fs_info, + space_info->flags, + orig_bytes, flush, + "enospc"); + queue_work(system_unbound_wq, + &root->fs_info->async_reclaim_work); + } + } else { + list_add_tail(&ticket.list, + &space_info->priority_tickets); + } } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { used += orig_bytes; /* @@ -5074,40 +5188,68 @@ again: * which means we won't have fs_info->fs_root set, so don't do * the async reclaim as we will panic. 
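 *
 * (The async worker starts its reclaim-size calculation from
 * fs_info->fs_root, see btrfs_async_reclaim_metadata_space() above,
 * which is why queueing it during log replay would dereference a
 * NULL root.)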
*/ - if (!root->fs_info->log_root_recovering && - need_do_async_reclaim(space_info, root->fs_info, used) && - !work_busy(&root->fs_info->async_reclaim_work)) + if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags) && + need_do_async_reclaim(space_info, root, used) && + !work_busy(&root->fs_info->async_reclaim_work)) { + trace_btrfs_trigger_flush(root->fs_info, + space_info->flags, + orig_bytes, flush, + "preempt"); queue_work(system_unbound_wq, &root->fs_info->async_reclaim_work); + } } spin_unlock(&space_info->lock); - if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) - goto out; + return ret; - ret = flush_space(root, space_info, num_bytes, orig_bytes, - flush_state); - flush_state++; + if (flush == BTRFS_RESERVE_FLUSH_ALL) + return wait_reserve_ticket(root->fs_info, space_info, &ticket, + orig_bytes); - /* - * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock - * would happen. So skip delalloc flush. - */ - if (flush == BTRFS_RESERVE_FLUSH_LIMIT && - (flush_state == FLUSH_DELALLOC || - flush_state == FLUSH_DELALLOC_WAIT)) - flush_state = ALLOC_CHUNK; + ret = 0; + priority_reclaim_metadata_space(root->fs_info, space_info, &ticket); + spin_lock(&space_info->lock); + if (ticket.bytes) { + if (ticket.bytes < orig_bytes) { + u64 num_bytes = orig_bytes - ticket.bytes; + space_info->bytes_may_use -= num_bytes; + trace_btrfs_space_reservation(root->fs_info, + "space_info", space_info->flags, + num_bytes, 0); - if (!ret) - goto again; - else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && - flush_state < COMMIT_TRANS) - goto again; - else if (flush == BTRFS_RESERVE_FLUSH_ALL && - flush_state <= COMMIT_TRANS) - goto again; + } + list_del_init(&ticket.list); + ret = -ENOSPC; + } + spin_unlock(&space_info->lock); + ASSERT(list_empty(&ticket.list)); + return ret; +} -out: +/** + * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space + * @root - the root we're allocating for + * @block_rsv - the block_rsv we're allocating for + * @orig_bytes - the number of bytes we want + * @flush - whether or not we can flush to make our reservation + * + * This will reserve orgi_bytes number of bytes from the space info associated + * with the block_rsv. If there is not enough space it will make an attempt to + * flush out space to make room. It will do this by flushing delalloc if + * possible or committing the transaction. If flush is 0 then no attempts to + * regain reservations will be made and this will fail if there is not enough + * space already. + */ +static int reserve_metadata_bytes(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 orig_bytes, + enum btrfs_reserve_flush_enum flush) +{ + int ret; + + ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes, + flush); if (ret == -ENOSPC && unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { struct btrfs_block_rsv *global_rsv = @@ -5120,13 +5262,8 @@ out: if (ret == -ENOSPC) trace_btrfs_space_reservation(root->fs_info, "space_info:enospc", - space_info->flags, orig_bytes, 1); - if (flushing) { - spin_lock(&space_info->lock); - space_info->flush = 0; - wake_up_all(&space_info->wait); - spin_unlock(&space_info->lock); - } + block_rsv->space_info->flags, + orig_bytes, 1); return ret; } @@ -5202,6 +5339,110 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, return 0; } +/* + * This is for space we already have accounted in space_info->bytes_may_use, so + * basically when we're returning space from block_rsv's. 
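 *
 * Worked example for the loop below (sizes invented for illustration):
 * returning 8M against queued tickets of 5M and 6M fully satisfies and
 * wakes the 5M ticket, leaving 3M, then trims the 6M ticket down to a
 * 3M remainder; only bytes that no ticket wanted are subtracted from
 * bytes_may_use at the end. Priority tickets are always served before
 * the ordinary FLUSH_ALL tickets.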
+ */ +static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes) +{ + struct reserve_ticket *ticket; + struct list_head *head; + u64 used; + enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; + bool check_overcommit = false; + + spin_lock(&space_info->lock); + head = &space_info->priority_tickets; + + /* + * If we are over our limit then we need to check and see if we can + * overcommit, and if we can't then we just need to free up our space + * and not satisfy any requests. + */ + used = space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use; + if (used - num_bytes >= space_info->total_bytes) + check_overcommit = true; +again: + while (!list_empty(head) && num_bytes) { + ticket = list_first_entry(head, struct reserve_ticket, + list); + /* + * We use 0 bytes because this space is already reserved, so + * adding the ticket space would be a double count. + */ + if (check_overcommit && + !can_overcommit(fs_info->extent_root, space_info, 0, + flush)) + break; + if (num_bytes >= ticket->bytes) { + list_del_init(&ticket->list); + num_bytes -= ticket->bytes; + ticket->bytes = 0; + space_info->tickets_id++; + wake_up(&ticket->wait); + } else { + ticket->bytes -= num_bytes; + num_bytes = 0; + } + } + + if (num_bytes && head == &space_info->priority_tickets) { + head = &space_info->tickets; + flush = BTRFS_RESERVE_FLUSH_ALL; + goto again; + } + space_info->bytes_may_use -= num_bytes; + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, num_bytes, 0); + spin_unlock(&space_info->lock); +} + +/* + * This is for newly allocated space that isn't accounted in + * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent + * we use this helper. 
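 *
 * Unlike space_info_add_old_bytes() above, the bytes handed out here
 * were not yet counted in bytes_may_use, so every (partially) satisfied
 * ticket also bumps bytes_may_use by the amount it receives; whatever
 * no ticket wanted is simply left alone and shows up as free space.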
+ */ +static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes) +{ + struct reserve_ticket *ticket; + struct list_head *head = &space_info->priority_tickets; + +again: + while (!list_empty(head) && num_bytes) { + ticket = list_first_entry(head, struct reserve_ticket, + list); + if (num_bytes >= ticket->bytes) { + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, + ticket->bytes, 1); + list_del_init(&ticket->list); + num_bytes -= ticket->bytes; + space_info->bytes_may_use += ticket->bytes; + ticket->bytes = 0; + space_info->tickets_id++; + wake_up(&ticket->wait); + } else { + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, + num_bytes, 1); + space_info->bytes_may_use += num_bytes; + ticket->bytes -= num_bytes; + num_bytes = 0; + } + } + + if (num_bytes && head == &space_info->priority_tickets) { + head = &space_info->tickets; + goto again; + } +} + static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, struct btrfs_block_rsv *dest, u64 num_bytes) @@ -5236,18 +5477,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } spin_unlock(&dest->lock); } - if (num_bytes) { - spin_lock(&space_info->lock); - space_info->bytes_may_use -= num_bytes; - trace_btrfs_space_reservation(fs_info, "space_info", - space_info->flags, num_bytes, 0); - spin_unlock(&space_info->lock); - } + if (num_bytes) + space_info_add_old_bytes(fs_info, space_info, + num_bytes); } } -static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, - struct btrfs_block_rsv *dst, u64 num_bytes) +int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, + struct btrfs_block_rsv *dst, u64 num_bytes, + int update_size) { int ret; @@ -5255,7 +5493,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, if (ret) return ret; - block_rsv_add_bytes(dst, num_bytes, 1); + block_rsv_add_bytes(dst, num_bytes, update_size); return 0; } @@ -5362,13 +5600,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, return ret; } -int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, - struct btrfs_block_rsv *dst_rsv, - u64 num_bytes) -{ - return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); -} - void btrfs_block_rsv_release(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes) @@ -5381,48 +5612,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root, num_bytes); } -/* - * helper to calculate size of global block reservation. 
- * the desired value is sum of space used by extent tree, - * checksum tree and root tree - */ -static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) -{ - struct btrfs_space_info *sinfo; - u64 num_bytes; - u64 meta_used; - u64 data_used; - int csum_size = btrfs_super_csum_size(fs_info->super_copy); - - sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); - spin_lock(&sinfo->lock); - data_used = sinfo->bytes_used; - spin_unlock(&sinfo->lock); - - sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); - spin_lock(&sinfo->lock); - if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) - data_used = 0; - meta_used = sinfo->bytes_used; - spin_unlock(&sinfo->lock); - - num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * - csum_size * 2; - num_bytes += div_u64(data_used + meta_used, 50); - - if (num_bytes * 3 > meta_used) - num_bytes = div_u64(meta_used, 3); - - return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); -} - static void update_global_block_rsv(struct btrfs_fs_info *fs_info) { struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; struct btrfs_space_info *sinfo = block_rsv->space_info; u64 num_bytes; - num_bytes = calc_global_metadata_size(fs_info); + /* + * The global block rsv is based on the size of the extent tree, the + * checksum tree and the root tree. If the fs is empty we want to set + * it to a minimal amount for safety. + */ + num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) + + btrfs_root_used(&fs_info->csum_root->root_item) + + btrfs_root_used(&fs_info->tree_root->root_item); + num_bytes = max_t(u64, num_bytes, SZ_16M); spin_lock(&sinfo->lock); spin_lock(&block_rsv->lock); @@ -5520,7 +5724,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, */ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) { - struct btrfs_fs_info *fs_info = trans->root->fs_info; + struct btrfs_fs_info *fs_info = trans->fs_info; if (!trans->chunk_bytes_reserved) return; @@ -5537,7 +5741,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); + /* + * We always use trans->block_rsv here as we will have reserved space + * for our orphan when starting the transaction, using get_block_rsv() + * here will sometimes make us choose the wrong block rsv as we could be + * doing a reloc inode for a non refcounted root. 
+ */ + struct btrfs_block_rsv *src_rsv = trans->block_rsv; struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; /* @@ -5548,7 +5758,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); trace_btrfs_space_reservation(root->fs_info, "orphan", btrfs_ino(inode), num_bytes, 1); - return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); + return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } void btrfs_orphan_release_metadata(struct inode *inode) @@ -5584,7 +5794,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, int ret; struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; - if (root->fs_info->quota_enabled) { + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) { /* One for parent inode, two for dir entries */ num_bytes = 3 * root->nodesize; ret = btrfs_qgroup_reserve_meta(root, num_bytes); @@ -5603,7 +5813,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, BTRFS_RESERVE_FLUSH_ALL); if (ret == -ENOSPC && use_global_rsv) - ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1); if (ret && *qgroup_reserved) btrfs_qgroup_free_meta(root, *qgroup_reserved); @@ -5713,21 +5923,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) u64 to_reserve = 0; u64 csum_bytes; unsigned nr_extents = 0; - int extra_reserve = 0; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; bool delalloc_lock = true; u64 to_free = 0; unsigned dropped; + bool release_extra = false; /* If we are a free space inode we need to not flush since we will be in * the middle of a transaction commit. We also don't need the delalloc * mutex since we won't race with anybody. We need this mostly to make * lockdep shut its filthy mouth. + * + * If we have a transaction open (can happen if we call truncate_block + * from truncate), then we need FLUSH_LIMIT so we don't deadlock. */ if (btrfs_is_free_space_inode(inode)) { flush = BTRFS_RESERVE_NO_FLUSH; delalloc_lock = false; + } else if (current->journal_info) { + flush = BTRFS_RESERVE_FLUSH_LIMIT; } if (flush != BTRFS_RESERVE_NO_FLUSH && @@ -5744,46 +5959,37 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); BTRFS_I(inode)->outstanding_extents += nr_extents; - nr_extents = 0; + nr_extents = 0; if (BTRFS_I(inode)->outstanding_extents > BTRFS_I(inode)->reserved_extents) - nr_extents = BTRFS_I(inode)->outstanding_extents - + nr_extents += BTRFS_I(inode)->outstanding_extents - BTRFS_I(inode)->reserved_extents; - /* - * Add an item to reserve for updating the inode when we complete the - * delalloc io. - */ - if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) { - nr_extents++; - extra_reserve = 1; - } - - to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); + /* We always want to reserve a slot for updating the inode. 
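 *
 * Worked example (sizes illustrative): with nr_extents == 2 the line
 * below reserves metadata for three items; if the inode already had
 * BTRFS_INODE_DELALLOC_META_RESERVED set, the surplus inode-update
 * slot is returned at the end of this function via release_extra.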
*/ + to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1); to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); csum_bytes = BTRFS_I(inode)->csum_bytes; spin_unlock(&BTRFS_I(inode)->lock); - if (root->fs_info->quota_enabled) { + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) { ret = btrfs_qgroup_reserve_meta(root, nr_extents * root->nodesize); if (ret) goto out_fail; } - ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); + ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush); if (unlikely(ret)) { btrfs_qgroup_free_meta(root, nr_extents * root->nodesize); goto out_fail; } spin_lock(&BTRFS_I(inode)->lock); - if (extra_reserve) { - set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags); - nr_extents--; + if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, + &BTRFS_I(inode)->runtime_flags)) { + to_reserve -= btrfs_calc_trans_metadata_size(root, 1); + release_extra = true; } BTRFS_I(inode)->reserved_extents += nr_extents; spin_unlock(&BTRFS_I(inode)->lock); @@ -5794,8 +6000,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (to_reserve) trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_reserve, 1); - block_rsv_add_bytes(block_rsv, to_reserve, 1); - + if (release_extra) + btrfs_block_rsv_release(root, block_rsv, + btrfs_calc_trans_metadata_size(root, + 1)); return 0; out_fail: @@ -5887,7 +6095,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(root, dropped); - if (btrfs_test_is_dummy_root(root)) + if (btrfs_is_testing(root->fs_info)) return; trace_btrfs_space_reservation(root->fs_info, "delalloc", @@ -5904,8 +6112,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) * @start: start range we are writing to * @len: how long the range we are writing to * - * TODO: This function will finally replace old btrfs_delalloc_reserve_space() - * * This will do the following things * * o reserve space in data space info for num bytes @@ -6002,7 +6208,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); - if (btrfs_test_opt(root, SPACE_CACHE) && + if (btrfs_test_opt(root->fs_info, SPACE_CACHE) && cache->disk_cache_state < BTRFS_DC_CLEAR) cache->disk_cache_state = BTRFS_DC_CLEAR; @@ -6027,6 +6233,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); + trace_btrfs_space_reservation(root->fs_info, "pinned", + cache->space_info->flags, + num_bytes, 1); set_extent_dirty(info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); @@ -6101,10 +6310,10 @@ static int pin_down_extent(struct btrfs_root *root, spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); + trace_btrfs_space_reservation(root->fs_info, "pinned", + cache->space_info->flags, num_bytes, 1); set_extent_dirty(root->fs_info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - if (reserved) - trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); return 0; } @@ -6280,19 +6489,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) } /** - * btrfs_update_reserved_bytes - update the block_group and space info counters + * btrfs_add_reserved_bytes - update the block_group and space info counters * @cache: The cache we are manipulating + * @ram_bytes: The number of 
bytes of file content, and will be same to + * @num_bytes except for the compress path. * @num_bytes: The number of bytes in question - * @reserve: One of the reservation enums * @delalloc: The blocks are allocated for the delalloc write * - * This is called by the allocator when it reserves space, or by somebody who is - * freeing space that was never actually used on disk. For example if you - * reserve some space for a new leaf in transaction A and before transaction A - * commits you free that leaf, you call this with reserve set to 0 in order to - * clear the reservation. - * - * Metadata reservations should be called with RESERVE_ALLOC so we do the proper + * This is called by the allocator when it reserves space. Metadata + * reservations should be called with RESERVE_ALLOC so we do the proper * ENOSPC accounting. For data we handle the reservation through clearing the * delalloc bits in the io_tree. We have to do this since we could end up * allocating less disk space for the amount of data we have reserved in the @@ -6302,44 +6507,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) * make the reservation and return -EAGAIN, otherwise this function always * succeeds. */ -static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int delalloc) +static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 ram_bytes, u64 num_bytes, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; spin_lock(&space_info->lock); spin_lock(&cache->lock); - if (reserve != RESERVE_FREE) { - if (cache->ro) { - ret = -EAGAIN; - } else { - cache->reserved += num_bytes; - space_info->bytes_reserved += num_bytes; - if (reserve == RESERVE_ALLOC) { - trace_btrfs_space_reservation(cache->fs_info, - "space_info", space_info->flags, - num_bytes, 0); - space_info->bytes_may_use -= num_bytes; - } - - if (delalloc) - cache->delalloc_bytes += num_bytes; - } + if (cache->ro) { + ret = -EAGAIN; } else { - if (cache->ro) - space_info->bytes_readonly += num_bytes; - cache->reserved -= num_bytes; - space_info->bytes_reserved -= num_bytes; + cache->reserved += num_bytes; + space_info->bytes_reserved += num_bytes; + trace_btrfs_space_reservation(cache->fs_info, + "space_info", space_info->flags, + ram_bytes, 0); + space_info->bytes_may_use -= ram_bytes; if (delalloc) - cache->delalloc_bytes -= num_bytes; + cache->delalloc_bytes += num_bytes; } spin_unlock(&cache->lock); spin_unlock(&space_info->lock); return ret; } +/** + * btrfs_free_reserved_bytes - update the block_group and space info counters + * @cache: The cache we are manipulating + * @num_bytes: The number of bytes in question + * @delalloc: The blocks are allocated for the delalloc write + * + * This is called by somebody who is freeing space that was never actually used + * on disk. For example if you reserve some space for a new leaf in transaction + * A and before transaction A commits you free that leaf, you call this with + * reserve set to 0 in order to clear the reservation. 
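 *
 * In this patch the helper takes over the RESERVE_FREE branch of the
 * old btrfs_update_reserved_bytes() and is called from
 * btrfs_free_tree_block() and __btrfs_free_reserved_extent() below.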
+ */ + +static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int delalloc) +{ + struct btrfs_space_info *space_info = cache->space_info; + int ret = 0; + + spin_lock(&space_info->lock); + spin_lock(&cache->lock); + if (cache->ro) + space_info->bytes_readonly += num_bytes; + cache->reserved -= num_bytes; + space_info->bytes_reserved -= num_bytes; + + if (delalloc) + cache->delalloc_bytes -= num_bytes; + spin_unlock(&cache->lock); + spin_unlock(&space_info->lock); + return ret; +} void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -6381,7 +6605,7 @@ fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info, u64 *empty_cluster) { struct btrfs_free_cluster *ret = NULL; - bool ssd = btrfs_test_opt(root, SSD); + bool ssd = btrfs_test_opt(root->fs_info, SSD); *empty_cluster = 0; if (btrfs_mixed_space_info(space_info)) @@ -6459,6 +6683,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, spin_lock(&cache->lock); cache->pinned -= len; space_info->bytes_pinned -= len; + + trace_btrfs_space_reservation(fs_info, "pinned", + space_info->flags, len, 0); space_info->max_extent_size = 0; percpu_counter_add(&space_info->total_bytes_pinned, -len); if (cache->ro) { @@ -6466,17 +6693,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, readonly = true; } spin_unlock(&cache->lock); - if (!readonly && global_rsv->space_info == space_info) { + if (!readonly && return_free_space && + global_rsv->space_info == space_info) { + u64 to_add = len; + WARN_ON(!return_free_space); spin_lock(&global_rsv->lock); if (!global_rsv->full) { - len = min(len, global_rsv->size - - global_rsv->reserved); - global_rsv->reserved += len; - space_info->bytes_may_use += len; + to_add = min(len, global_rsv->size - + global_rsv->reserved); + global_rsv->reserved += to_add; + space_info->bytes_may_use += to_add; if (global_rsv->reserved >= global_rsv->size) global_rsv->full = 1; + trace_btrfs_space_reservation(fs_info, + "space_info", + space_info->flags, + to_add, 1); + len -= to_add; } spin_unlock(&global_rsv->lock); + /* Add to any tickets we may have */ + if (len) + space_info_add_new_bytes(fs_info, space_info, + len); } spin_unlock(&space_info->lock); } @@ -6511,7 +6750,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, break; } - if (btrfs_test_opt(root, DISCARD)) + if (btrfs_test_opt(root->fs_info, DISCARD)) ret = btrfs_discard_extent(root, start, end + 1 - start, NULL); @@ -6649,7 +6888,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, NULL, refs_to_drop, is_data, &last_ref); if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } btrfs_release_path(path); @@ -6691,14 +6930,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } if (ret) { - btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, bytenr); + btrfs_err(info, + "umm, got %d back from search, was looking for %llu", + ret, bytenr); if (ret > 0) btrfs_print_leaf(extent_root, path->nodes[0]); } if (ret < 0) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } extent_slot = path->slots[0]; @@ -6709,10 +6949,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", bytenr, parent, root_objectid, owner_objectid, owner_offset); - 
btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } else { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -6724,7 +6964,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = convert_extent_item_v0(trans, extent_root, path, owner_objectid, 0); if (ret < 0) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -6738,12 +6978,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { - btrfs_err(info, "umm, got %d back from search, was looking for %llu", + btrfs_err(info, + "umm, got %d back from search, was looking for %llu", ret, bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } if (ret < 0) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -6765,10 +7006,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, refs = btrfs_extent_refs(leaf, ei); if (refs < refs_to_drop) { - btrfs_err(info, "trying to drop %d refs but we only have %Lu " - "for bytenr %Lu", refs_to_drop, refs, bytenr); + btrfs_err(info, + "trying to drop %d refs but we only have %Lu for bytenr %Lu", + refs_to_drop, refs, bytenr); ret = -EINVAL; - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } refs -= refs_to_drop; @@ -6791,7 +7033,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, iref, refs_to_drop, is_data, &last_ref); if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } } @@ -6814,7 +7056,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } btrfs_release_path(path); @@ -6822,7 +7064,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (is_data) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } } @@ -6830,13 +7072,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = add_to_free_space_tree(trans, root->fs_info, bytenr, num_bytes); if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } ret = update_block_group(trans, root, bytenr, num_bytes, 0); if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } } @@ -6959,7 +7201,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); + btrfs_free_reserved_bytes(cache, buf->len, 0); btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; @@ -6985,7 +7227,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; struct btrfs_fs_info *fs_info = root->fs_info; - if (btrfs_test_is_dummy_root(root)) + if (btrfs_is_testing(fs_info)) return 0; add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); @@ -7184,9 +7426,9 @@ btrfs_release_block_group(struct 
btrfs_block_group_cache *cache, * the free space extent currently. */ static noinline int find_free_extent(struct btrfs_root *orig_root, - u64 num_bytes, u64 empty_size, - u64 hint_byte, struct btrfs_key *ins, - u64 flags, int delalloc) + u64 ram_bytes, u64 num_bytes, u64 empty_size, + u64 hint_byte, struct btrfs_key *ins, + u64 flags, int delalloc) { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; @@ -7198,8 +7440,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, struct btrfs_space_info *space_info; int loop = 0; int index = __get_raid_index(flags); - int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? - RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; bool failed_cluster_refill = false; bool failed_alloc = false; bool use_cluster = true; @@ -7531,8 +7771,8 @@ checks: search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_update_reserved_bytes(block_group, num_bytes, - alloc_type, delalloc); + ret = btrfs_add_reserved_bytes(block_group, ram_bytes, + num_bytes, delalloc); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); goto loop; @@ -7620,8 +7860,7 @@ loop: * can do more things. */ if (ret < 0 && ret != -ENOSPC) - btrfs_abort_transaction(trans, - root, ret); + btrfs_abort_transaction(trans, ret); else ret = 0; if (!exist) @@ -7665,23 +7904,24 @@ out: return ret; } -static void dump_space_info(struct btrfs_space_info *info, u64 bytes, +static void dump_space_info(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *info, u64 bytes, int dump_block_groups) { struct btrfs_block_group_cache *cache; int index = 0; spin_lock(&info->lock); - printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n", - info->flags, - info->total_bytes - info->bytes_used - info->bytes_pinned - - info->bytes_reserved - info->bytes_readonly, - (info->full) ? "" : "not "); - printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, " - "reserved=%llu, may_use=%llu, readonly=%llu\n", - info->total_bytes, info->bytes_used, info->bytes_pinned, - info->bytes_reserved, info->bytes_may_use, - info->bytes_readonly); + btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", + info->flags, + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved - info->bytes_readonly - + info->bytes_may_use, (info->full) ? "" : "not "); + btrfs_info(fs_info, + "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", + info->total_bytes, info->bytes_used, info->bytes_pinned, + info->bytes_reserved, info->bytes_may_use, + info->bytes_readonly); spin_unlock(&info->lock); if (!dump_block_groups) @@ -7691,12 +7931,11 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); - printk(KERN_INFO "BTRFS: " - "block group %llu has %llu bytes, " - "%llu used %llu pinned %llu reserved %s\n", - cache->key.objectid, cache->key.offset, - btrfs_block_group_used(&cache->item), cache->pinned, - cache->reserved, cache->ro ? "[readonly]" : ""); + btrfs_info(fs_info, + "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s", + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned, + cache->reserved, cache->ro ? 
"[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -7705,11 +7944,12 @@ again: up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data, int delalloc) { + struct btrfs_fs_info *fs_info = root->fs_info; bool final_tried = num_bytes == min_alloc_size; u64 flags; int ret; @@ -7717,27 +7957,28 @@ int btrfs_reserve_extent(struct btrfs_root *root, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, - flags, delalloc); + ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, + hint_byte, ins, flags, delalloc); if (!ret && !is_data) { - btrfs_dec_block_group_reservations(root->fs_info, - ins->objectid); + btrfs_dec_block_group_reservations(fs_info, ins->objectid); } else if (ret == -ENOSPC) { if (!final_tried && ins->offset) { num_bytes = min(num_bytes >> 1, ins->offset); num_bytes = round_down(num_bytes, root->sectorsize); num_bytes = max(num_bytes, min_alloc_size); + ram_bytes = num_bytes; if (num_bytes == min_alloc_size) final_tried = true; goto again; - } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { struct btrfs_space_info *sinfo; - sinfo = __find_space_info(root->fs_info, flags); - btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", - flags, num_bytes); + sinfo = __find_space_info(fs_info, flags); + btrfs_err(root->fs_info, + "allocation failed flags %llu, wanted %llu", + flags, num_bytes); if (sinfo) - dump_space_info(sinfo, num_bytes, 1); + dump_space_info(fs_info, sinfo, num_bytes, 1); } } @@ -7761,16 +8002,14 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, if (pin) pin_down_extent(root, cache, start, len, 1); else { - if (btrfs_test_opt(root, DISCARD)) + if (btrfs_test_opt(root->fs_info, DISCARD)) ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); - btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); + btrfs_free_reserved_bytes(cache, len, delalloc); + trace_btrfs_reserved_extent_free(root, start, len); } btrfs_put_block_group(cache); - - trace_btrfs_reserved_extent_free(root, start, len); - return ret; } @@ -7979,6 +8218,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, { int ret; struct btrfs_block_group_cache *block_group; + struct btrfs_space_info *space_info; /* * Mixed block groups will exclude before processing the log so we only @@ -7994,9 +8234,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, if (!block_group) return -EINVAL; - ret = btrfs_update_reserved_bytes(block_group, ins->offset, - RESERVE_ALLOC_NO_ACCOUNT, 0); - BUG_ON(ret); /* logic error */ + space_info = block_group->space_info; + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + space_info->bytes_reserved += ins->offset; + block_group->reserved += ins->offset; + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); btrfs_put_block_group(block_group); @@ -8010,8 +8255,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return 
ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8038,7 +8284,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->blocks_used++; + trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } @@ -8070,7 +8316,7 @@ again: goto again; } - if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) { static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL * 10, /*DEFAULT_RATELIMIT_BURST*/ 1); @@ -8124,19 +8370,21 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); - if (btrfs_test_is_dummy_root(root)) { +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (btrfs_is_testing(root->fs_info)) { buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, level); if (!IS_ERR(buf)) root->alloc_bytenr += blocksize; return buf; } +#endif block_rsv = use_block_rsv(trans, root, blocksize); if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, empty_size, hint, &ins, 0, 0); if (ret) goto out_unuse; @@ -8218,7 +8466,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, u64 refs; u64 flags; u32 nritems; - u32 blocksize; struct btrfs_key key; struct extent_buffer *eb; int ret; @@ -8236,7 +8483,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, eb = path->nodes[wc->level]; nritems = btrfs_header_nritems(eb); - blocksize = root->nodesize; for (slot = path->slots[wc->level]; slot < nritems; slot++) { if (nread >= wc->reada_count) @@ -8289,34 +8535,6 @@ reada: wc->reada_slot = slot; } -/* - * These may not be seen by the usual inc/dec ref code so we have to - * add them here. 
- */ -static int record_one_subtree_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes) -{ - struct btrfs_qgroup_extent_record *qrecord; - struct btrfs_delayed_ref_root *delayed_refs; - - qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); - if (!qrecord) - return -ENOMEM; - - qrecord->bytenr = bytenr; - qrecord->num_bytes = num_bytes; - qrecord->old_roots = NULL; - - delayed_refs = &trans->transaction->delayed_refs; - spin_lock(&delayed_refs->lock); - if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord)) - kfree(qrecord); - spin_unlock(&delayed_refs->lock); - - return 0; -} - static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) @@ -8328,7 +8546,7 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, u64 bytenr, num_bytes; /* We can be called directly from walk_up_proc() */ - if (!root->fs_info->quota_enabled) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) return 0; for (i = 0; i < nr; i++) { @@ -8350,7 +8568,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, + bytenr, num_bytes, GFP_NOFS); if (ret) return ret; } @@ -8436,7 +8655,7 @@ static int account_shared_subtree(struct btrfs_trans_handle *trans, BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); BUG_ON(root_eb == NULL); - if (!root->fs_info->quota_enabled) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) return 0; if (!extent_buffer_uptodate(root_eb)) { @@ -8499,8 +8718,9 @@ walk_down: btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; - ret = record_one_subtree_extent(trans, root, child_bytenr, - root->nodesize); + ret = btrfs_qgroup_insert_dirty_extent(trans, + root->fs_info, child_bytenr, + root->nodesize, GFP_NOFS); if (ret) goto out; } @@ -8653,8 +8873,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root->fs_info, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, level - 1); reada = 1; @@ -8665,14 +8886,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1, &wc->refs[level - 1], &wc->flags[level - 1]); - if (ret < 0) { - btrfs_tree_unlock(next); - return ret; - } + if (ret < 0) + goto out_unlock; if (unlikely(wc->refs[level - 1] == 0)) { btrfs_err(root->fs_info, "Missing references."); - BUG(); + ret = -EIO; + goto out_unlock; } *lookup_info = 0; @@ -8724,7 +8944,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } level--; - BUG_ON(level != btrfs_header_level(next)); + ASSERT(level == btrfs_header_level(next)); + if (level != btrfs_header_level(next)) { + btrfs_err(root->fs_info, "mismatched level"); + ret = -EIO; + goto out_unlock; + } path->nodes[level] = next; path->slots[level] = 0; path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; @@ -8739,8 +8964,15 @@ skip: if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { parent = path->nodes[level]->start; } else { - BUG_ON(root->root_key.objectid != + ASSERT(root->root_key.objectid == btrfs_header_owner(path->nodes[level])); + if (root->root_key.objectid != + 
btrfs_header_owner(path->nodes[level])) { + btrfs_err(root->fs_info, + "mismatched block owner"); + ret = -EIO; + goto out_unlock; + } parent = 0; } @@ -8749,20 +8981,24 @@ skip: generation, level - 1); if (ret) { btrfs_err_rl(root->fs_info, - "Error " - "%d accounting shared subtree. Quota " - "is out of sync, rescan required.", - ret); + "Error %d accounting shared subtree. Quota is out of sync, rescan required.", + ret); } } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, root->root_key.objectid, level - 1, 0); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto out_unlock; } + + *lookup_info = 1; + ret = 1; + +out_unlock: btrfs_tree_unlock(next); free_extent_buffer(next); - *lookup_info = 1; - return 1; + + return ret; } /* @@ -8842,10 +9078,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, ret = account_leaf_items(trans, root, eb); if (ret) { btrfs_err_rl(root->fs_info, - "error " - "%d accounting leaf items. Quota " - "is out of sync, rescan required.", - ret); + "error %d accounting leaf items. Quota is out of sync, rescan required.", + ret); } } /* make block locked assertion in clean_tree_block happy */ @@ -8961,9 +9195,10 @@ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, int update_ref, int for_reloc) { + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; struct btrfs_trans_handle *trans; - struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root_item *root_item = &root->root_item; struct walk_control *wc; struct btrfs_key key; @@ -8972,7 +9207,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; bool root_dropped = false; - btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid); + btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid); path = btrfs_alloc_path(); if (!path) { @@ -9094,14 +9329,15 @@ int btrfs_drop_snapshot(struct btrfs_root *root, &root->root_key, root_item); if (ret) { - btrfs_abort_transaction(trans, tree_root, ret); + btrfs_abort_transaction(trans, ret); err = ret; goto out_end_trans; } btrfs_end_transaction_throttle(trans, tree_root); if (!for_reloc && btrfs_need_cleaner_sleep(root)) { - pr_debug("BTRFS: drop snapshot early exit\n"); + btrfs_debug(fs_info, + "drop snapshot early exit"); err = -EAGAIN; goto out_free; } @@ -9121,7 +9357,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ret = btrfs_del_root(trans, tree_root, &root->root_key); if (ret) { - btrfs_abort_transaction(trans, tree_root, ret); + btrfs_abort_transaction(trans, ret); goto out_end_trans; } @@ -9129,7 +9365,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ret = btrfs_find_root(tree_root, &root->root_key, path, NULL, NULL); if (ret < 0) { - btrfs_abort_transaction(trans, tree_root, ret); + btrfs_abort_transaction(trans, ret); err = ret; goto out_end_trans; } else if (ret > 0) { @@ -9167,7 +9403,7 @@ out: if (!for_reloc && root_dropped == false) btrfs_add_dead_root(root); if (err && err != -EAGAIN) - btrfs_handle_fs_error(root->fs_info, err, NULL); + btrfs_handle_fs_error(fs_info, err, NULL); return err; } @@ -9500,7 +9736,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) int full = 0; int ret = 0; - debug = btrfs_test_opt(root, ENOSPC_DEBUG); + debug = btrfs_test_opt(root->fs_info, ENOSPC_DEBUG); block_group = btrfs_lookup_block_group(root->fs_info, bytenr); @@ -9656,7 +9892,23 @@ static int find_first_block_group(struct btrfs_root *root, if (found_key.objectid >= key->objectid && 
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - ret = 0; + struct extent_map_tree *em_tree; + struct extent_map *em; + + em_tree = &root->fs_info->mapping_tree.map_tree; + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, found_key.objectid, + found_key.offset); + read_unlock(&em_tree->lock); + if (!em) { + btrfs_err(root->fs_info, + "logical %llu len %llu found bg but no related chunk", + found_key.objectid, found_key.offset); + ret = -ENOENT; + } else { + ret = 0; + } + free_extent_map(em); goto out; } path->slots[0]++; @@ -9693,6 +9945,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group->iref = 0; block_group->inode = NULL; spin_unlock(&block_group->lock); + ASSERT(block_group->io_ctl.inode == NULL); iput(inode); last = block_group->key.objectid + block_group->key.offset; btrfs_put_block_group(block_group); @@ -9750,6 +10003,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); + ASSERT(list_empty(&block_group->dirty_list)); + ASSERT(list_empty(&block_group->io_list)); + ASSERT(list_empty(&block_group->bg_list)); + ASSERT(atomic_read(&block_group->count) == 1); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); @@ -9772,13 +10029,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) space_info = list_entry(info->space_info.next, struct btrfs_space_info, list); - if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { - if (WARN_ON(space_info->bytes_pinned > 0 || + + /* + * Do not hide this behind enospc_debug, this is actually + * important and indicates a real bug if this happens. + */ + if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_reserved > 0 || - space_info->bytes_may_use > 0)) { - dump_space_info(space_info, 0, 0); - } - } + space_info->bytes_may_use > 0)) + dump_space_info(info, space_info, 0, 0); list_del(&space_info->list); for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { struct kobject *kobj; @@ -9827,7 +10086,8 @@ static void __link_block_group(struct btrfs_space_info *space_info, return; out_err: - pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n"); + btrfs_warn(cache->fs_info, + "failed to add kobject for block cache, ignoring"); } static struct btrfs_block_group_cache * @@ -9885,6 +10145,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct extent_buffer *leaf; int need_clear = 0; u64 cache_gen; + u64 feature; + int mixed; + + feature = btrfs_super_incompat_flags(info->super_copy); + mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS); root = info->extent_root; key.objectid = 0; @@ -9896,10 +10161,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) path->reada = READA_FORWARD; cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); - if (btrfs_test_opt(root, SPACE_CACHE) && + if (btrfs_test_opt(root->fs_info, SPACE_CACHE) && btrfs_super_generation(root->fs_info->super_copy) != cache_gen) need_clear = 1; - if (btrfs_test_opt(root, CLEAR_CACHE)) + if (btrfs_test_opt(root->fs_info, CLEAR_CACHE)) need_clear = 1; while (1) { @@ -9930,7 +10195,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) * b) Setting 'dirty flag' makes sure that we flush * the new space cache info onto disk. 
*/ - if (btrfs_test_opt(root, SPACE_CACHE)) + if (btrfs_test_opt(root->fs_info, SPACE_CACHE)) cache->disk_cache_state = BTRFS_DC_CLEAR; } @@ -9938,6 +10203,15 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); cache->flags = btrfs_block_group_flags(&cache->item); + if (!mixed && + ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) && + (cache->flags & BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(info, +"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", + cache->key.objectid); + ret = -EINVAL; + goto error; + } key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(path); @@ -9986,9 +10260,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) goto error; } + trace_btrfs_add_block_group(root->fs_info, cache, 0); ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), - &space_info); + cache->bytes_super, &space_info); if (ret) { btrfs_remove_free_space_cache(cache); spin_lock(&info->block_group_cache_lock); @@ -10001,9 +10276,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) } cache->space_info = space_info; - spin_lock(&cache->space_info->lock); - cache->space_info->bytes_readonly += cache->bytes_super; - spin_unlock(&cache->space_info->lock); __link_block_group(space_info, cache); @@ -10074,11 +10346,11 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, ret = btrfs_insert_item(trans, extent_root, &key, &item, sizeof(item)); if (ret) - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); ret = btrfs_finish_chunk_alloc(trans, extent_root, key.objectid, key.offset); if (ret) - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); add_block_group_free_space(trans, root->fs_info, block_group); /* already aborted the transaction if it failed. */ next: @@ -10095,7 +10367,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int ret; struct btrfs_root *extent_root; struct btrfs_block_group_cache *cache; - extent_root = root->fs_info->extent_root; btrfs_set_log_full_commit(root->fs_info, trans); @@ -10141,7 +10412,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, * assigned to our block group, but don't update its counters just yet. * We want our bg to be added to the rbtree with its ->space_info set. */ - ret = update_space_info(root->fs_info, cache->flags, 0, 0, + ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0, &cache->space_info); if (ret) { btrfs_remove_free_space_cache(cache); @@ -10160,8 +10431,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, * Now that our block group has its ->space_info set and is inserted in * the rbtree, update the space info's counters. 
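The btrfs_read_block_groups() hunk above adds a sanity check: a group flagged both DATA and METADATA is only legal when the MIXED_GROUPS incompat feature is set, otherwise mount fails with -EINVAL. A compact sketch of that flag test, with illustrative bit values rather than the real BTRFS_BLOCK_GROUP_* / incompat constants:

#include <stdio.h>
#include <errno.h>

/* Flag values mirror the BTRFS_BLOCK_GROUP_* and incompat bits only in
 * spirit; the numeric values here are illustrative. */
#define BG_DATA           (1u << 0)
#define BG_METADATA       (1u << 1)
#define FEAT_MIXED_GROUPS (1u << 0)

static int check_bg_flags(unsigned feature, unsigned bg_flags)
{
    int mixed = !!(feature & FEAT_MIXED_GROUPS);

    /* Reject a group carrying both DATA and METADATA unless the
     * filesystem advertises mixed block groups. */
    if (!mixed && (bg_flags & BG_METADATA) && (bg_flags & BG_DATA))
        return -EINVAL;
    return 0;
}

int main(void)
{
    printf("%d\n", check_bg_flags(0, BG_DATA | BG_METADATA));     /* -EINVAL */
    printf("%d\n", check_bg_flags(FEAT_MIXED_GROUPS,
                                  BG_DATA | BG_METADATA));        /* 0 */
    return 0;
}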
*/ + trace_btrfs_add_block_group(root->fs_info, cache, 1); ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, - &cache->space_info); + cache->bytes_super, &cache->space_info); if (ret) { btrfs_remove_free_space_cache(cache); spin_lock(&root->fs_info->block_group_cache_lock); @@ -10174,16 +10446,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, } update_global_block_rsv(root->fs_info); - spin_lock(&cache->space_info->lock); - cache->space_info->bytes_readonly += cache->bytes_super; - spin_unlock(&cache->space_info->lock); - __link_block_group(cache->space_info, cache); list_add_tail(&cache->bg_list, &trans->new_bgs); set_avail_alloc_bits(extent_root->fs_info, type); - return 0; } @@ -10396,7 +10663,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_lock(&block_group->space_info->lock); list_del_init(&block_group->ro_list); - if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) { WARN_ON(block_group->space_info->total_bytes < block_group->key.offset); WARN_ON(block_group->space_info->bytes_readonly @@ -10554,7 +10821,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) struct btrfs_trans_handle *trans; int ret = 0; - if (!fs_info->open) + if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; spin_lock(&fs_info->unused_bgs_lock); @@ -10664,7 +10931,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_unlock(&space_info->lock); /* DISCARD can flip during remount */ - trimming = btrfs_test_opt(root, DISCARD); + trimming = btrfs_test_opt(root->fs_info, DISCARD); /* Implicit trim during transaction commit. */ if (trimming) @@ -10728,21 +10995,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) mixed = 1; flags = BTRFS_BLOCK_GROUP_SYSTEM; - ret = update_space_info(fs_info, flags, 0, 0, &space_info); + ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); if (ret) goto out; if (mixed) { flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; - ret = update_space_info(fs_info, flags, 0, 0, &space_info); + ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); } else { flags = BTRFS_BLOCK_GROUP_METADATA; - ret = update_space_info(fs_info, flags, 0, 0, &space_info); + ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); if (ret) goto out; flags = BTRFS_BLOCK_GROUP_DATA; - ret = update_space_info(fs_info, flags, 0, 0, &space_info); + ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); } out: return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3cd57825c75f..66a755150056 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -20,6 +20,7 @@ #include "locking.h" #include "rcu-string.h" #include "backref.h" +#include "transaction.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -74,8 +75,7 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); - printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu " - "refs %d\n", + pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); @@ -163,13 +163,13 @@ int __init extent_io_init(void) { extent_state_cache = kmem_cache_create("btrfs_extent_state", sizeof(struct extent_state), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!extent_state_cache) return -ENOMEM; extent_buffer_cache = 
kmem_cache_create("btrfs_extent_buffer", sizeof(struct extent_buffer), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!extent_buffer_cache) goto free_state_cache; @@ -460,8 +460,7 @@ static int insert_state(struct extent_io_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk(KERN_ERR "BTRFS: found node %llu %llu on insert of " - "%llu %llu\n", + pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n", found->start, found->end, start, end); return -EEXIST; } @@ -572,9 +571,8 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - btrfs_panic(tree_fs_info(tree), err, "Locking error: " - "Extent tree was modified by another " - "thread while locked."); + btrfs_panic(tree_fs_info(tree), err, + "Locking error: Extent tree was modified by another thread while locked."); } /* @@ -1729,7 +1727,7 @@ out_failed: } void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, - struct page *locked_page, + u64 delalloc_end, struct page *locked_page, unsigned clear_bits, unsigned long page_ops) { @@ -2025,9 +2023,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, bio->bi_iter.bi_size = 0; map_length = length; + /* + * Avoid races with device replace and make sure our bbio has devices + * associated to its stripes that don't go away while we are doing the + * read repair operation. + */ + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_block(fs_info, WRITE, logical, &map_length, &bbio, mirror_num); if (ret) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } @@ -2037,14 +2042,17 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, dev = bbio->stripes[mirror_num-1].dev; btrfs_put_bbio(bbio); if (!dev || !dev->bdev || !dev->writeable) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } bio->bi_bdev = dev->bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); bio_add_page(bio, page, length, pg_offset); - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { + if (btrfsic_submit_bio_wait(bio)) { /* try to remap that extent elsewhere? 
*/ + btrfs_bio_counter_dec(fs_info); bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; @@ -2054,6 +2062,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, "read error corrected: ino %llu off %llu (dev %s sector %llu)", btrfs_ino(inode), start, rcu_str_deref(dev->name), sector); + btrfs_bio_counter_dec(fs_info); bio_put(bio); return 0; } @@ -2111,8 +2120,9 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, if (failrec->in_validation) { /* there was no real error, just free the record */ - pr_debug("clean_io_failure: freeing dummy error at %llu\n", - failrec->start); + btrfs_debug(fs_info, + "clean_io_failure: freeing dummy error at %llu", + failrec->start); goto out; } if (fs_info->sb->s_flags & MS_RDONLY) @@ -2178,6 +2188,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct io_failure_record *failrec; struct extent_map *em; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; @@ -2225,8 +2236,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, em->compress_type); } - pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n", - logical, start, failrec->len); + btrfs_debug(fs_info, + "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu", + logical, start, failrec->len); failrec->logical = logical; free_extent_map(em); @@ -2244,9 +2256,10 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, return ret; } } else { - pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", - failrec->logical, failrec->start, failrec->len, - failrec->in_validation); + btrfs_debug(fs_info, + "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d", + failrec->logical, failrec->start, failrec->len, + failrec->in_validation); /* * when data can be on disk more than twice, add to failrec here * (e.g. with a list for failed_mirror) to make @@ -2262,18 +2275,19 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec, int failed_mirror) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int num_copies; - num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, - failrec->logical, failrec->len); + num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies == 1) { /* * we only have a single copy of the data, so don't bother with * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. 
*/ - pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -2312,8 +2326,9 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, } if (failrec->this_mirror > num_copies) { - pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -2376,7 +2391,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, int read_mode; int ret; - BUG_ON(failed_bio->bi_rw & REQ_WRITE); + BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) @@ -2402,12 +2417,13 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, free_io_failure(inode, failrec); return -EIO; } + bio_set_op_attrs(bio, REQ_OP_READ, read_mode); - pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n", - read_mode, failrec->this_mirror, failrec->in_validation); + btrfs_debug(btrfs_sb(inode->i_sb), + "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", + read_mode, failrec->this_mirror, failrec->in_validation); - ret = tree->ops->submit_bio_hook(inode, read_mode, bio, - failrec->this_mirror, + ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); if (ret) { free_io_failure(inode, failrec); @@ -2473,8 +2489,7 @@ static void end_bio_extent_writepage(struct bio *bio) bvec->bv_offset, bvec->bv_len); else btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, - "incomplete page write in btrfs with offset %u and " - "length %u", + "incomplete page write in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); } @@ -2530,10 +2545,12 @@ static void end_bio_extent_readpage(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " - "mirror=%u\n", (u64)bio->bi_iter.bi_sector, - bio->bi_error, io_bio->mirror_num); + btrfs_debug(fs_info, + "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", + (u64)bio->bi_iter.bi_sector, bio->bi_error, + io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; /* We always issue full-page reads, but if some block @@ -2543,13 +2560,12 @@ static void end_bio_extent_readpage(struct bio *bio) * if they don't add up to a full page. 
*/ if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) { if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE) - btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info, - "partial page read in btrfs with offset %u and length %u", + btrfs_err(fs_info, + "partial page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); else - btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, - "incomplete page read in btrfs with offset %u and " - "length %u", + btrfs_info(fs_info, + "incomplete page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); } @@ -2686,12 +2702,6 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) btrfs_bio->csum = NULL; btrfs_bio->csum_allocated = NULL; btrfs_bio->end_io = NULL; - -#ifdef CONFIG_BLK_CGROUP - /* FIXME, put this into bio_clone_bioset */ - if (bio->bi_css) - bio_associate_blkcg(new, bio->bi_css); -#endif } return new; } @@ -2713,8 +2723,8 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) } -static int __must_check submit_one_bio(int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) +static int __must_check submit_one_bio(struct bio *bio, int mirror_num, + unsigned long bio_flags) { int ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -2725,33 +2735,31 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, start = page_offset(page) + bvec->bv_offset; bio->bi_private = NULL; - bio_get(bio); if (tree->ops && tree->ops->submit_bio_hook) - ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, + ret = tree->ops->submit_bio_hook(page->mapping->host, bio, mirror_num, bio_flags, start); else - btrfsic_submit_bio(rw, bio); + btrfsic_submit_bio(bio); bio_put(bio); return ret; } -static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page, +static int merge_bio(struct extent_io_tree *tree, struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags) { int ret = 0; if (tree->ops && tree->ops->merge_bio_hook) - ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio, + ret = tree->ops->merge_bio_hook(page, offset, size, bio, bio_flags); - BUG_ON(ret < 0); return ret; } -static int submit_extent_page(int rw, struct extent_io_tree *tree, +static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, struct writeback_control *wbc, struct page *page, sector_t sector, size_t size, unsigned long offset, @@ -2779,10 +2787,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (prev_bio_flags != bio_flags || !contig || force_bio_submit || - merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || + merge_bio(tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { - ret = submit_one_bio(rw, bio, mirror_num, - prev_bio_flags); + ret = submit_one_bio(bio, mirror_num, prev_bio_flags); if (ret < 0) { *bio_ret = NULL; return ret; @@ -2803,6 +2810,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; + bio_set_op_attrs(bio, op, op_flags); if (wbc) { wbc_init_bio(wbc, bio); wbc_account_io(wbc, page, page_size); @@ -2811,7 +2819,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (bio_ret) *bio_ret = bio; else - ret = submit_one_bio(rw, bio, mirror_num, bio_flags); + ret = submit_one_bio(bio, mirror_num, bio_flags); return ret; } @@ -2869,13 +2877,14 @@ 
__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, * into the tree that are removed when the IO is done (by the end_io * handlers) * XXX JDM: This needs looking at to ensure proper page locking + * return 0 on success, otherwise return error */ static int __do_readpage(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, int read_flags, u64 *prev_em_start) { struct inode *inode = page->mapping->host; @@ -2890,7 +2899,7 @@ static int __do_readpage(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; - int ret; + int ret = 0; int nr = 0; size_t pg_offset = 0; size_t iosize; @@ -3058,8 +3067,8 @@ static int __do_readpage(struct extent_io_tree *tree, } pnr -= page->index; - ret = submit_extent_page(rw, tree, NULL, page, - sector, disk_io_size, pg_offset, + ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL, + page, sector, disk_io_size, pg_offset, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, @@ -3071,6 +3080,7 @@ static int __do_readpage(struct extent_io_tree *tree, } else { SetPageError(page); unlock_extent(tree, cur, cur + iosize - 1); + goto out; } cur = cur + iosize; pg_offset += iosize; @@ -3081,7 +3091,7 @@ out: SetPageUptodate(page); unlock_page(page); } - return 0; + return ret; } static inline void __do_contiguous_readpages(struct extent_io_tree *tree, @@ -3090,7 +3100,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, u64 *prev_em_start) { struct inode *inode; @@ -3111,7 +3121,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw, prev_em_start); + mirror_num, bio_flags, 0, prev_em_start); put_page(pages[index]); } } @@ -3121,7 +3131,7 @@ static void __extent_readpages(struct extent_io_tree *tree, int nr_pages, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, u64 *prev_em_start) { u64 start = 0; @@ -3143,7 +3153,7 @@ static void __extent_readpages(struct extent_io_tree *tree, index - first_index, start, end, get_extent, em_cached, bio, mirror_num, bio_flags, - rw, prev_em_start); + prev_em_start); start = page_start; end = start + PAGE_SIZE - 1; first_index = index; @@ -3154,7 +3164,7 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, bio, - mirror_num, bio_flags, rw, + mirror_num, bio_flags, prev_em_start); } @@ -3162,7 +3172,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int read_flags) { struct inode *inode = page->mapping->host; struct btrfs_ordered_extent *ordered; @@ -3182,7 +3192,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw, NULL); + bio_flags, read_flags, NULL); return ret; } @@ -3194,9 +3204,9 @@ int 
extent_read_full_page(struct extent_io_tree *tree, struct page *page, int ret; ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, 0); if (bio) - ret = submit_one_bio(READ, bio, mirror_num, bio_flags); + ret = submit_one_bio(bio, mirror_num, bio_flags); return ret; } @@ -3430,8 +3440,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, page->index, cur, end); } - ret = submit_extent_page(write_flags, tree, wbc, page, - sector, iosize, pg_offset, + ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, + page, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, 0, 0, 0, false); @@ -3470,13 +3480,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, size_t pg_offset = 0; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_SHIFT; - int write_flags; + int write_flags = 0; unsigned long nr_written = 0; if (wbc->sync_mode == WB_SYNC_ALL) write_flags = WRITE_SYNC; - else - write_flags = WRITE; trace___extent_writepage(page, inode, wbc); @@ -3621,7 +3629,6 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; - struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode); SetPageError(page); if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) @@ -3667,13 +3674,13 @@ static void set_btree_ioerr(struct page *page) */ switch (eb->log_index) { case -1: - set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags); break; case 0: - set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags); break; case 1: - set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags); break; default: BUG(); /* unexpected, logic error */ @@ -3718,9 +3725,11 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, struct block_device *bdev = fs_info->fs_devices->latest_bdev; struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; u64 offset = eb->start; + u32 nritems; unsigned long i, num_pages; unsigned long bio_flags = 0; - int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; + unsigned long start, end; + int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); @@ -3729,14 +3738,32 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) bio_flags = EXTENT_BIO_TREE_LOG; + /* set btree blocks beyond nritems with 0 to avoid stale content. */ + nritems = btrfs_header_nritems(eb); + if (btrfs_header_level(eb) > 0) { + end = btrfs_node_key_ptr_offset(nritems); + + memset_extent_buffer(eb, 0, end, eb->len - end); + } else { + /* + * leaf: + * header 0 1 2 .. N ... data_N .. 
data_2 data_1 data_0 + */ + start = btrfs_item_nr_offset(nritems); + end = btrfs_leaf_data(eb) + + leaf_data_end(fs_info->tree_root, eb); + memset_extent_buffer(eb, 0, start, end - start); + } + for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; clear_page_dirty_for_io(p); set_page_writeback(p); - ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, - PAGE_SIZE, 0, bdev, &epd->bio, - -1, end_bio_extent_buffer_writepage, + ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, + p, offset >> 9, PAGE_SIZE, 0, bdev, + &epd->bio, -1, + end_bio_extent_buffer_writepage, 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { @@ -4046,13 +4073,12 @@ retry: static void flush_epd_write_bio(struct extent_page_data *epd) { if (epd->bio) { - int rw = WRITE; int ret; - if (epd->sync_io) - rw = WRITE_SYNC; + bio_set_op_attrs(epd->bio, REQ_OP_WRITE, + epd->sync_io ? WRITE_SYNC : 0); - ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags); + ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ epd->bio = NULL; } @@ -4170,7 +4196,8 @@ int extent_readpages(struct extent_io_tree *tree, prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, - page->index, GFP_NOFS)) { + page->index, + readahead_gfp_mask(mapping))) { put_page(page); continue; } @@ -4179,19 +4206,19 @@ int extent_readpages(struct extent_io_tree *tree, if (nr < ARRAY_SIZE(pagepool)) continue; __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ, &prev_em_start); + &bio, 0, &bio_flags, &prev_em_start); nr = 0; } if (nr) __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ, &prev_em_start); + &bio, 0, &bio_flags, &prev_em_start); if (em_cached) free_extent_map(em_cached); BUG_ON(!list_empty(pages)); if (bio) - return submit_one_bio(READ, bio, 0, bio_flags); + return submit_one_bio(bio, 0, bio_flags); return 0; } @@ -4483,21 +4510,36 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= (FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); } else if (fieinfo->fi_extents_max) { + struct btrfs_trans_handle *trans; + u64 bytenr = em->block_start - (em->start - em->orig_start); disko = em->block_start + offset_in_extent; /* + * We need a trans handle to get delayed refs + */ + trans = btrfs_join_transaction(root); + /* + * It's OK if we can't start a trans we can still check + * from commit_root + */ + if (IS_ERR(trans)) + trans = NULL; + + /* * As btrfs supports shared space, this information * can be exported to userspace tools via * flag FIEMAP_EXTENT_SHARED. If fi_extents_max == 0 * then we're just getting a count and we can skip the * lookup stuff. 
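The write_one_eb() hunk above zeroes everything in a tree block beyond the last valid item before it hits disk, so stale kernel memory never leaks into on-disk metadata; the real code clears the gap between item headers and item data in a leaf, and past the key pointers in a node. A deliberately collapsed sketch treating the unused area as one tail region, with an arbitrary 64-byte block standing in for nodesize:

#include <stdio.h>
#include <string.h>

/* Toy model of the new zeroing step: everything in the block beyond
 * the bytes actually in use is cleared so stale memory never reaches
 * disk. BLOCK_SIZE is an arbitrary stand-in for the nodesize. */
#define BLOCK_SIZE 64

static void zero_unused(unsigned char *block, size_t used)
{
    if (used < BLOCK_SIZE)
        memset(block + used, 0, BLOCK_SIZE - used);
}

int main(void)
{
    unsigned char block[BLOCK_SIZE];

    memset(block, 0xAA, sizeof(block));  /* pretend stale kernel data */
    zero_unused(block, 20);              /* 20 bytes of real items */

    printf("byte 19: %#x, byte 20: %#x\n", block[19], block[20]);
    return 0;
}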
*/ - ret = btrfs_check_shared(NULL, root->fs_info, + ret = btrfs_check_shared(trans, root->fs_info, root->objectid, btrfs_ino(inode), bytenr); + if (trans) + btrfs_end_transaction(trans, root); if (ret < 0) goto out_free; if (ret) @@ -4718,16 +4760,16 @@ err: } struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { unsigned long len; if (!fs_info) { /* * Called only from tests that don't always have a fs_info - * available, but we know that nodesize is 4096 + * available */ - len = 4096; + len = nodesize; } else { len = fs_info->tree_root->nodesize; } @@ -4823,7 +4865,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { struct extent_buffer *eb, *exists = NULL; int ret; @@ -4831,7 +4873,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, eb = find_extent_buffer(fs_info, start); if (eb) return eb; - eb = alloc_dummy_extent_buffer(fs_info, start); + eb = alloc_dummy_extent_buffer(fs_info, start, nodesize); if (!eb) return NULL; eb->fs_info = fs_info; @@ -4882,18 +4924,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int uptodate = 1; int ret; + if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) { + btrfs_err(fs_info, "bad tree block start %llu", start); + return ERR_PTR(-EINVAL); + } + eb = find_extent_buffer(fs_info, start); if (eb) return eb; eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); - if (!p) + if (!p) { + exists = ERR_PTR(-ENOMEM); goto free_eb; + } spin_lock(&mapping->private_lock); if (PagePrivate(p)) { @@ -4938,8 +4987,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, @@ -5160,11 +5211,10 @@ int extent_buffer_uptodate(struct extent_buffer *eb) } int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, u64 start, int wait, + struct extent_buffer *eb, int wait, get_extent_t *get_extent, int mirror_num) { unsigned long i; - unsigned long start_i; struct page *page; int err; int ret = 0; @@ -5178,16 +5228,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_SHIFT) - - (eb->start >> PAGE_SHIFT); - } else { - start_i = 0; - } - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; if (wait == WAIT_NONE) { if (!trylock_page(page)) @@ -5196,38 +5238,62 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } locked_pages++; + } + /* + * We need to firstly lock all pages to make sure that + * the uptodate bit of our pages won't be affected by + * clear_extent_buffer_uptodate(). 
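alloc_extent_buffer() above switches from returning bare NULL to ERR_PTR(-EINVAL) for a misaligned start and ERR_PTR(-ENOMEM) for allocation failures, letting callers distinguish the causes with IS_ERR()/PTR_ERR(). A userspace re-creation of those include/linux/err.h semantics, good enough for a sketch though the pointer/integer casts are formally implementation-defined:

#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

/* Small negative errnos are encoded in the top of the address space so
 * one pointer return can carry either an object or an error code. */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
    return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

struct extent_buffer { unsigned long long start; };

/* Mirrors the reworked alloc path: misaligned start is -EINVAL,
 * allocation failure is -ENOMEM, never a bare NULL. */
static struct extent_buffer *alloc_eb(unsigned long long start,
                                      unsigned sectorsize)
{
    struct extent_buffer *eb;

    if (start % sectorsize)
        return ERR_PTR(-EINVAL);
    eb = malloc(sizeof(*eb));
    if (!eb)
        return ERR_PTR(-ENOMEM);
    eb->start = start;
    return eb;
}

int main(void)
{
    struct extent_buffer *eb = alloc_eb(4097, 4096);

    if (IS_ERR(eb))
        printf("alloc failed: %ld\n", PTR_ERR(eb)); /* -EINVAL */
    eb = alloc_eb(8192, 4096);
    if (!IS_ERR(eb)) { printf("start %llu\n", eb->start); free(eb); }
    return 0;
}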
+ */ + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; if (!PageUptodate(page)) { num_reads++; all_uptodate = 0; } } + if (all_uptodate) { - if (start_i == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); goto unlock_exit; } clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; + if (!PageUptodate(page)) { + if (ret) { + atomic_dec(&eb->io_pages); + unlock_page(page); + continue; + } + ClearPageError(page); err = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, &bio_flags, - READ | REQ_META); - if (err) + REQ_META); + if (err) { ret = err; + /* + * We use &bio in above __extent_read_full_page, + * so we ensure that if it returns error, the + * current page fails to add itself to bio and + * it's been unlocked. + * + * We must dec io_pages by ourselves. + */ + atomic_dec(&eb->io_pages); + } } else { unlock_page(page); } } if (bio) { - err = submit_one_bio(READ | REQ_META, bio, mirror_num, - bio_flags); + err = submit_one_bio(bio, mirror_num, bio_flags); if (err) return err; } @@ -5235,7 +5301,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (ret || wait != WAIT_COMPLETE) return ret; - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; wait_on_page_locked(page); if (!PageUptodate(page)) @@ -5245,12 +5311,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, return ret; unlock_exit: - i = start_i; while (locked_pages > 0) { - page = eb->pages[i]; - i++; - unlock_page(page); locked_pages--; + page = eb->pages[locked_pages]; + unlock_page(page); } return ret; } @@ -5323,6 +5387,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, return ret; } +/* + * return 0 if the item is found within a page. + * return 1 if the item spans two pages. + * return -EINVAL otherwise. + */ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, @@ -5337,7 +5406,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, PAGE_SHIFT; if (i != end_i) - return -EINVAL; + return 1; if (i == 0) { offset = start_offset; @@ -5348,8 +5417,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, } if (start + min_len > eb->len) { - WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " - "wanted %lu %lu\n", + WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); return -EINVAL; } @@ -5490,17 +5558,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, } } -/* - * The extent buffer bitmap operations are done with byte granularity because - * bitmap items are not guaranteed to be aligned to a word and therefore a - * single word in a bitmap may straddle two pages in the extent buffer. 
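The read_extent_buffer_pages() restructuring above splits one loop into two passes: lock every page first, only then inspect uptodate state, so a concurrent clear_extent_buffer_uptodate() cannot flip a page between the check and the read. A minimal sketch of that lock-all-then-check ordering, with plain flags standing in for page locks:

#include <stdio.h>

/* Sketch of the two-phase scheme: all pages are locked in a first pass
 * before any uptodate check. Locks are modelled as plain flags; a real
 * implementation would use actual page locks. */
#define NUM_PAGES 4

struct page { int locked, uptodate; };

static int read_pages(struct page pages[NUM_PAGES])
{
    int i, all_uptodate = 1;

    /* pass 1: take every lock first */
    for (i = 0; i < NUM_PAGES; i++)
        pages[i].locked = 1;

    /* pass 2: only now inspect uptodate state */
    for (i = 0; i < NUM_PAGES; i++)
        if (!pages[i].uptodate)
            all_uptodate = 0;

    for (i = 0; i < NUM_PAGES; i++)
        pages[i].locked = 0;
    return all_uptodate;
}

int main(void)
{
    struct page pages[NUM_PAGES] = {
        { 0, 1 }, { 0, 1 }, { 0, 0 }, { 0, 1 }
    };
    printf("all uptodate: %d\n", read_pages(pages)); /* 0 */
    return 0;
}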
- */ -#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE) -#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1) -#define BITMAP_FIRST_BYTE_MASK(start) \ - ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK) -#define BITMAP_LAST_BYTE_MASK(nbits) \ - (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) +void le_bitmap_set(u8 *map, unsigned int start, int len) +{ + u8 *p = map + BIT_BYTE(start); + const unsigned int size = start + len; + int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); + u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); + + while (len - bits_to_set >= 0) { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_BYTE; + mask_to_set = ~(u8)0; + p++; + } + if (len) { + mask_to_set &= BITMAP_LAST_BYTE_MASK(size); + *p |= mask_to_set; + } +} + +void le_bitmap_clear(u8 *map, unsigned int start, int len) +{ + u8 *p = map + BIT_BYTE(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE); + u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_BYTE; + mask_to_clear = ~(u8)0; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_BYTE_MASK(size); + *p &= ~mask_to_clear; + } +} /* * eb_bitmap_offset() - calculate the page and offset of the byte containing the @@ -5544,7 +5640,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb, int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, unsigned long nr) { - char *kaddr; + u8 *kaddr; struct page *page; unsigned long i; size_t offset; @@ -5566,13 +5662,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len) { - char *kaddr; + u8 *kaddr; struct page *page; unsigned long i; size_t offset; const unsigned int size = pos + len; int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE); - unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos); + u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos); eb_bitmap_offset(eb, start, pos, &i, &offset); page = eb->pages[i]; @@ -5583,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, kaddr[offset] |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_BYTE; - mask_to_set = ~0U; + mask_to_set = ~(u8)0; if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; @@ -5608,13 +5704,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len) { - char *kaddr; + u8 *kaddr; struct page *page; unsigned long i; size_t offset; const unsigned int size = pos + len; int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE); - unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos); + u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos); eb_bitmap_offset(eb, start, pos, &i, &offset); page = eb->pages[i]; @@ -5625,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, kaddr[offset] &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_BYTE; - mask_to_clear = ~0U; + mask_to_clear = ~(u8)0; if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; @@ -5679,14 +5775,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, if (src_offset + len > dst->len) { btrfs_err(dst->fs_info, - "memmove bogus src_offset %lu move 
" - "len %lu dst len %lu", src_offset, len, dst->len); + "memmove bogus src_offset %lu move len %lu dst len %lu", + src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { btrfs_err(dst->fs_info, - "memmove bogus dst_offset %lu move " - "len %lu dst len %lu", dst_offset, len, dst->len); + "memmove bogus dst_offset %lu move len %lu dst len %lu", + dst_offset, len, dst->len); BUG_ON(1); } @@ -5726,13 +5822,15 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move " - "len %lu len %lu", src_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus src_offset %lu move len %lu len %lu", + src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move " - "len %lu len %lu", dst_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus dst_offset %lu move len %lu len %lu", + dst_offset, len, dst->len); BUG_ON(1); } if (dst_offset < src_offset) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1baf19c9b79d..ab31d145227e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -20,6 +20,7 @@ #define EXTENT_DAMAGED (1U << 14) #define EXTENT_NORESERVE (1U << 15) #define EXTENT_QGROUP_RESERVED (1U << 16) +#define EXTENT_CLEAR_DATA_RESV (1U << 17) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) @@ -58,21 +59,43 @@ */ #define EXTENT_PAGE_PRIVATE 1 +/* + * The extent buffer bitmap operations are done with byte granularity instead of + * word granularity for two reasons: + * 1. The bitmaps must be little-endian on disk. + * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a + * single word in a bitmap may straddle two pages in the extent buffer. 
+ */ +#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE) +#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1) +#define BITMAP_FIRST_BYTE_MASK(start) \ + ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK) +#define BITMAP_LAST_BYTE_MASK(nbits) \ + (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) + +static inline int le_test_bit(int nr, const u8 *addr) +{ + return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1))); +} + +extern void le_bitmap_set(u8 *map, unsigned int start, int len); +extern void le_bitmap_clear(u8 *map, unsigned int start, int len); + struct extent_state; struct btrfs_root; struct btrfs_io_bio; struct io_failure_record; -typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, - struct bio *bio, int mirror_num, - unsigned long bio_flags, u64 bio_offset); +typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags, + u64 bio_offset); struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written); int (*writepage_start_hook)(struct page *page, u64 start, u64 end); extent_submit_bio_hook_t *submit_bio_hook; - int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset, + int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); @@ -348,7 +371,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, unsigned long len); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); @@ -358,7 +381,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb); #define WAIT_COMPLETE 1 #define WAIT_PAGE_LOCK 2 int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, u64 start, int wait, + struct extent_buffer *eb, int wait, get_extent_t *get_extent, int mirror_num); void wait_on_extent_buffer_writeback(struct extent_buffer *eb); @@ -412,7 +435,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, - struct page *locked_page, + u64 delalloc_end, struct page *locked_page, unsigned bits_to_clear, unsigned long page_ops); struct bio * @@ -468,5 +491,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, u64 *end, u64 max_bytes); #endif struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e0715fcfb11e..26f9ac719d20 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -13,7 +13,7 @@ int __init extent_map_init(void) { extent_map_cache = kmem_cache_create("btrfs_extent_map", sizeof(struct extent_map), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!extent_map_cache) return -ENOMEM; return 0; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 62a81ee13a5f..d0d571c47d33 100644 --- 
a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -250,7 +250,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, offset + root->sectorsize - 1, EXTENT_NODATASUM); } else { - btrfs_info(BTRFS_I(inode)->root->fs_info, + btrfs_info_rl(BTRFS_I(inode)->root->fs_info, "no csum found for inode %llu start %llu", btrfs_ino(inode), offset); } @@ -699,7 +699,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, */ ret = btrfs_split_item(trans, root, path, &key, offset); if (ret && ret != -EAGAIN) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e0c9bd3fb02d..3a14c87d9c92 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -132,7 +132,7 @@ static int __btrfs_add_inode_defrag(struct inode *inode, static inline int __need_auto_defrag(struct btrfs_root *root) { - if (!btrfs_test_opt(root, AUTO_DEFRAG)) + if (!btrfs_test_opt(root->fs_info, AUTO_DEFRAG)) return 0; if (btrfs_fs_closing(root->fs_info)) @@ -503,7 +503,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, end_of_last_block = start_pos + num_bytes - 1; err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, - cached); + cached, 0); if (err) return err; @@ -950,7 +950,7 @@ delete_extent_item: ret = btrfs_del_items(trans, root, path, del_slot, del_nr); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); break; } @@ -974,7 +974,7 @@ delete_extent_item: path->slots[0] = del_slot; ret = btrfs_del_items(trans, root, path, del_slot, del_nr); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); } leaf = path->nodes[0]; @@ -1110,13 +1110,25 @@ again: leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY); + if (key.objectid != ino || + key.type != BTRFS_EXTENT_DATA_KEY) { + ret = -EINVAL; + btrfs_abort_transaction(trans, ret); + goto out; + } fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - BUG_ON(btrfs_file_extent_type(leaf, fi) != - BTRFS_FILE_EXTENT_PREALLOC); + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) { + ret = -EINVAL; + btrfs_abort_transaction(trans, ret); + goto out; + } extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); - BUG_ON(key.offset > start || extent_end < end); + if (key.offset > start || extent_end < end) { + ret = -EINVAL; + btrfs_abort_transaction(trans, ret); + goto out; + } bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); @@ -1190,7 +1202,7 @@ again: goto again; } if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -1213,12 +1225,19 @@ again: ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset); - BUG_ON(ret); /* -ENOMEM */ + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } if (split == start) { key.offset = start; } else { - BUG_ON(start != key.offset); + if (start != key.offset) { + ret = -EINVAL; + btrfs_abort_transaction(trans, ret); + goto out; + } path->slots[0]--; extent_end = end; } @@ -1240,7 +1259,10 @@ again: ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset); - BUG_ON(ret); /* -ENOMEM */ + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } } other_start = 0; other_end = start; @@ 
-1257,7 +1279,10 @@ again: ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset); - BUG_ON(ret); /* -ENOMEM */ + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } } if (del_nr == 0) { fi = btrfs_item_ptr(leaf, path->slots[0], @@ -1278,7 +1303,7 @@ again: ret = btrfs_del_items(trans, root, path, del_slot, del_nr); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } } @@ -1534,30 +1559,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, reserve_bytes = round_up(write_bytes + sector_offset, root->sectorsize); - if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) && - check_can_nocow(inode, pos, &write_bytes) > 0) { - /* - * For nodata cow case, no need to reserve - * data space. - */ - only_release_metadata = true; - /* - * our prealloc extent may be smaller than - * write_bytes, so scale down. - */ - num_pages = DIV_ROUND_UP(write_bytes + offset, - PAGE_SIZE); - reserve_bytes = round_up(write_bytes + sector_offset, - root->sectorsize); - goto reserve_metadata; - } - ret = btrfs_check_data_free_space(inode, pos, write_bytes); - if (ret < 0) - break; + if (ret < 0) { + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | + BTRFS_INODE_PREALLOC)) && + check_can_nocow(inode, pos, &write_bytes) > 0) { + /* + * For nodata cow case, no need to reserve + * data space. + */ + only_release_metadata = true; + /* + * our prealloc extent may be smaller than + * write_bytes, so scale down. + */ + num_pages = DIV_ROUND_UP(write_bytes + offset, + PAGE_SIZE); + reserve_bytes = round_up(write_bytes + + sector_offset, + root->sectorsize); + } else { + break; + } + } -reserve_metadata: ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); if (ret) { if (!only_release_metadata) @@ -1629,13 +1654,11 @@ again: * managed to copy. */ if (num_sectors > dirty_sectors) { - /* - * we round down because we don't want to count - * any partial blocks actually sent through the - * IO machines - */ - release_bytes = round_down(release_bytes - copied, - root->sectorsize); + + /* release everything except the sectors we dirtied */ + release_bytes -= dirty_sectors << + root->fs_info->sb->s_blocksize_bits; + if (copied > 0) { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; @@ -1759,7 +1782,7 @@ static void update_time_for_write(struct inode *inode) if (IS_NOCMTIME(inode)) return; - now = current_fs_time(inode->i_sb); + now = current_time(inode); if (!timespec_equal(&inode->i_mtime, &now)) inode->i_mtime = now; @@ -2035,6 +2058,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); + /* + * An ordered extent might have started before and completed + * already with io errors, in which case the inode was not + * updated and we end up here. So check the inode's mapping + * flags for any errors that might have happened while doing + * writeback of file data. 
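The __btrfs_buffered_write() hunk above replaces the round-down of the released byte count with an exact subtraction: keep only the sectors the short copy actually dirtied, converted to bytes with the superblock's block-size shift. A small arithmetic sketch of that accounting, assuming a 4K block size (shift of 12) purely for illustration:

#include <stdio.h>

/* A 4K block size (s_blocksize_bits == 12) is assumed for the demo. */
#define BLOCKSIZE_BITS 12ULL

static unsigned long long release_after_short_copy(
        unsigned long long reserved_bytes,
        unsigned long long dirty_sectors)
{
    /* keep only what was actually dirtied, release the rest */
    return reserved_bytes - (dirty_sectors << BLOCKSIZE_BITS);
}

int main(void)
{
    /* reserved 8 sectors (32K), short copy dirtied only 3 (12K) */
    printf("release %llu bytes\n",
           release_after_short_copy(8ULL << BLOCKSIZE_BITS, 3));
    /* -> 20480 */
    return 0;
}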
+ */ + ret = filemap_check_errors(inode->i_mapping); inode_unlock(inode); goto out; } @@ -2064,7 +2095,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } trans->sync = true; - btrfs_init_log_ctx(&ctx); + btrfs_init_log_ctx(&ctx, inode); ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { @@ -2479,7 +2510,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) } ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, - min_size); + min_size, 0); BUG_ON(ret); trans->block_rsv = rsv; @@ -2522,7 +2553,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) } ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, - rsv, min_size); + rsv, min_size, 0); BUG_ON(ret); /* shouldn't happen */ trans->block_rsv = rsv; @@ -2572,7 +2603,7 @@ out_trans: goto out_free; inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_mtime = inode->i_ctime = current_time(inode); trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); @@ -2669,6 +2700,7 @@ static long btrfs_fallocate(struct file *file, int mode, alloc_start = round_down(offset, blocksize); alloc_end = round_up(offset + len, blocksize); + cur_offset = alloc_start; /* Make sure we aren't being give some crap mode */ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) @@ -2761,7 +2793,6 @@ static long btrfs_fallocate(struct file *file, int mode, /* First, check if we exceed the qgroup limit */ INIT_LIST_HEAD(&reserve_list); - cur_offset = alloc_start; while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); @@ -2788,6 +2819,14 @@ static long btrfs_fallocate(struct file *file, int mode, last_byte - cur_offset); if (ret < 0) break; + } else { + /* + * Do not need to reserve unwritten extent for this + * range, free reserved data space first, otherwise + * it'll result in false ENOSPC error. + */ + btrfs_free_reserved_data_space(inode, cur_offset, + last_byte - cur_offset); } free_extent_map(em); cur_offset = last_byte; @@ -2805,6 +2844,9 @@ static long btrfs_fallocate(struct file *file, int mode, range->start, range->len, 1 << inode->i_blkbits, offset + len, &alloc_hint); + else + btrfs_free_reserved_data_space(inode, range->start, + range->len); list_del(&range->list); kfree(range); } @@ -2825,7 +2867,7 @@ static long btrfs_fallocate(struct file *file, int mode, if (IS_ERR(trans)) { ret = PTR_ERR(trans); } else { - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); i_size_write(inode, actual_end); btrfs_ordered_update_i_size(inode, actual_end, NULL); ret = btrfs_update_inode(trans, root, inode); @@ -2839,18 +2881,11 @@ out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state, GFP_KERNEL); out: - /* - * As we waited the extent range, the data_rsv_map must be empty - * in the range, as written data range will be released from it. - * And for prealloacted extent, it will also be released when - * its metadata is written. - * So this is completely used as cleanup. - */ - btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start); inode_unlock(inode); /* Let go of our reservation. 
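The btrfs_sync_file() fast path above gains a filemap_check_errors() call: even when an earlier ordered extent already completed (possibly with IO errors) and there is nothing left to log, the error latched on the mapping must surface through fsync() instead of a blind success. A loose model of that test-and-clear behaviour; struct mapping and check_errors() are simplified stand-ins, and -5 plays -EIO:

#include <stdio.h>

/* Model of the fast-path fix: even with nothing left to commit, latch
 * any writeback error recorded on the mapping so fsync() reports it. */
struct mapping { int as_eio; };

static int check_errors(struct mapping *m)
{
    int err = m->as_eio ? -5 : 0;  /* -EIO */
    m->as_eio = 0;                 /* test-and-clear, like the kernel */
    return err;
}

static int fsync_fast_path(struct mapping *m, int work_pending)
{
    if (!work_pending)
        return check_errors(m);    /* previously returned 0 blindly */
    return 0; /* ... full log/commit path elided ... */
}

int main(void)
{
    struct mapping m = { .as_eio = 1 };
    printf("%d\n", fsync_fast_path(&m, 0)); /* -EIO surfaces */
    printf("%d\n", fsync_fast_path(&m, 0)); /* 0: error consumed */
    return 0;
}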
*/ - btrfs_free_reserved_data_space(inode, alloc_start, - alloc_end - alloc_start); + if (ret != 0) + btrfs_free_reserved_data_space(inode, alloc_start, + alloc_end - cur_offset); return ret; } @@ -2977,7 +3012,7 @@ int btrfs_auto_defrag_init(void) { btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", sizeof(struct inode_defrag), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + SLAB_MEM_SPREAD, NULL); if (!btrfs_inode_defrag_cachep) return -ENOMEM; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c6dc1183f542..e4b48f377d3a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -29,7 +29,7 @@ #include "inode-map.h" #include "volumes.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_32K struct btrfs_trim_range { @@ -280,7 +280,7 @@ fail: if (locked) mutex_unlock(&trans->transaction->cache_write_mutex); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -716,8 +716,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, if (BTRFS_I(inode)->generation != generation) { btrfs_err(root->fs_info, - "free space inode generation (%llu) " - "did not match free space cache generation (%llu)", + "free space inode generation (%llu) did not match free space cache generation (%llu)", BTRFS_I(inode)->generation, generation); return 0; } @@ -879,8 +878,9 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, if (!matched) { __btrfs_remove_free_space_cache(ctl); - btrfs_warn(fs_info, "block group %llu has wrong amount of free space", - block_group->key.objectid); + btrfs_warn(fs_info, + "block group %llu has wrong amount of free space", + block_group->key.objectid); ret = -1; } out: @@ -891,8 +891,9 @@ out: spin_unlock(&block_group->lock); ret = 0; - btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuilding it now", - block_group->key.objectid); + btrfs_warn(fs_info, + "failed to load free space cache for block group %llu, rebuilding it now", + block_group->key.objectid); } iput(inode); @@ -1415,11 +1416,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) { u64 bitmap_start; - u32 bytes_per_bitmap; + u64 bytes_per_bitmap; bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; bitmap_start = offset - ctl->start; - bitmap_start = div_u64(bitmap_start, bytes_per_bitmap); + bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); bitmap_start *= bytes_per_bitmap; bitmap_start += ctl->start; @@ -1638,10 +1639,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) u64 bitmap_bytes; u64 extent_bytes; u64 size = block_group->key.offset; - u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; - u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg); + u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; + u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); - max_bitmaps = max_t(u32, max_bitmaps, 1); + max_bitmaps = max_t(u64, max_bitmaps, 1); ASSERT(ctl->total_bitmaps <= max_bitmaps); @@ -1660,7 +1661,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as * we add more bitmaps. 
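The free-space-cache hunks just below widen BITS_PER_BITMAP to unsigned long via the 8UL suffix and move the per-bitmap byte math to u64 with div64_u64(); the plausible motivation is that the product BITS_PER_BITMAP * ctl->unit can exceed 32 bits once the allocation unit grows. A toy demonstration of how the same product truncates in 32-bit arithmetic but survives in 64-bit, with an illustrative 128K unit chosen to make the overflow land exactly on 2^32:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SZ 4096ULL
#define BITS_PER_BITMAP (PAGE_SZ * 8)   /* bits tracked per bitmap page */

int main(void)
{
    uint32_t unit32 = 1u << 17;                           /* 128K unit */
    uint32_t narrow = (uint32_t)BITS_PER_BITMAP * unit32; /* truncates */
    uint64_t wide = BITS_PER_BITMAP * (uint64_t)unit32;

    printf("32-bit: %u, 64-bit: %llu\n",
           narrow, (unsigned long long)wide);  /* 0 vs 4294967296 */
    return 0;
}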
*/ - bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE; + bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; if (bitmap_bytes >= max_bytes) { ctl->extents_thresh = 0; @@ -2298,7 +2299,8 @@ static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, } } -int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, +int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, + struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes) { struct btrfs_free_space *info; @@ -2345,7 +2347,7 @@ out: spin_unlock(&ctl->tree_lock); if (ret) { - printk(KERN_CRIT "BTRFS: unable to add free space :%d\n", ret); + btrfs_crit(fs_info, "unable to add free space :%d", ret); ASSERT(ret != -EEXIST); } @@ -2621,7 +2623,8 @@ out: spin_unlock(&ctl->tree_lock); if (align_gap_len) - __btrfs_add_free_space(ctl, align_gap, align_gap_len); + __btrfs_add_free_space(block_group->fs_info, ctl, + align_gap, align_gap_len); return ret; } @@ -3026,7 +3029,7 @@ int btrfs_find_space_cluster(struct btrfs_root *root, * For metadata, allow allocates with smaller extents. For * data, keep it dense. */ - if (btrfs_test_opt(root, SSD_SPREAD)) { + if (btrfs_test_opt(root->fs_info, SSD_SPREAD)) { cont1_bytes = min_bytes = bytes + empty_size; } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { cont1_bytes = bytes; @@ -3470,7 +3473,7 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) int ret = 0; u64 root_gen = btrfs_root_generation(&root->root_item); - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) return 0; /* @@ -3514,7 +3517,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, struct btrfs_io_ctl io_ctl; bool release_metadata = true; - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) return 0; memset(&io_ctl, 0, sizeof(io_ctl)); @@ -3662,7 +3665,7 @@ have_info: if (tmp->offset + tmp->bytes < offset) break; if (offset + bytes < tmp->offset) { - n = rb_prev(&info->offset_index); + n = rb_prev(&tmp->offset_index); continue; } info = tmp; @@ -3676,7 +3679,7 @@ have_info: if (offset + bytes < tmp->offset) break; if (tmp->offset + tmp->bytes < offset) { - n = rb_next(&info->offset_index); + n = rb_next(&tmp->offset_index); continue; } info = tmp; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 3af651c2bbc7..363fdd955e5d 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -89,13 +89,15 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, struct inode *inode); void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group); -int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, +int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, + struct btrfs_free_space_ctl *ctl, u64 bytenr, u64 size); static inline int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, u64 bytenr, u64 size) { - return __btrfs_add_free_space(block_group->free_space_ctl, + return __btrfs_add_free_space(block_group->fs_info, + block_group->free_space_ctl, bytenr, size); } int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 53dbeaf6ce94..57401b474ec6 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -107,7 +107,7 @@ search_free_space_info(struct btrfs_trans_handle *trans, if (ret < 0) return ERR_PTR(ret); if (ret != 0) { - btrfs_warn(fs_info, "missing free space info for %llu\n", + btrfs_warn(fs_info, 
"missing free space info for %llu", block_group->key.objectid); ASSERT(0); return ERR_PTR(-ENOENT); @@ -151,7 +151,7 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize) return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE); } -static unsigned long *alloc_bitmap(u32 bitmap_size) +static u8 *alloc_bitmap(u32 bitmap_size) { void *mem; @@ -180,8 +180,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, struct btrfs_free_space_info *info; struct btrfs_key key, found_key; struct extent_buffer *leaf; - unsigned long *bitmap; - char *bitmap_cursor; + u8 *bitmap, *bitmap_cursor; u64 start, end; u64 bitmap_range, i; u32 bitmap_size, flags, expected_extent_count; @@ -231,7 +230,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, block_group->sectorsize); last = div_u64(found_key.objectid + found_key.offset - start, block_group->sectorsize); - bitmap_set(bitmap, first, last - first); + le_bitmap_set(bitmap, first, last - first); extent_count++; nr++; @@ -261,7 +260,8 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (extent_count != expected_extent_count) { - btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + btrfs_err(fs_info, + "incorrect extent count for %llu; counted %u, expected %u", block_group->key.objectid, extent_count, expected_extent_count); ASSERT(0); @@ -269,7 +269,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, goto out; } - bitmap_cursor = (char *)bitmap; + bitmap_cursor = bitmap; bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; i = start; while (i < end) { @@ -305,7 +305,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, out: kvfree(bitmap); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -318,7 +318,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, struct btrfs_free_space_info *info; struct btrfs_key key, found_key; struct extent_buffer *leaf; - unsigned long *bitmap; + u8 *bitmap; u64 start, end; /* Initialize to silence GCC. 
*/ u64 extent_start = 0; @@ -362,7 +362,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, break; } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { unsigned long ptr; - char *bitmap_cursor; + u8 *bitmap_cursor; u32 bitmap_pos, data_size; ASSERT(found_key.objectid >= start); @@ -372,7 +372,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, bitmap_pos = div_u64(found_key.objectid - start, block_group->sectorsize * BITS_PER_BYTE); - bitmap_cursor = ((char *)bitmap) + bitmap_pos; + bitmap_cursor = bitmap + bitmap_pos; data_size = free_space_bitmap_size(found_key.offset, block_group->sectorsize); @@ -409,7 +409,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, offset = start; bitnr = 0; while (offset < end) { - bit = !!test_bit(bitnr, bitmap); + bit = !!le_test_bit(bitnr, bitmap); if (prev_bit == 0 && bit == 1) { extent_start = offset; } else if (prev_bit == 1 && bit == 0) { @@ -442,7 +442,8 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, } if (extent_count != expected_extent_count) { - btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + btrfs_err(fs_info, + "incorrect extent count for %llu; counted %u, expected %u", block_group->key.objectid, extent_count, expected_extent_count); ASSERT(0); @@ -454,7 +455,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, out: kvfree(bitmap); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -851,7 +852,7 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans, out: btrfs_free_path(path); if (ret) - btrfs_abort_transaction(trans, fs_info->free_space_root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -1047,7 +1048,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans, out: btrfs_free_path(path); if (ret) - btrfs_abort_transaction(trans, fs_info->free_space_root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -1163,7 +1164,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) return PTR_ERR(trans); - fs_info->creating_free_space_tree = 1; + set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); free_space_root = btrfs_create_tree(trans, fs_info, BTRFS_FREE_SPACE_TREE_OBJECTID); if (IS_ERR(free_space_root)) { @@ -1183,7 +1184,8 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) } btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); - fs_info->creating_free_space_tree = 0; + btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); + clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); ret = btrfs_commit_transaction(trans, tree_root); if (ret) @@ -1192,8 +1194,8 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) return 0; abort: - fs_info->creating_free_space_tree = 0; - btrfs_abort_transaction(trans, tree_root, ret); + clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans, tree_root); return ret; } @@ -1251,6 +1253,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) return PTR_ERR(trans); btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); + btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); fs_info->free_space_root = NULL; ret = clear_free_space_tree(trans, free_space_root); @@ -1280,7 +1283,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) return 0; abort: - btrfs_abort_transaction(trans, tree_root, ret); + 
btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans, tree_root); return ret; } @@ -1333,7 +1336,7 @@ out: btrfs_free_path(path); mutex_unlock(&block_group->free_space_lock); if (ret) - btrfs_abort_transaction(trans, fs_info->free_space_root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -1410,7 +1413,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans, out: btrfs_free_path(path); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -1480,7 +1483,8 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, } if (extent_count != expected_extent_count) { - btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + btrfs_err(fs_info, + "incorrect extent count for %llu; counted %u, expected %u", block_group->key.objectid, extent_count, expected_extent_count); ASSERT(0); @@ -1542,7 +1546,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, } if (extent_count != expected_extent_count) { - btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + btrfs_err(fs_info, + "incorrect extent count for %llu; counted %u, expected %u", block_group->key.objectid, extent_count, expected_extent_count); ASSERT(0); diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index aae520b2aee5..a97fdc156a03 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -24,6 +24,11 @@ int __init btrfs_hash_init(void) return PTR_ERR_OR_ZERO(tfm); } +const char* btrfs_crc32c_impl(void) +{ + return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)); +} + void btrfs_hash_exit(void) { crypto_free_shash(tfm); diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index 118a2316e5d3..c3a2ec554361 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -22,6 +22,7 @@ int __init btrfs_hash_init(void); void btrfs_hash_exit(void); +const char* btrfs_crc32c_impl(void); u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 70107f7c9307..d27014b8bf72 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -38,7 +38,7 @@ static int caching_kthread(void *data) int slot; int ret; - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) return 0; path = btrfs_alloc_path(); @@ -104,7 +104,7 @@ again: break; if (last != (u64)-1 && last + 1 != key.objectid) { - __btrfs_add_free_space(ctl, last + 1, + __btrfs_add_free_space(fs_info, ctl, last + 1, key.objectid - last - 1); wake_up(&root->ino_cache_wait); } @@ -115,7 +115,7 @@ next: } if (last < root->highest_objectid - 1) { - __btrfs_add_free_space(ctl, last + 1, + __btrfs_add_free_space(fs_info, ctl, last + 1, root->highest_objectid - last - 1); } @@ -136,12 +136,13 @@ out: static void start_caching(struct btrfs_root *root) { + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct task_struct *tsk; int ret; u64 objectid; - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) return; spin_lock(&root->ino_cache_lock); @@ -153,7 +154,7 @@ static void start_caching(struct btrfs_root *root) root->ino_cache_state = BTRFS_CACHE_STARTED; spin_unlock(&root->ino_cache_lock); - ret = load_free_ino_cache(root->fs_info, root); + ret = load_free_ino_cache(fs_info, root); if (ret == 1) { spin_lock(&root->ino_cache_lock); root->ino_cache_state = BTRFS_CACHE_FINISHED; @@ -170,22 +171,22 @@ static void 
start_caching(struct btrfs_root *root) */ ret = btrfs_find_free_objectid(root, &objectid); if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) { - __btrfs_add_free_space(ctl, objectid, + __btrfs_add_free_space(fs_info, ctl, objectid, BTRFS_LAST_FREE_OBJECTID - objectid + 1); } tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu", root->root_key.objectid); if (IS_ERR(tsk)) { - btrfs_warn(root->fs_info, "failed to start inode caching task"); - btrfs_clear_pending_and_info(root->fs_info, INODE_MAP_CACHE, + btrfs_warn(fs_info, "failed to start inode caching task"); + btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE, "disabling inode map caching"); } } int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) { - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) return btrfs_find_free_objectid(root, objectid); again: @@ -209,28 +210,29 @@ again: void btrfs_return_ino(struct btrfs_root *root, u64 objectid) { + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) return; again: if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { - __btrfs_add_free_space(pinned, objectid, 1); + __btrfs_add_free_space(fs_info, pinned, objectid, 1); } else { - down_write(&root->fs_info->commit_root_sem); + down_write(&fs_info->commit_root_sem); spin_lock(&root->ino_cache_lock); if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { spin_unlock(&root->ino_cache_lock); - up_write(&root->fs_info->commit_root_sem); + up_write(&fs_info->commit_root_sem); goto again; } spin_unlock(&root->ino_cache_lock); start_caching(root); - __btrfs_add_free_space(pinned, objectid, 1); + __btrfs_add_free_space(fs_info, pinned, objectid, 1); - up_write(&root->fs_info->commit_root_sem); + up_write(&fs_info->commit_root_sem); } } @@ -251,7 +253,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) struct rb_node *n; u64 count; - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) return; while (1) { @@ -277,7 +279,8 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) rb_erase(&info->offset_index, rbroot); spin_unlock(rbroot_lock); if (add_to_ctl) - __btrfs_add_free_space(ctl, info->offset, count); + __btrfs_add_free_space(root->fs_info, ctl, + info->offset, count); kmem_cache_free(btrfs_free_space_cachep, info); } } @@ -412,7 +415,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root, if (btrfs_root_refs(&root->root_item) == 0) return 0; - if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) return 0; path = btrfs_alloc_path(); @@ -458,7 +461,7 @@ again: BTRFS_I(inode)->generation = 0; ret = btrfs_update_inode(trans, root, inode); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_put; } @@ -466,7 +469,7 @@ again: ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode); if (ret) { if (ret != -ENOSPC) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_put; } } @@ -495,10 +498,9 @@ again: ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_space(inode, 0, prealloc); + btrfs_delalloc_release_metadata(inode, prealloc); goto out_put; } - btrfs_free_reserved_data_space(inode, 0, prealloc); ret = btrfs_write_out_ino_cache(root, trans, path, inode); 
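A pattern worth isolating from the free-space-cache.c and inode-map.c hunks above: __btrfs_add_free_space() now takes a struct btrfs_fs_info pointer so the failure path can report per filesystem via btrfs_crit() rather than a bare printk. A minimal userspace sketch of that context-threading idea, assuming hypothetical names fs_ctx, fs_crit and add_free_range:

#include <stdio.h>

struct fs_ctx {
	const char *label;		/* identifies the filesystem in logs */
};

/* with a context handle, the message can say which filesystem failed */
static void fs_crit(const struct fs_ctx *fs, const char *msg, int err)
{
	fprintf(stderr, "[%s] %s: %d\n", fs->label, msg, err);
}

static int add_free_range(const struct fs_ctx *fs,
			  unsigned long long offset, unsigned long long bytes)
{
	if (bytes == 0) {		/* stand-in for the real failure modes */
		fs_crit(fs, "unable to add free space", -22);
		return -22;
	}
	(void)offset;			/* real code links the range into a tree */
	return 0;
}

int main(void)
{
	struct fs_ctx fs = { .label = "demo-fs" };

	return add_free_range(&fs, 4096, 0) ? 1 : 0;
}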
out_put: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 270499598ed4..2b790bda7998 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -60,6 +60,7 @@ #include "hash.h" #include "props.h" #include "qgroup.h" +#include "dedupe.h" struct btrfs_iget_args { struct btrfs_key *location; @@ -105,8 +106,9 @@ static int btrfs_truncate(struct inode *inode); static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); static noinline int cow_file_range(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, int unlock); + u64 start, u64 end, u64 delalloc_end, + int *page_started, unsigned long *nr_written, + int unlock, struct btrfs_dedupe_hash *hash); static struct extent_map *create_pinned_em(struct inode *inode, u64 start, u64 len, u64 orig_start, u64 block_start, u64 block_len, @@ -294,7 +296,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, start, aligned_end, NULL, 1, 1, extent_item_size, &extent_inserted); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -305,7 +307,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, inline_len, compressed_size, compress_type, compressed_pages); if (ret && ret != -ENOSPC) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } else if (ret == -ENOSPC) { ret = 1; @@ -374,12 +376,12 @@ static inline int inode_need_compress(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; /* force compress */ - if (btrfs_test_opt(root, FORCE_COMPRESS)) + if (btrfs_test_opt(root->fs_info, FORCE_COMPRESS)) return 1; /* bad compression ratios */ if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) return 0; - if (btrfs_test_opt(root, COMPRESS) || + if (btrfs_test_opt(root->fs_info, COMPRESS) || BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || BTRFS_I(inode)->force_compress) return 1; @@ -558,12 +560,15 @@ cont: * we don't need to create any more async work items. * Unlock and free up our temp pages. */ - extent_clear_unlock_delalloc(inode, start, end, NULL, - clear_flags, PAGE_UNLOCK | + extent_clear_unlock_delalloc(inode, start, end, end, + NULL, clear_flags, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | page_error_op | PAGE_END_WRITEBACK); + btrfs_free_reserved_data_space_noquota(inode, start, + end - start + 1); goto free_pages_out; } } @@ -585,9 +590,27 @@ cont: will_compress = 0; } else { num_bytes = total_in; + *num_added += 1; + + /* + * The async work queues will take care of doing actual + * allocation on disk for these compressed pages, and + * will submit them to the elevator. 
+ */ + add_async_extent(async_cow, start, num_bytes, + total_compressed, pages, nr_pages_ret, + compress_type); + + if (start + num_bytes < end) { + start += num_bytes; + pages = NULL; + cond_resched(); + goto again; + } + return; } } - if (!will_compress && pages) { + if (pages) { /* * the compression code ran but failed to make things smaller, * free any pages it allocated and our page pointer array @@ -602,48 +625,28 @@ cont: nr_pages_ret = 0; /* flag the file so we don't compress in the future */ - if (!btrfs_test_opt(root, FORCE_COMPRESS) && + if (!btrfs_test_opt(root->fs_info, FORCE_COMPRESS) && !(BTRFS_I(inode)->force_compress)) { BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; } } - if (will_compress) { - *num_added += 1; - - /* the async work queues will take care of doing actual - * allocation on disk for these compressed pages, - * and will submit them to the elevator. - */ - add_async_extent(async_cow, start, num_bytes, - total_compressed, pages, nr_pages_ret, - compress_type); - - if (start + num_bytes < end) { - start += num_bytes; - pages = NULL; - cond_resched(); - goto again; - } - } else { cleanup_and_bail_uncompressed: - /* - * No compression, but we still need to write the pages in - * the file we've been given so far. redirty the locked - * page if it corresponds to our extent and set things up - * for the async work queue to run cow_file_range to do - * the normal delalloc dance - */ - if (page_offset(locked_page) >= start && - page_offset(locked_page) <= end) { - __set_page_dirty_nobuffers(locked_page); - /* unlocked later on in the async handlers */ - } - if (redirty) - extent_range_redirty_for_io(inode, start, end); - add_async_extent(async_cow, start, end - start + 1, - 0, NULL, 0, BTRFS_COMPRESS_NONE); - *num_added += 1; - } + /* + * No compression, but we still need to write the pages in the file + * we've been given so far. redirty the locked page if it corresponds + * to our extent and set things up for the async work queue to run + * cow_file_range to do the normal delalloc dance. 
+ */ + if (page_offset(locked_page) >= start && + page_offset(locked_page) <= end) + __set_page_dirty_nobuffers(locked_page); + /* unlocked later on in the async handlers */ + + if (redirty) + extent_range_redirty_for_io(inode, start, end); + add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, + BTRFS_COMPRESS_NONE); + *num_added += 1; return; @@ -712,7 +715,10 @@ retry: async_extent->start, async_extent->start + async_extent->ram_size - 1, - &page_started, &nr_written, 0); + async_extent->start + + async_extent->ram_size - 1, + &page_started, &nr_written, 0, + NULL); /* JDM XXX */ @@ -739,7 +745,7 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); - ret = btrfs_reserve_extent(root, + ret = btrfs_reserve_extent(root, async_extent->ram_size, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, &ins, 1, 1); @@ -832,6 +838,8 @@ retry: extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, + async_extent->start + + async_extent->ram_size - 1, NULL, EXTENT_LOCKED | EXTENT_DELALLOC, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK); @@ -851,7 +859,8 @@ retry: tree->ops->writepage_end_io_hook(p, start, end, NULL, 0); p->mapping = NULL; - extent_clear_unlock_delalloc(inode, start, end, NULL, 0, + extent_clear_unlock_delalloc(inode, start, end, end, + NULL, 0, PAGE_END_WRITEBACK | PAGE_SET_ERROR); free_async_extent_pages(async_extent); @@ -868,6 +877,8 @@ out_free: extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, + async_extent->start + + async_extent->ram_size - 1, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | @@ -925,9 +936,9 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, */ static noinline int cow_file_range(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, - int unlock) + u64 start, u64 end, u64 delalloc_end, + int *page_started, unsigned long *nr_written, + int unlock, struct btrfs_dedupe_hash *hash) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 alloc_hint = 0; @@ -961,12 +972,14 @@ static noinline int cow_file_range(struct inode *inode, ret = cow_file_range_inline(root, inode, start, end, 0, 0, NULL); if (ret == 0) { - extent_clear_unlock_delalloc(inode, start, end, NULL, + extent_clear_unlock_delalloc(inode, start, end, + delalloc_end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); - + btrfs_free_reserved_data_space_noquota(inode, start, + end - start + 1); *nr_written = *nr_written + (end - start + PAGE_SIZE) / PAGE_SIZE; *page_started = 1; @@ -986,7 +999,7 @@ static noinline int cow_file_range(struct inode *inode, unsigned long op; cur_alloc_size = disk_num_bytes; - ret = btrfs_reserve_extent(root, cur_alloc_size, + ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, root->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret < 0) @@ -1056,7 +1069,8 @@ static noinline int cow_file_range(struct inode *inode, op |= PAGE_SET_PRIVATE2; extent_clear_unlock_delalloc(inode, start, - start + ram_size - 1, locked_page, + start + ram_size - 1, + delalloc_end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC, op); disk_num_bytes -= cur_alloc_size; @@ -1073,7 +1087,8 @@ out_reserve: btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); 
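The inode.c hunks above extend cow_file_range() and extent_clear_unlock_delalloc() with a delalloc_end argument so that error paths release the whole delalloc range, including pages the failing call never reached, not just the sub-range being processed. A toy model of that cleanup contract, with hypothetical helpers unlock_range() and process_chunks():

#include <stdint.h>
#include <stdio.h>

static void unlock_range(uint64_t start, uint64_t end)
{
	printf("unlock [%llu, %llu]\n",
	       (unsigned long long)start, (unsigned long long)end);
}

/* walk [start, end] in 4 KiB steps; fail_at injects an error for the demo */
static int process_chunks(uint64_t start, uint64_t end,
			  uint64_t delalloc_end, uint64_t fail_at)
{
	uint64_t cur = start;

	while (cur <= end) {
		uint64_t chunk_end = cur + 4095 < end ? cur + 4095 : end;

		if (cur >= fail_at) {
			/*
			 * On error, unlock all the way to delalloc_end,
			 * covering pages this call never got to touch.
			 */
			unlock_range(cur, delalloc_end);
			return -1;
		}
		cur = chunk_end + 1;
	}
	return 0;
}

int main(void)
{
	/* the caller locked [0, 65535]; this call only handles [0, 16383] */
	return process_chunks(0, 16383, 65535, 8192) ? 0 : 1;
}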
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); out_unlock: - extent_clear_unlock_delalloc(inode, start, end, locked_page, + extent_clear_unlock_delalloc(inode, start, end, delalloc_end, + locked_page, EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | EXTENT_DELALLOC | EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | @@ -1156,7 +1171,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->start = start; if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && - !btrfs_test_opt(root, FORCE_COMPRESS)) + !btrfs_test_opt(root->fs_info, FORCE_COMPRESS)) cur_end = end; else cur_end = min(end, start + SZ_512K - 1); @@ -1252,7 +1267,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, path = btrfs_alloc_path(); if (!path) { - extent_clear_unlock_delalloc(inode, start, end, locked_page, + extent_clear_unlock_delalloc(inode, start, end, end, + locked_page, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, PAGE_UNLOCK | @@ -1270,7 +1286,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, start, end, locked_page, + extent_clear_unlock_delalloc(inode, start, end, end, + locked_page, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, PAGE_UNLOCK | @@ -1418,7 +1435,8 @@ out_check: if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, found_key.offset - 1, - page_started, nr_written, 1); + end, page_started, nr_written, 1, + NULL); if (ret) { if (!nolock && nocow) btrfs_end_write_no_snapshoting(root); @@ -1483,10 +1501,12 @@ out_check: } extent_clear_unlock_delalloc(inode, cur_offset, - cur_offset + num_bytes - 1, + cur_offset + num_bytes - 1, end, locked_page, EXTENT_LOCKED | - EXTENT_DELALLOC, PAGE_UNLOCK | - PAGE_SET_PRIVATE2); + EXTENT_DELALLOC | + EXTENT_CLEAR_DATA_RESV, + PAGE_UNLOCK | PAGE_SET_PRIVATE2); + if (!nolock && nocow) btrfs_end_write_no_snapshoting(root); cur_offset = extent_end; @@ -1501,8 +1521,8 @@ out_check: } if (cow_start != (u64)-1) { - ret = cow_file_range(inode, locked_page, cow_start, end, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, cow_start, end, end, + page_started, nr_written, 1, NULL); if (ret) goto error; } @@ -1513,7 +1533,7 @@ error: ret = err; if (ret && cur_offset < end) - extent_clear_unlock_delalloc(inode, cur_offset, end, + extent_clear_unlock_delalloc(inode, cur_offset, end, end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | @@ -1561,8 +1581,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); } else if (!inode_need_compress(inode)) { - ret = cow_file_range(inode, locked_page, start, end, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, start, end, end, + page_started, nr_written, 1, NULL); } else { set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags); @@ -1740,7 +1760,7 @@ static void btrfs_set_bit_hook(struct inode *inode, } /* For sanity tests */ - if (btrfs_test_is_dummy_root(root)) + if (btrfs_is_testing(root->fs_info)) return; __percpu_counter_add(&root->fs_info->delalloc_bytes, len, @@ -1799,11 +1819,13 @@ static void btrfs_clear_bit_hook(struct inode *inode, btrfs_delalloc_release_metadata(inode, len); /* For sanity tests. 
*/ - if (btrfs_test_is_dummy_root(root)) + if (btrfs_is_testing(root->fs_info)) return; if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID - && do_list && !(state->state & EXTENT_NORESERVE)) + && do_list && !(state->state & EXTENT_NORESERVE) + && (*bits & (EXTENT_DO_ACCOUNTING | + EXTENT_CLEAR_DATA_RESV))) btrfs_free_reserved_data_space_noquota(inode, state->start, len); @@ -1822,8 +1844,12 @@ static void btrfs_clear_bit_hook(struct inode *inode, /* * extent_io.c merge_bio_hook, this must check the chunk tree to make sure * we don't create bios that span stripes or chunks + * + * return 1 if page cannot be merged to bio + * return 0 if page can be merged to bio + * return error otherwise */ -int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags) { @@ -1838,10 +1864,10 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, length = bio->bi_iter.bi_size; map_length = length; - ret = btrfs_map_block(root->fs_info, rw, logical, + ret = btrfs_map_block(root->fs_info, bio_op(bio), logical, &map_length, NULL, 0); - /* Will always return 0 with map_multi == NULL */ - BUG_ON(ret < 0); + if (ret < 0) + return ret; if (map_length < length + size) return 1; return 0; @@ -1855,9 +1881,8 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_start(struct inode *inode, int rw, - struct bio *bio, int mirror_num, - unsigned long bio_flags, +static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1876,14 +1901,14 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, +static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - ret = btrfs_map_bio(root, rw, bio, mirror_num, 1); + ret = btrfs_map_bio(root, bio, mirror_num, 1); if (ret) { bio->bi_error = ret; bio_endio(bio); @@ -1895,7 +1920,7 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * extent_io.c submission hook. 
This does the right thing for csum calculation * on write, or reading the csums from the tree before a read */ -static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { @@ -1910,7 +1935,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, if (btrfs_is_free_space_inode(inode)) metadata = BTRFS_WQ_ENDIO_FREE_SPACE; - if (!(rw & REQ_WRITE)) { + if (bio_op(bio) != REQ_OP_WRITE) { ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); if (ret) goto out; @@ -1932,7 +1957,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, goto mapit; /* we're doing a write, do the async checksumming */ ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, mirror_num, + inode, bio, mirror_num, bio_flags, bio_offset, __btrfs_submit_bio_start, __btrfs_submit_bio_done); @@ -1944,7 +1969,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, } mapit: - ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); + ret = btrfs_map_bio(root, bio, mirror_num, 0); out: if (ret < 0) { @@ -1974,7 +1999,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, } int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, - struct extent_state **cached_state) + struct extent_state **cached_state, int dedupe) { WARN_ON((end & (PAGE_SIZE - 1)) == 0); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, @@ -2038,7 +2063,8 @@ again: goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); + btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, + 0); ClearPageChecked(page); set_page_dirty(page); out: @@ -2295,7 +2321,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, if (PTR_ERR(root) == -ENOENT) return 0; WARN_ON(1); - pr_debug("inum=%llu, offset=%llu, root_id=%llu\n", + btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu", inum, offset, root_id); return PTR_ERR(root); } @@ -2595,7 +2621,7 @@ again: ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_free_path; } @@ -2622,7 +2648,7 @@ again: backref->root_id, backref->inum, new->file_pos); /* start - extent_offset */ if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_free_path; } @@ -2891,7 +2917,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) /* -ENOMEM or corruption */ - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -2951,7 +2977,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ordered_extent->file_offset, ordered_extent->len, trans->transid); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_unlock; } @@ -2961,7 +2987,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) btrfs_ordered_update_i_size(inode, 0, ordered_extent); ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) { /* -ENOMEM or corruption */ - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto 
out_unlock; } ret = 0; @@ -3205,7 +3231,7 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, root->root_key.objectid); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); else clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); @@ -3271,7 +3297,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); - BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */ + ASSERT(!ret); + if (ret) { + atomic_dec(&root->orphan_inodes); + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + if (insert) + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); + return ret; + } } /* insert an orphan item to track this unlinked/truncated file */ @@ -3287,7 +3322,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (ret != -EEXIST) { clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } } @@ -3299,7 +3334,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, root->root_key.objectid); if (ret && ret != -EEXIST) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } } @@ -3419,10 +3454,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); ret = PTR_ERR_OR_ZERO(inode); - if (ret && ret != -ESTALE) + if (ret && ret != -ENOENT) goto out; - if (ret == -ESTALE && root == root->fs_info->tree_root) { + if (ret == -ENOENT && root == root->fs_info->tree_root) { struct btrfs_root *dead_root; struct btrfs_fs_info *fs_info = root->fs_info; int is_dead_root = 0; @@ -3458,7 +3493,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * Inode is already gone but the orphan item is still there, * kill the orphan item. 
*/ - if (ret == -ESTALE) { + if (ret == -ENOENT) { trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -3617,7 +3652,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, /* * read an inode from the btree into the in-memory inode */ -static void btrfs_read_locked_inode(struct inode *inode) +static int btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; struct extent_buffer *leaf; @@ -3636,14 +3671,19 @@ static void btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - if (!path) + if (!path) { + ret = -ENOMEM; goto make_bad; + } memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) + if (ret) { + if (ret > 0) + ret = -ENOENT; goto make_bad; + } leaf = path->nodes[0]; @@ -3796,11 +3836,12 @@ cache_acl: } btrfs_update_iflags(inode); - return; + return 0; make_bad: btrfs_free_path(path); make_bad_inode(inode); + return ret; } /* @@ -3907,7 +3948,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, */ if (!btrfs_is_free_space_inode(inode) && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID - && !root->fs_info->log_root_recovering) { + && !test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) { btrfs_update_root_times(trans, root); ret = btrfs_delayed_update_inode(trans, root, inode); @@ -3998,20 +4039,20 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, btrfs_info(root->fs_info, "failed to delete reference to %.*s, inode %llu parent %llu", name_len, name, ino, dir_ino); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto err; } skip_backref: ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto err; } ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir_ino); if (ret != 0 && ret != -ENOENT) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto err; } @@ -4020,7 +4061,7 @@ skip_backref: if (ret == -ENOENT) ret = 0; else if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); err: btrfs_free_path(path); if (ret) @@ -4030,7 +4071,7 @@ err: inode_inc_iversion(inode); inode_inc_iversion(dir); inode->i_ctime = dir->i_mtime = - dir->i_ctime = current_fs_time(inode->i_sb); + dir->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, dir); out: return ret; @@ -4134,7 +4175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } btrfs_release_path(path); @@ -4144,7 +4185,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, dir_ino, &index, name, name_len); if (ret < 0) { if (ret != -ENOENT) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } di = btrfs_search_dir_index_item(root, path, dir_ino, @@ -4154,7 +4195,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, ret = -ENOENT; else ret = PTR_ERR(di); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -4167,16 +4208,16 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, ret = 
btrfs_delete_delayed_dir_index(trans, root, dir, index); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode_inc_iversion(dir); - dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + dir->i_mtime = dir->i_ctime = current_time(dir); ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); out: btrfs_free_path(path); return ret; @@ -4188,6 +4229,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err = 0; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_trans_handle *trans; + u64 last_unlink_trans; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; @@ -4210,11 +4252,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (err) goto out; + last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; + /* now the directory is empty */ err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), dentry->d_name.name, dentry->d_name.len); - if (!err) + if (!err) { btrfs_i_size_write(inode, 0); + /* + * Propagate the last_unlink_trans value of the deleted dir to + * its parent directory. This is to prevent an unrecoverable + * log tree in the case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + */ + if (last_unlink_trans >= trans->transid) + BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; + } out: btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); @@ -4497,7 +4555,6 @@ search_again: pending_del_nr); if (err) { btrfs_abort_transaction(trans, - root, err); goto error; } @@ -4509,8 +4566,7 @@ search_again: item_end, new_size); if (err) { - btrfs_abort_transaction(trans, - root, err); + btrfs_abort_transaction(trans, err); goto error; } } else if (test_bit(BTRFS_ROOT_REF_COWS, @@ -4549,6 +4605,7 @@ delete: BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, + trans->transid, trans->delayed_ref_updates * 2, 0); if (be_nice) { if (truncate_space_check(trans, root, @@ -4573,8 +4630,7 @@ delete: pending_del_slot, pending_del_nr); if (ret) { - btrfs_abort_transaction(trans, - root, ret); + btrfs_abort_transaction(trans, ret); goto error; } pending_del_nr = 0; @@ -4607,7 +4663,7 @@ out: ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); } error: if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) @@ -4713,7 +4769,7 @@ again: 0, 0, &cached_state, GFP_NOFS); ret = btrfs_set_extent_delalloc(inode, block_start, block_end, - &cached_state); + &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, block_start, block_end, &cached_state, GFP_NOFS); @@ -4776,7 +4832,7 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode, ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans, root); return ret; } @@ -4784,7 +4840,7 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode, ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, 0, 0, len, 0, len, 0, 0, 0); if (ret) - btrfs_abort_transaction(trans, root, ret); + 
btrfs_abort_transaction(trans, ret); else btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); @@ -4921,7 +4977,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) inode_inc_iversion(inode); if (!(mask & (ATTR_CTIME | ATTR_MTIME))) inode->i_ctime = inode->i_mtime = - current_fs_time(inode->i_sb); + current_time(inode); } if (newsize > oldsize) { @@ -5011,7 +5067,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) i_size_write(inode, BTRFS_I(inode)->disk_i_size); err = btrfs_orphan_del(trans, inode); if (err) - btrfs_abort_transaction(trans, root, err); + btrfs_abort_transaction(trans, err); btrfs_end_transaction(trans, root); } } @@ -5028,7 +5084,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (btrfs_root_readonly(root)) return -EROFS; - err = inode_change_ok(inode, attr); + err = setattr_prepare(dentry, attr); if (err) return err; @@ -5149,11 +5205,18 @@ void btrfs_evict_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_block_rsv *rsv, *global_rsv; int steal_from_global = 0; - u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); + u64 min_size; int ret; trace_btrfs_inode_evict(inode); + if (!root) { + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); + return; + } + + min_size = btrfs_calc_trunc_metadata_size(root, 1); + evict_inode_truncate_pages(inode); if (inode->i_nlink && @@ -5172,7 +5235,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_free_io_failure_record(inode, 0, (u64)-1); - if (root->fs_info->log_root_recovering) { + if (test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) { BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)); goto no_delete; @@ -5254,7 +5317,7 @@ void btrfs_evict_inode(struct inode *inode) if (steal_from_global) { if (!btrfs_check_space_for_delayed_refs(trans, root)) ret = btrfs_block_rsv_migrate(global_rsv, rsv, - min_size); + min_size, 0); else ret = -ENOSPC; } @@ -5585,7 +5648,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - btrfs_read_locked_inode(inode); + int ret; + + ret = btrfs_read_locked_inode(inode); if (!is_bad_inode(inode)) { inode_tree_add(inode); unlock_new_inode(inode); @@ -5594,7 +5659,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, } else { unlock_new_inode(inode); iput(inode); - inode = ERR_PTR(-ESTALE); + ASSERT(ret < 0); + inode = ERR_PTR(ret < 0 ? ret : -ESTALE); } } @@ -5618,7 +5684,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_op = &btrfs_dir_ro_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; - inode->i_mtime = current_fs_time(inode->i_sb); + inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; @@ -5748,6 +5814,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) int name_len; int is_curr = 0; /* ctx->pos points to the current index? 
*/ bool emitted; + bool put = false; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5765,7 +5832,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (key_type == BTRFS_DIR_INDEX_KEY) { INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - btrfs_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, &ins_list, + &del_list); } key.type = key_type; @@ -5912,8 +5980,8 @@ next: nopos: ret = 0; err: - if (key_type == BTRFS_DIR_INDEX_KEY) - btrfs_put_delayed_items(&ins_list, &del_list); + if (put) + btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list); btrfs_free_path(path); return ret; } @@ -6202,7 +6270,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_init_owner(inode, dir, mode); inode_set_bytes(inode, 0); - inode->i_mtime = current_fs_time(inode->i_sb); + inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; @@ -6228,9 +6296,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_inherit_iflags(inode, dir); if (S_ISREG(mode)) { - if (btrfs_test_opt(root, NODATASUM)) + if (btrfs_test_opt(root->fs_info, NODATASUM)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW)) + if (btrfs_test_opt(root->fs_info, NODATACOW)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM; } @@ -6308,7 +6376,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; else if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; } @@ -6316,10 +6384,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, name_len * 2); inode_inc_iversion(parent_inode); parent_inode->i_mtime = parent_inode->i_ctime = - current_fs_time(parent_inode->i_sb); + current_time(parent_inode); ret = btrfs_update_inode(trans, root, parent_inode); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); return ret; fail_dir_item: @@ -6534,7 +6602,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inc_nlink(inode); inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); @@ -6956,8 +7024,9 @@ not_found_em: insert: btrfs_release_path(path); if (em->start > start || extent_map_end(em) <= start) { - btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]", - em->start, em->len, start, len); + btrfs_err(root->fs_info, + "bad extent! em: [%llu %llu] passed [%llu %llu]", + em->start, em->len, start, len); err = -EIO; goto out; } @@ -6979,7 +7048,18 @@ insert: * existing will always be non-NULL, since there must be * extent causing the -EEXIST. 
*/ - if (start >= extent_map_end(existing) || + if (existing->start == em->start && + extent_map_end(existing) == extent_map_end(em) && + em->block_start == existing->block_start) { + /* + * these two extents are the same, it happens + * with inlines especially + */ + free_extent_map(em); + em = existing; + err = 0; + + } else if (start >= extent_map_end(existing) || start <= existing->start) { /* * The existing extent map is the one nearest to @@ -7191,7 +7271,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, int ret; alloc_hint = get_extent_allocation_hint(inode, start, len); - ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, + ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret) return ERR_PTR(ret); @@ -7691,6 +7771,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ret = PTR_ERR(em2); goto unlock_err; } + /* + * For inode marked NODATACOW or extent marked PREALLOC, + * use the existing or preallocated extent, so does not + * need to adjust btrfs_space_info's bytes_may_use. + */ + btrfs_free_reserved_data_space_noquota(inode, + start, len); goto unlock; } } @@ -7725,7 +7812,6 @@ unlock: i_size_write(inode, start + len); adjust_dio_outstanding_extents(inode, dio_data, len); - btrfs_free_reserved_data_space(inode, start, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; dio_data->unsubmitted_oe_range_end = start + len; @@ -7767,12 +7853,12 @@ err: } static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, - int rw, int mirror_num) + int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - BUG_ON(rw & REQ_WRITE); + BUG_ON(bio_op(bio) == REQ_OP_WRITE); bio_get(bio); @@ -7781,7 +7867,7 @@ static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, if (ret) goto err; - ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); + ret = btrfs_map_bio(root, bio, mirror_num, 0); err: bio_put(bio); return ret; @@ -7792,18 +7878,19 @@ static int btrfs_check_dio_repairable(struct inode *inode, struct io_failure_record *failrec, int failed_mirror) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int num_copies; - num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, - failrec->logical, failrec->len); + num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies == 1) { /* * we only have a single copy of the data, so don't bother with * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. 
*/ - pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -7813,8 +7900,9 @@ static int btrfs_check_dio_repairable(struct inode *inode, failrec->this_mirror++; if (failrec->this_mirror > num_copies) { - pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -7832,7 +7920,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, int read_mode; int ret; - BUG_ON(failed_bio->bi_rw & REQ_WRITE); + BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) @@ -7860,13 +7948,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, free_io_failure(inode, failrec); return -EIO; } + bio_set_op_attrs(bio, REQ_OP_READ, read_mode); btrfs_debug(BTRFS_I(inode)->root->fs_info, "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", read_mode, failrec->this_mirror, failrec->in_validation); - ret = submit_dio_repair_bio(inode, bio, read_mode, - failrec->this_mirror); + ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); if (ret) { free_io_failure(inode, failrec); bio_put(bio); @@ -8156,7 +8244,7 @@ static void btrfs_endio_direct_write(struct bio *bio) bio_put(bio); } -static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, +static int __btrfs_submit_bio_start_direct_io(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 offset) { @@ -8174,8 +8262,8 @@ static void btrfs_end_dio_bio(struct bio *bio) if (err) btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, - "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", - btrfs_ino(dip->inode), bio->bi_rw, + "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d", + btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf, (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size, err); @@ -8249,11 +8337,11 @@ static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root, } static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, - int rw, u64 file_offset, int skip_sum, + u64 file_offset, int skip_sum, int async_submit) { struct btrfs_dio_private *dip = bio->bi_private; - int write = rw & REQ_WRITE; + bool write = bio_op(bio) == REQ_OP_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -8274,8 +8362,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (write && async_submit) { ret = btrfs_wq_submit_bio(root->fs_info, - inode, rw, bio, 0, 0, - file_offset, + inode, bio, 0, 0, file_offset, __btrfs_submit_bio_start_direct_io, __btrfs_submit_bio_done); goto err; @@ -8294,13 +8381,13 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, goto err; } map: - ret = btrfs_map_bio(root, rw, bio, 0, async_submit); + ret = btrfs_map_bio(root, bio, 0, async_submit); err: bio_put(bio); return ret; } -static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, +static int btrfs_submit_direct_hook(struct btrfs_dio_private 
*dip, int skip_sum) { struct inode *inode = dip->inode; @@ -8319,8 +8406,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, int i; map_length = orig_bio->bi_iter.bi_size; - ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, - &map_length, NULL, 0); + ret = btrfs_map_block(root->fs_info, bio_op(orig_bio), + start_sector << 9, &map_length, NULL, 0); if (ret) return -EIO; @@ -8340,6 +8427,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, if (!bio) return -ENOMEM; + bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio)); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; @@ -8359,7 +8447,7 @@ next_block: * before we're done setting it up */ atomic_inc(&dip->pending_bios); - ret = __btrfs_submit_dio_bio(bio, inode, rw, + ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, async_submit); if (ret) { @@ -8377,12 +8465,14 @@ next_block: start_sector, GFP_NOFS); if (!bio) goto out_err; + bio_set_op_attrs(bio, bio_op(orig_bio), + bio_flags(orig_bio)); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; map_length = orig_bio->bi_iter.bi_size; - ret = btrfs_map_block(root->fs_info, rw, + ret = btrfs_map_block(root->fs_info, bio_op(orig_bio), start_sector << 9, &map_length, NULL, 0); if (ret) { @@ -8402,7 +8492,7 @@ next_block: } submit: - ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, + ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, async_submit); if (!ret) return 0; @@ -8422,14 +8512,14 @@ out_err: return 0; } -static void btrfs_submit_direct(int rw, struct bio *dio_bio, - struct inode *inode, loff_t file_offset) +static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, + loff_t file_offset) { struct btrfs_dio_private *dip = NULL; struct bio *io_bio = NULL; struct btrfs_io_bio *btrfs_bio; int skip_sum; - int write = rw & REQ_WRITE; + bool write = (bio_op(dio_bio) == REQ_OP_WRITE); int ret = 0; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; @@ -8480,7 +8570,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dio_data->unsubmitted_oe_range_end; } - ret = btrfs_submit_direct_hook(rw, dip, skip_sum); + ret = btrfs_submit_direct_hook(dip, skip_sum); if (!ret) return; @@ -8544,7 +8634,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, goto out; /* If this is a write we don't need to check anymore */ - if (iov_iter_rw(iter) == WRITE) + if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter)) return 0; /* * Check to make sure we don't have duplicate iov_base's in this @@ -8980,7 +9070,7 @@ again: 0, 0, &cached_state, GFP_NOFS); ret = btrfs_set_extent_delalloc(inode, page_start, end, - &cached_state); + &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); @@ -9093,7 +9183,7 @@ static int btrfs_truncate(struct inode *inode) /* Migrate the slack space for the truncate to our reserve */ ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, - min_size); + min_size, 0); BUG_ON(ret); /* @@ -9133,7 +9223,7 @@ static int btrfs_truncate(struct inode *inode) } ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, - rsv, min_size); + rsv, min_size, 0); BUG_ON(ret); /* shouldn't happen */ trans->block_rsv = rsv; } @@ -9154,7 +9244,6 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); } 
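The most pervasive change in these hunks is btrfs_abort_transaction() dropping its root argument: the transaction handle now records its owning filesystem, so callers pass only (trans, ret). The idea in miniature, using hypothetical txn and txn_abort types rather than the kernel's:

#include <stdio.h>

struct fs_ctx {
	const char *label;
};

struct txn {
	struct fs_ctx *fs;	/* owner recorded when the handle is created */
	int aborted;		/* first error that aborted the transaction */
};

static void txn_abort(struct txn *t, int err)
{
	/* no separate root/fs argument: the handle knows its owner */
	if (!t->aborted) {
		t->aborted = err;
		fprintf(stderr, "[%s] transaction aborted: %d\n",
			t->fs->label, err);
	}
}

int main(void)
{
	struct fs_ctx fs = { .label = "demo-fs" };
	struct txn t = { .fs = &fs, .aborted = 0 };

	txn_abort(&t, -5);	/* -EIO */
	return t.aborted ? 0 : 1;
}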
- out: btrfs_free_block_rsv(root, rsv); @@ -9303,8 +9392,9 @@ void btrfs_destroy_inode(struct inode *inode) if (!ordered) break; else { - btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", - ordered->file_offset, ordered->len); + btrfs_err(root->fs_info, + "found ordered extent %llu %llu on inode cleanup", + ordered->file_offset, ordered->len); btrfs_remove_ordered_extent(inode, ordered); btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); @@ -9363,25 +9453,25 @@ int btrfs_init_cachep(void) btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle", sizeof(struct btrfs_trans_handle), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL); if (!btrfs_trans_handle_cachep) goto fail; btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction", sizeof(struct btrfs_transaction), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL); if (!btrfs_transaction_cachep) goto fail; btrfs_path_cachep = kmem_cache_create("btrfs_path", sizeof(struct btrfs_path), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_path_cachep) goto fail; btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space", sizeof(struct btrfs_free_space), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_free_space_cachep) goto fail; @@ -9419,7 +9509,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; - struct timespec ctime = CURRENT_TIME; + struct timespec ctime = current_time(old_inode); struct dentry *parent; u64 old_ino = btrfs_ino(old_inode); u64 new_ino = btrfs_ino(new_inode); @@ -9531,7 +9621,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, ret = btrfs_update_inode(trans, root, old_inode); } if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } @@ -9551,7 +9641,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, ret = btrfs_update_inode(trans, dest, new_inode); } if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } @@ -9559,7 +9649,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_dentry->d_name.name, new_dentry->d_name.len, 0, old_idx); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } @@ -9567,7 +9657,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, old_dentry->d_name.name, old_dentry->d_name.len, 0, new_idx); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } @@ -9787,7 +9877,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, inode_inc_iversion(old_inode); old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = new_dir->i_mtime = - old_inode->i_ctime = current_fs_time(old_dir->i_sb); + old_inode->i_ctime = current_time(old_dir); if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); @@ -9806,13 +9896,13 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, ret = btrfs_update_inode(trans, root, old_inode); } if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } if (new_inode) { inode_inc_iversion(new_inode); - 
new_inode->i_ctime = current_fs_time(new_inode->i_sb); + new_inode->i_ctime = current_time(new_inode); if (unlikely(btrfs_ino(new_inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { root_objectid = BTRFS_I(new_inode)->location.objectid; @@ -9830,7 +9920,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!ret && new_inode->i_nlink == 0) ret = btrfs_orphan_add(trans, d_inode(new_dentry)); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } } @@ -9839,7 +9929,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.name, new_dentry->d_name.len, 0, index); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } @@ -9859,7 +9949,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_fail; } } @@ -10246,6 +10336,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, u64 last_alloc = (u64)-1; int ret = 0; bool own_trans = true; + u64 end = start + num_bytes - 1; if (trans) own_trans = false; @@ -10267,8 +10358,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, * sized chunks. */ cur_bytes = min(cur_bytes, last_alloc); - ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, - *alloc_hint, &ins, 1, 0); + ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, + min_size, 0, *alloc_hint, &ins, 1, 0); if (ret) { if (own_trans) btrfs_end_transaction(trans, root); @@ -10285,7 +10376,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, if (ret) { btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); if (own_trans) btrfs_end_transaction(trans, root); break; @@ -10329,7 +10420,7 @@ next: *alloc_hint = ins.objectid + ins.offset; inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && (actual_len > inode->i_size) && @@ -10345,7 +10436,7 @@ next: ret = btrfs_update_inode(trans, root, inode); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); if (own_trans) btrfs_end_transaction(trans, root); break; @@ -10354,6 +10445,9 @@ next: if (own_trans) btrfs_end_transaction(trans, root); } + if (cur_offset < end) + btrfs_free_reserved_data_space(inode, cur_offset, + end - cur_offset + 1); return ret; } @@ -10466,21 +10560,6 @@ out_inode: } -/* Inspired by filemap_check_errors() */ -int btrfs_inode_check_errors(struct inode *inode) -{ - int ret = 0; - - if (test_bit(AS_ENOSPC, &inode->i_mapping->flags) && - test_and_clear_bit(AS_ENOSPC, &inode->i_mapping->flags)) - ret = -ENOSPC; - if (test_bit(AS_EIO, &inode->i_mapping->flags) && - test_and_clear_bit(AS_EIO, &inode->i_mapping->flags)) - ret = -EIO; - - return ret; -} - static const struct inode_operations btrfs_dir_inode_operations = { .getattr = btrfs_getattr, .lookup = btrfs_lookup, @@ -10489,14 +10568,11 @@ static const struct inode_operations btrfs_dir_inode_operations = { .link = btrfs_link, .mkdir = btrfs_mkdir, .rmdir = btrfs_rmdir, - .rename2 = btrfs_rename2, + .rename = btrfs_rename2, .symlink = btrfs_symlink, .setattr = btrfs_setattr, .mknod = btrfs_mknod, - .setxattr = generic_setxattr, - 
.getxattr = generic_getxattr, .listxattr = btrfs_listxattr, - .removexattr = generic_removexattr, .permission = btrfs_permission, .get_acl = btrfs_get_acl, .set_acl = btrfs_set_acl, @@ -10514,7 +10590,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = btrfs_real_readdir, + .iterate_shared = btrfs_real_readdir, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, @@ -10570,10 +10646,7 @@ static const struct address_space_operations btrfs_symlink_aops = { static const struct inode_operations btrfs_file_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = btrfs_listxattr, - .removexattr = generic_removexattr, .permission = btrfs_permission, .fiemap = btrfs_fiemap, .get_acl = btrfs_get_acl, @@ -10584,10 +10657,7 @@ static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, .permission = btrfs_permission, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = btrfs_listxattr, - .removexattr = generic_removexattr, .get_acl = btrfs_get_acl, .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, @@ -10598,10 +10668,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, .permission = btrfs_permission, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, .listxattr = btrfs_listxattr, - .removexattr = generic_removexattr, .update_time = btrfs_update_time, }; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 05173563e4a6..18e1aa0f85f5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -349,7 +349,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) btrfs_update_iflags(inode); inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); @@ -445,7 +445,7 @@ static noinline int create_subvol(struct inode *dir, struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *new_root; struct btrfs_block_rsv block_rsv; - struct timespec cur_time = current_fs_time(dir->i_sb); + struct timespec cur_time = current_time(dir); struct inode *inode; int ret; int err; @@ -561,7 +561,7 @@ static noinline int create_subvol(struct inode *dir, new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -570,7 +570,7 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid); if (ret) { /* We potentially lose an unused inode item here */ - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -583,7 +583,7 @@ static noinline int create_subvol(struct inode *dir, */ ret = btrfs_set_inode_index(dir, &index); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -591,7 +591,7 @@ static noinline int create_subvol(struct inode *dir, name, namelen, dir, &key, BTRFS_FT_DIR, index); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ 
-608,7 +608,7 @@ static noinline int create_subvol(struct inode *dir, root_item->uuid, BTRFS_UUID_KEY_SUBVOL, objectid); if (ret) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); fail: kfree(root_item); @@ -1634,6 +1634,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, int namelen; int ret = 0; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + ret = mnt_want_write_file(file); if (ret) goto out; @@ -1691,6 +1694,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, struct btrfs_ioctl_vol_args *vol_args; int ret; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -1714,6 +1720,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, bool readonly = false; struct btrfs_qgroup_inherit *inherit = NULL; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -1894,8 +1903,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); if (key.objectid == root->root_key.objectid) { ret = -EPERM; - btrfs_err(root->fs_info, "deleting default subvolume " - "%llu is not allowed", key.objectid); + btrfs_err(root->fs_info, + "deleting default subvolume %llu is not allowed", + key.objectid); goto out; } btrfs_release_path(path); @@ -1948,8 +1958,7 @@ static noinline int key_in_sk(struct btrfs_key *key, return 1; } -static noinline int copy_to_sk(struct btrfs_root *root, - struct btrfs_path *path, +static noinline int copy_to_sk(struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, size_t *buf_size, @@ -2120,7 +2129,7 @@ static noinline int search_ioctl(struct inode *inode, ret = 0; goto err; } - ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, + ret = copy_to_sk(path, &key, sk, buf_size, ubuf, &sk_offset, &num_found); btrfs_release_path(path); if (ret) @@ -2358,6 +2367,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, int ret; int err = 0; + if (!S_ISDIR(dir->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -2406,7 +2418,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, * rmdir(2). 
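All four snapshot ioctls touched above (the three snap_create variants and snap_destroy) gain the same opening guard: these ioctls only make sense on a directory, so other file types are rejected with -ENOTDIR before mnt_want_write_file() or any allocation runs. A sketch of the shape, with a hypothetical handler name:

	#include <linux/fs.h>

	/*
	 * Sketch of the guard added in the hunks above; the handler
	 * name is invented for illustration.
	 */
	static long example_snap_ioctl(struct file *file, void __user *arg)
	{
		if (!S_ISDIR(file_inode(file)->i_mode))
			return -ENOTDIR;

		/* ... memdup_user() the vol_args and do the real work ... */
		return 0;
	}
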
*/ err = -EPERM; - if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) + if (!btrfs_test_opt(root->fs_info, USER_SUBVOL_RM_ALLOWED)) goto out_dput; /* @@ -2489,7 +2501,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, dentry->d_name.len); if (ret) { err = ret; - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_end_trans; } @@ -2505,7 +2517,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, root->fs_info->tree_root, dest->root_key.objectid); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); err = ret; goto out_end_trans; } @@ -2515,7 +2527,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL, dest->root_key.objectid); if (ret && ret != -ENOENT) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); err = ret; goto out_end_trans; } @@ -2525,7 +2537,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, BTRFS_UUID_KEY_RECEIVED_SUBVOL, dest->root_key.objectid); if (ret && ret != -ENOENT) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); err = ret; goto out_end_trans; } @@ -3280,7 +3292,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, inode_inc_iversion(inode); if (!no_time_update) - inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_mtime = inode->i_ctime = current_time(inode); /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. @@ -3292,7 +3304,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, inode); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans, root); goto out; } @@ -3694,7 +3706,7 @@ process_slot: if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, - root, ret); + ret); btrfs_end_transaction(trans, root); goto out; } @@ -3702,8 +3714,7 @@ process_slot: ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); if (ret) { - btrfs_abort_transaction(trans, root, - ret); + btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans, root); goto out; } @@ -3735,7 +3746,6 @@ process_slot: new_key.offset - datao); if (ret) { btrfs_abort_transaction(trans, - root, ret); btrfs_end_transaction(trans, root); @@ -3772,7 +3782,6 @@ process_slot: if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, - root, ret); btrfs_end_transaction(trans, root); goto out; @@ -3828,7 +3837,7 @@ process_slot: last_dest_end, destoff + len, 1); if (ret) { if (ret != -EOPNOTSUPP) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans, root); goto out; } @@ -4089,8 +4098,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) if (IS_ERR_OR_NULL(di)) { btrfs_free_path(path); btrfs_end_transaction(trans, root); - btrfs_err(new_root->fs_info, "Umm, you don't have the default dir" - "item, this isn't going to work"); + btrfs_err(new_root->fs_info, + "Umm, you don't have the default diritem, this isn't going to work"); ret = -ENOENT; goto out; } @@ -5088,7 +5097,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return btrfs_qgroup_wait_for_completion(root->fs_info); + return 
btrfs_qgroup_wait_for_completion(root->fs_info, true); } static long _btrfs_ioctl_set_received_subvol(struct file *file, @@ -5098,7 +5107,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root_item *root_item = &root->root_item; struct btrfs_trans_handle *trans; - struct timespec ct = current_fs_time(inode->i_sb); + struct timespec ct = current_time(inode); int ret = 0; int received_uuid_changed; @@ -5164,13 +5173,13 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, BTRFS_UUID_KEY_RECEIVED_SUBVOL, root->root_key.objectid); if (ret < 0 && ret != -EEXIST) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } } ret = btrfs_commit_transaction(trans, root); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -5299,8 +5308,9 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) return -EFAULT; if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { - btrfs_err(root->fs_info, "unable to set label with more than %d bytes", - BTRFS_LABEL_SIZE - 1); + btrfs_err(root->fs_info, + "unable to set label with more than %d bytes", + BTRFS_LABEL_SIZE - 1); return -EINVAL; } diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 1adfbe7be6b8..48655da0f4ca 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -141,7 +141,7 @@ static int lzo_compress_pages(struct list_head *ws, ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, &out_len, workspace->mem); if (ret != LZO_E_OK) { - printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", + pr_debug("BTRFS: deflate in loop returned %d\n", ret); ret = -EIO; goto out; @@ -356,7 +356,7 @@ cont: if (need_unmap) kunmap(pages_in[page_in_index - 1]); if (ret != LZO_E_OK) { - printk(KERN_WARNING "BTRFS: decompress failed\n"); + pr_warn("BTRFS: decompress failed\n"); ret = -EIO; break; } @@ -402,7 +402,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, out_len = PAGE_SIZE; ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); if (ret != LZO_E_OK) { - printk(KERN_WARNING "BTRFS: decompress failed!\n"); + pr_warn("BTRFS: decompress failed!\n"); ret = -EIO; goto out; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 559170464d7c..b2d1e95de7be 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -67,8 +67,8 @@ static void ordered_data_tree_panic(struct inode *inode, int errno, u64 offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " - "%llu", offset); + btrfs_panic(fs_info, errno, + "Inconsistency in ordered tree at offset %llu", offset); } /* @@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, return count; } -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len) { struct btrfs_root *root; struct list_head splice; int done; + int total_done = 0; INIT_LIST_HEAD(&splice); @@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, done = btrfs_wait_ordered_extents(root, nr, range_start, range_len); btrfs_put_fs_root(root); + total_done += done; spin_lock(&fs_info->ordered_root_lock); if (nr != -1) { @@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, 
list_splice_tail(&splice, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); mutex_unlock(&fs_info->ordered_operations_mutex); + + return total_done; } /* @@ -964,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct rb_node *prev = NULL; struct btrfs_ordered_extent *test; int ret = 1; + u64 orig_offset = offset; spin_lock_irq(&tree->lock); if (ordered) { @@ -979,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, /* truncate file */ if (disk_i_size > i_size) { - BTRFS_I(inode)->disk_i_size = i_size; + BTRFS_I(inode)->disk_i_size = orig_offset; ret = 0; goto out; } @@ -1117,7 +1122,7 @@ int __init ordered_data_init(void) { btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", sizeof(struct btrfs_ordered_extent), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + SLAB_MEM_SPREAD, NULL); if (!btrfs_ordered_extent_cache) return -ENOMEM; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 2049c9be85ee..451507776ff5 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum, int len); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, const u64 range_start, const u64 range_len); -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len); void btrfs_get_logged_extents(struct inode *inode, struct list_head *logged_list, diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 147dc6ca5de1..438575ea8d25 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -24,12 +24,11 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) { int num_stripes = btrfs_chunk_num_stripes(eb, chunk); int i; - printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " - "num_stripes %d\n", + pr_info("\t\tchunk length %llu owner %llu type %llu num_stripes %d\n", btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk), btrfs_chunk_type(eb, chunk), num_stripes); for (i = 0 ; i < num_stripes ; i++) { - printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, + pr_info("\t\t\tstripe %d devid %llu offset %llu\n", i, btrfs_stripe_devid_nr(eb, chunk, i), btrfs_stripe_offset_nr(eb, chunk, i)); } @@ -37,8 +36,7 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) static void print_dev_item(struct extent_buffer *eb, struct btrfs_dev_item *dev_item) { - printk(KERN_INFO "\t\tdev item devid %llu " - "total_bytes %llu bytes used %llu\n", + pr_info("\t\tdev item devid %llu total_bytes %llu bytes used %llu\n", btrfs_device_id(eb, dev_item), btrfs_device_total_bytes(eb, dev_item), btrfs_device_bytes_used(eb, dev_item)); @@ -46,8 +44,7 @@ static void print_dev_item(struct extent_buffer *eb, static void print_extent_data_ref(struct extent_buffer *eb, struct btrfs_extent_data_ref *ref) { - printk(KERN_INFO "\t\textent data backref root %llu " - "objectid %llu offset %llu count %u\n", + pr_info("\t\textent data backref root %llu objectid %llu offset %llu count %u\n", btrfs_extent_data_ref_root(eb, ref), btrfs_extent_data_ref_objectid(eb, ref), btrfs_extent_data_ref_offset(eb, ref), @@ -72,7 +69,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) struct btrfs_extent_item_v0 *ei0; BUG_ON(item_size != sizeof(*ei0)); ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); - 
printk(KERN_INFO "\t\textent refs %u\n", + pr_info("\t\textent refs %u\n", btrfs_extent_refs_v0(eb, ei0)); return; #else @@ -83,7 +80,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); flags = btrfs_extent_flags(eb, ei); - printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n", + pr_info("\t\textent refs %llu gen %llu flags %llu\n", btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), flags); @@ -92,8 +89,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)(ei + 1); btrfs_tree_block_key(eb, info, &key); - printk(KERN_INFO "\t\ttree block key (%llu %u %llu) " - "level %d\n", + pr_info("\t\ttree block key (%llu %u %llu) level %d\n", btrfs_disk_key_objectid(&key), key.type, btrfs_disk_key_offset(&key), btrfs_tree_block_level(eb, info)); @@ -110,12 +106,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) offset = btrfs_extent_inline_ref_offset(eb, iref); switch (type) { case BTRFS_TREE_BLOCK_REF_KEY: - printk(KERN_INFO "\t\ttree block backref " - "root %llu\n", offset); + pr_info("\t\ttree block backref root %llu\n", offset); break; case BTRFS_SHARED_BLOCK_REF_KEY: - printk(KERN_INFO "\t\tshared block backref " - "parent %llu\n", offset); + pr_info("\t\tshared block backref parent %llu\n", offset); break; case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); @@ -123,8 +117,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) break; case BTRFS_SHARED_DATA_REF_KEY: sref = (struct btrfs_shared_data_ref *)(iref + 1); - printk(KERN_INFO "\t\tshared data backref " - "parent %llu count %u\n", + pr_info("\t\tshared data backref parent %llu count %u\n", offset, btrfs_shared_data_ref_count(eb, sref)); break; default: @@ -141,8 +134,7 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) struct btrfs_extent_ref_v0 *ref0; ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); - printk("\t\textent back ref root %llu gen %llu " - "owner %llu num_refs %lu\n", + printk("\t\textent back ref root %llu gen %llu owner %llu num_refs %lu\n", btrfs_ref_root_v0(eb, ref0), btrfs_ref_generation_v0(eb, ref0), btrfs_ref_objectid_v0(eb, ref0), @@ -162,7 +154,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset, __le64 subvol_id; read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id)); - printk(KERN_INFO "\t\tsubvol_id %llu\n", + pr_info("\t\tsubvol_id %llu\n", (unsigned long long)le64_to_cpu(subvol_id)); item_size -= sizeof(u64); offset += sizeof(u64); @@ -196,15 +188,13 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) item = btrfs_item_nr(i); btrfs_item_key_to_cpu(l, &key, i); type = key.type; - printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " - "itemsize %d\n", + pr_info("\titem %d key (%llu %u %llu) itemoff %d itemsize %d\n", i, key.objectid, type, key.offset, btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printk(KERN_INFO "\t\tinode generation %llu size %llu " - "mode %o\n", + pr_info("\t\tinode generation %llu size %llu mode %o\n", btrfs_inode_generation(l, ii), btrfs_inode_size(l, ii), btrfs_inode_mode(l, ii)); @@ -212,13 +202,13 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DIR_ITEM_KEY: di = 
btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); - printk(KERN_INFO "\t\tdir oid %llu type %u\n", + pr_info("\t\tdir oid %llu type %u\n", found_key.objectid, btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", + pr_info("\t\troot data bytenr %llu refs %u\n", btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; @@ -227,10 +217,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) print_extent_item(l, i, type); break; case BTRFS_TREE_BLOCK_REF_KEY: - printk(KERN_INFO "\t\ttree block backref\n"); + pr_info("\t\ttree block backref\n"); break; case BTRFS_SHARED_BLOCK_REF_KEY: - printk(KERN_INFO "\t\tshared block backref\n"); + pr_info("\t\tshared block backref\n"); break; case BTRFS_EXTENT_DATA_REF_KEY: dref = btrfs_item_ptr(l, i, @@ -240,7 +230,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_SHARED_DATA_REF_KEY: sref = btrfs_item_ptr(l, i, struct btrfs_shared_data_ref); - printk(KERN_INFO "\t\tshared data backref count %u\n", + pr_info("\t\tshared data backref count %u\n", btrfs_shared_data_ref_count(l, sref)); break; case BTRFS_EXTENT_DATA_KEY: @@ -248,17 +238,14 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_file_extent_item); if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { - printk(KERN_INFO "\t\tinline extent data " - "size %u\n", + pr_info("\t\tinline extent data size %u\n", btrfs_file_extent_inline_len(l, i, fi)); break; } - printk(KERN_INFO "\t\textent data disk bytenr %llu " - "nr %llu\n", + pr_info("\t\textent data disk bytenr %llu nr %llu\n", btrfs_file_extent_disk_bytenr(l, fi), btrfs_file_extent_disk_num_bytes(l, fi)); - printk(KERN_INFO "\t\textent data offset %llu " - "nr %llu ram %llu\n", + pr_info("\t\textent data offset %llu nr %llu ram %llu\n", btrfs_file_extent_offset(l, fi), btrfs_file_extent_num_bytes(l, fi), btrfs_file_extent_ram_bytes(l, fi)); @@ -273,7 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); - printk(KERN_INFO "\t\tblock group used %llu\n", + pr_info("\t\tblock group used %llu\n", btrfs_disk_block_group_used(l, bi)); break; case BTRFS_CHUNK_ITEM_KEY: @@ -287,38 +274,36 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_EXTENT_KEY: dev_extent = btrfs_item_ptr(l, i, struct btrfs_dev_extent); - printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" - "\t\tchunk objectid %llu chunk offset %llu " - "length %llu\n", + pr_info("\t\tdev extent chunk_tree %llu\n\t\tchunk objectid %llu chunk offset %llu length %llu\n", btrfs_dev_extent_chunk_tree(l, dev_extent), btrfs_dev_extent_chunk_objectid(l, dev_extent), btrfs_dev_extent_chunk_offset(l, dev_extent), btrfs_dev_extent_length(l, dev_extent)); break; case BTRFS_PERSISTENT_ITEM_KEY: - printk(KERN_INFO "\t\tpersistent item objectid %llu offset %llu\n", + pr_info("\t\tpersistent item objectid %llu offset %llu\n", key.objectid, key.offset); switch (key.objectid) { case BTRFS_DEV_STATS_OBJECTID: - printk(KERN_INFO "\t\tdevice stats\n"); + pr_info("\t\tdevice stats\n"); break; default: - printk(KERN_INFO "\t\tunknown persistent item\n"); + pr_info("\t\tunknown persistent item\n"); } break; case BTRFS_TEMPORARY_ITEM_KEY: - printk(KERN_INFO "\t\ttemporary item objectid %llu offset 
%llu\n", + pr_info("\t\ttemporary item objectid %llu offset %llu\n", key.objectid, key.offset); switch (key.objectid) { case BTRFS_BALANCE_OBJECTID: - printk(KERN_INFO "\t\tbalance status\n"); + pr_info("\t\tbalance status\n"); break; default: - printk(KERN_INFO "\t\tunknown temporary item\n"); + pr_info("\t\tunknown temporary item\n"); } break; case BTRFS_DEV_REPLACE_KEY: - printk(KERN_INFO "\t\tdev replace\n"); + pr_info("\t\tdev replace\n"); break; case BTRFS_UUID_KEY_SUBVOL: case BTRFS_UUID_KEY_RECEIVED_SUBVOL: @@ -343,12 +328,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) btrfs_print_leaf(root, c); return; } - btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u", - btrfs_header_bytenr(c), level, nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); + btrfs_info(root->fs_info, + "node %llu level %d total ptrs %d free spc %u", + btrfs_header_bytenr(c), level, nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", + pr_info("\tkey %d (%llu %u %llu) block %llu\n", i, key.objectid, key.type, key.offset, btrfs_node_blockptr(c, i)); } @@ -356,6 +342,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i), btrfs_node_ptr_generation(c, i)); + if (IS_ERR(next)) { + continue; + } else if (!extent_buffer_uptodate(next)) { + free_extent_buffer(next); + continue; + } + if (btrfs_is_leaf(next) && level != 1) BUG(); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 36992128c746..cf0b444ac4f3 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -350,6 +350,7 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_root *parent_root) { + struct super_block *sb = root->fs_info->sb; struct btrfs_key key; struct inode *parent_inode, *child_inode; int ret; @@ -358,12 +359,11 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - parent_inode = btrfs_iget(parent_root->fs_info->sb, &key, - parent_root, NULL); + parent_inode = btrfs_iget(sb, &key, parent_root, NULL); if (IS_ERR(parent_inode)) return PTR_ERR(parent_inode); - child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); + child_inode = btrfs_iget(sb, &key, root, NULL); if (IS_ERR(child_inode)) { iput(parent_inode); return PTR_ERR(child_inode); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9d4c05b14f6e..11f4fffe503e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -309,7 +309,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) u64 flags = 0; u64 rescan_progress = 0; - if (!fs_info->quota_enabled) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) return 0; fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); @@ -360,8 +360,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) fs_info->generation) { flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; btrfs_err(fs_info, - "qgroup generation mismatch, " - "marked as inconsistent"); + "qgroup generation mismatch, marked as inconsistent"); } fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, ptr); @@ -463,13 +462,11 @@ next2: } out: fs_info->qgroup_flags |= flags; - if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { - fs_info->quota_enabled = 0; - fs_info->pending_quota_state = 0; - } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && - ret >= 0) { + if 
(!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && + ret >= 0) ret = qgroup_rescan_init(fs_info, rescan_progress, 0); - } btrfs_free_path(path); if (ret < 0) { @@ -571,7 +568,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; - if (btrfs_test_is_dummy_root(quota_root)) + if (btrfs_is_testing(quota_root->fs_info)) return 0; path = btrfs_alloc_path(); @@ -728,7 +725,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, int ret; int slot; - if (btrfs_test_is_dummy_root(root)) + if (btrfs_is_testing(root->fs_info)) return 0; key.objectid = 0; @@ -847,7 +844,7 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, } ret = 0; out: - root->fs_info->pending_quota_state = 0; + set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); btrfs_free_path(path); return ret; } @@ -868,7 +865,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->qgroup_ioctl_lock); if (fs_info->quota_root) { - fs_info->pending_quota_state = 1; + set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); goto out; } @@ -964,7 +961,7 @@ out_add_root: } spin_lock(&fs_info->qgroup_lock); fs_info->quota_root = quota_root; - fs_info->pending_quota_state = 1; + set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); spin_unlock(&fs_info->qgroup_lock); out_free_path: btrfs_free_path(path); @@ -993,9 +990,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; - fs_info->quota_enabled = 0; - fs_info->pending_quota_state = 0; - btrfs_qgroup_wait_for_completion(fs_info); + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); + btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; @@ -1453,9 +1450,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, return ret; } -struct btrfs_qgroup_extent_record -*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_qgroup_extent_record *record) +int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record) { struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; struct rb_node *parent_node = NULL; @@ -1463,7 +1460,7 @@ struct btrfs_qgroup_extent_record u64 bytenr = record->bytenr; assert_spin_locked(&delayed_refs->lock); - trace_btrfs_qgroup_insert_dirty_extent(record); + trace_btrfs_qgroup_insert_dirty_extent(fs_info, record); while (*p) { parent_node = *p; @@ -1474,12 +1471,43 @@ struct btrfs_qgroup_extent_record else if (bytenr > entry->bytenr) p = &(*p)->rb_right; else - return entry; + return 1; } rb_link_node(&record->node, parent_node, p); rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); - return NULL; + return 0; +} + +int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, + gfp_t gfp_flag) +{ + struct btrfs_qgroup_extent_record *record; + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) + || bytenr == 0 || num_bytes == 0) + return 0; + if (WARN_ON(trans == NULL)) + return -EINVAL; + record 
= kmalloc(sizeof(*record), gfp_flag); + if (!record) + return -ENOMEM; + + delayed_refs = &trans->transaction->delayed_refs; + record->bytenr = bytenr; + record->num_bytes = num_bytes; + record->old_roots = NULL; + + spin_lock(&delayed_refs->lock); + ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, + record); + spin_unlock(&delayed_refs->lock); + if (ret > 0) + kfree(record); + return 0; } #define UPDATE_NEW 0 @@ -1595,8 +1623,8 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info, cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); - trace_qgroup_update_counters(qg->qgroupid, cur_old_count, - cur_new_count); + trace_qgroup_update_counters(fs_info, qg->qgroupid, + cur_old_count, cur_new_count); /* Rfer update part */ if (cur_old_count == 0 && cur_new_count > 0) { @@ -1683,12 +1711,12 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, if (old_roots) nr_old_roots = old_roots->nnodes; - if (!fs_info->quota_enabled) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) goto out_free; BUG_ON(!fs_info->quota_root); - trace_btrfs_qgroup_account_extent(bytenr, num_bytes, nr_old_roots, - nr_new_roots); + trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, + nr_old_roots, nr_new_roots); qgroups = ulist_alloc(GFP_NOFS); if (!qgroups) { @@ -1759,7 +1787,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, record = rb_entry(node, struct btrfs_qgroup_extent_record, node); - trace_btrfs_qgroup_account_extents(record); + trace_btrfs_qgroup_account_extents(fs_info, record); if (!ret) { /* @@ -1803,10 +1831,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, if (!quota_root) goto out; - if (!fs_info->quota_enabled && fs_info->pending_quota_state) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && + test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) start_rescan_worker = 1; - fs_info->quota_enabled = fs_info->pending_quota_state; + if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); spin_lock(&fs_info->qgroup_lock); while (!list_empty(&fs_info->dirty_qgroups)) { @@ -1825,7 +1857,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; spin_lock(&fs_info->qgroup_lock); } - if (fs_info->quota_enabled) + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; else fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; @@ -1870,7 +1902,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 nums; mutex_lock(&fs_info->qgroup_ioctl_lock); - if (!fs_info->quota_enabled) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) goto out; if (!quota_root) { @@ -1961,8 +1993,9 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, ret = update_qgroup_limit_item(trans, quota_root, dstgroup); if (ret) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - btrfs_info(fs_info, "unable to update quota limit for %llu", - dstgroup->qgroupid); + btrfs_info(fs_info, + "unable to update quota limit for %llu", + dstgroup->qgroupid); goto unlock; } } @@ -2195,9 +2228,8 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) { if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) return; - btrfs_err(trans->root->fs_info, - "qgroups not 
uptodate in trans handle %p: list is%s empty, " - "seq is %#x.%x", + btrfs_err(trans->fs_info, + "qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x", trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", (u32)(trans->delayed_ref_elem.seq >> 32), (u32)trans->delayed_ref_elem.seq); @@ -2225,10 +2257,11 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, &fs_info->qgroup_rescan_progress, path, 1, 0); - pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", - fs_info->qgroup_rescan_progress.objectid, - fs_info->qgroup_rescan_progress.type, - fs_info->qgroup_rescan_progress.offset, ret); + btrfs_debug(fs_info, + "current progress key (%llu %u %llu), search_slot ret %d", + fs_info->qgroup_rescan_progress.objectid, + fs_info->qgroup_rescan_progress.type, + fs_info->qgroup_rescan_progress.offset, ret); if (ret) { /* @@ -2302,6 +2335,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_rescan_running = true; + mutex_unlock(&fs_info->qgroup_rescan_lock); + path = btrfs_alloc_path(); if (!path) goto out; @@ -2313,7 +2350,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) err = PTR_ERR(trans); break; } - if (!fs_info->quota_enabled) { + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { err = -EINTR; } else { err = qgroup_rescan_leaf(fs_info, path, trans); @@ -2354,7 +2391,7 @@ out: ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); if (ret < 0) { err = ret; - btrfs_err(fs_info, "fail to update qgroup status: %d\n", err); + btrfs_err(fs_info, "fail to update qgroup status: %d", err); } btrfs_end_transaction(trans, fs_info->quota_root); @@ -2368,6 +2405,9 @@ out: } done: + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_rescan_running = false; + mutex_unlock(&fs_info->qgroup_rescan_lock); complete_all(&fs_info->qgroup_rescan_completion); } @@ -2486,20 +2526,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) +int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, + bool interruptible) { int running; int ret = 0; mutex_lock(&fs_info->qgroup_rescan_lock); spin_lock(&fs_info->qgroup_lock); - running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; + running = fs_info->qgroup_rescan_running; spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); - if (running) + if (!running) + return 0; + + if (interruptible) ret = wait_for_completion_interruptible( &fs_info->qgroup_rescan_completion); + else + wait_for_completion(&fs_info->qgroup_rescan_completion); return ret; } @@ -2535,8 +2581,8 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) struct ulist_iterator uiter; int ret; - if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) || - len == 0) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || + !is_fstree(root->objectid) || len == 0) return 0; changeset.bytes_changed = 0; @@ -2633,8 +2679,8 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes) { int ret; - if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) || - num_bytes == 0) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || + !is_fstree(root->objectid) || num_bytes == 0) return 0; BUG_ON(num_bytes != round_down(num_bytes, root->nodesize)); @@ -2649,7 +2695,8 @@ void btrfs_qgroup_free_meta_all(struct 
btrfs_root *root) { int reserved; - if (!root->fs_info->quota_enabled || !is_fstree(root->objectid)) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || + !is_fstree(root->objectid)) return; reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); @@ -2660,7 +2707,8 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root) void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) { - if (!root->fs_info->quota_enabled || !is_fstree(root->objectid)) + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || + !is_fstree(root->objectid)) return; BUG_ON(num_bytes != round_down(num_bytes, root->nodesize)); diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index ecb2c143ef75..1bc64c864b62 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); -int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); +int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, + bool interruptible); int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst); int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, @@ -63,9 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); struct btrfs_delayed_extent_op; int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -struct btrfs_qgroup_extent_record -*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_qgroup_extent_record *record); +/* + * Insert one dirty extent record into @delayed_refs, informing qgroup to + * account that extent at commit trans time. + * + * No lock version, caller must acquire delayed ref lock and allocate memory. + * + * Return 0 for success insert + * Return >0 for existing record, caller can free @record safely. + * Error is not possible + */ +int btrfs_qgroup_insert_dirty_extent_nolock( + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record); + +/* + * Insert one dirty extent record into @delayed_refs, informing qgroup to + * account that extent at commit trans time. + * + * Better encapsulated version. + * + * Return 0 if the operation is done. 
+ * Return <0 for error, like memory allocation failure or invalid parameter + * (NULL trans) + */ +int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, + gfp_t gfp_flag); + int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -88,7 +115,7 @@ static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes) { btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes); - trace_btrfs_qgroup_free_delayed_ref(ref_root, num_bytes); + trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); } void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f8b6d411a034..d016d4a79864 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1320,7 +1320,9 @@ write_data: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - submit_bio(WRITE, bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + submit_bio(bio); } return; @@ -1573,11 +1575,12 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_rmw_end_io; + bio_set_op_attrs(bio, REQ_OP_READ, 0); btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - submit_bio(READ, bio); + submit_bio(bio); } /* the actual write will happen once the reads are done */ return 0; @@ -2097,11 +2100,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_recover_end_io; + bio_set_op_attrs(bio, REQ_OP_READ, 0); btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - submit_bio(READ, bio); + submit_bio(bio); } out: return 0; @@ -2139,7 +2143,10 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { - BUG(); + btrfs_warn(root->fs_info, + "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)", + __func__, (u64)bio->bi_iter.bi_sector << 9, + (u64)bio->bi_iter.bi_size, bbio->map_type); if (generic_io) btrfs_put_bbio(bbio); kfree(rbio); @@ -2433,7 +2440,9 @@ submit_write: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - submit_bio(WRITE, bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + submit_bio(bio); } return; @@ -2610,11 +2619,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid56_parity_scrub_end_io; + bio_set_op_attrs(bio, REQ_OP_READ, 0); btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - submit_bio(READ, bio); + submit_bio(bio); } /* the actual write will happen once the reads are done */ return; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 298631eaee78..75bab76739be 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) do { enqueued = 0; + mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { if (atomic_read(&device->reada_in_flight) < MAX_IN_FLIGHT) enqueued += reada_start_machine_dev(fs_info, device); } + mutex_unlock(&fs_devices->device_list_mutex); total += enqueued; } while (enqueued && total < 10000); @@ -818,7 +820,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) spin_lock(&fs_info->reada_lock); list_for_each_entry(device, &fs_devices->devices, dev_list) { - 
printk(KERN_DEBUG "dev %lld has %d in flight\n", device->devid, + btrfs_debug(fs_info, "dev %lld has %d in flight", device->devid, atomic_read(&device->reada_in_flight)); index = 0; while (1) { @@ -827,17 +829,17 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) (void **)&zone, index, 1); if (ret == 0) break; - printk(KERN_DEBUG " zone %llu-%llu elems %llu locked " - "%d devs", zone->start, zone->end, zone->elems, - zone->locked); + pr_debug(" zone %llu-%llu elems %llu locked %d devs", + zone->start, zone->end, zone->elems, + zone->locked); for (j = 0; j < zone->ndevs; ++j) { - printk(KERN_CONT " %lld", + pr_cont(" %lld", zone->devs[j]->devid); } if (device->reada_curr_zone == zone) - printk(KERN_CONT " curr off %llu", + pr_cont(" curr off %llu", device->reada_next - zone->start); - printk(KERN_CONT "\n"); + pr_cont("\n"); index = (zone->end >> PAGE_SHIFT) + 1; } cnt = 0; @@ -849,21 +851,20 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) (void **)&re, index, 1); if (ret == 0) break; - printk(KERN_DEBUG - " re: logical %llu size %u empty %d scheduled %d", + pr_debug(" re: logical %llu size %u empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { - printk(KERN_CONT " zone %llu-%llu devs", + pr_cont(" zone %llu-%llu devs", re->zones[i]->start, re->zones[i]->end); for (j = 0; j < re->zones[i]->ndevs; ++j) { - printk(KERN_CONT " %lld", + pr_cont(" %lld", re->zones[i]->devs[j]->devid); } } - printk(KERN_CONT "\n"); + pr_cont("\n"); index = (re->logical >> PAGE_SHIFT) + 1; if (++cnt > 15) break; @@ -883,20 +884,19 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) index = (re->logical >> PAGE_SHIFT) + 1; continue; } - printk(KERN_DEBUG - "re: logical %llu size %u list empty %d scheduled %d", + pr_debug("re: logical %llu size %u list empty %d scheduled %d", re->logical, fs_info->tree_root->nodesize, list_empty(&re->extctl), re->scheduled); for (i = 0; i < re->nzones; ++i) { - printk(KERN_CONT " zone %llu-%llu devs", + pr_cont(" zone %llu-%llu devs", re->zones[i]->start, re->zones[i]->end); for (j = 0; j < re->zones[i]->ndevs; ++j) { - printk(KERN_CONT " %lld", + pr_cont(" %lld", re->zones[i]->devs[j]->devid); } } - printk(KERN_CONT "\n"); + pr_cont("\n"); index = (re->logical >> PAGE_SHIFT) + 1; } spin_unlock(&fs_info->reada_lock); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0477dca154ed..0ec8ffa37ab0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -31,6 +31,7 @@ #include "async-thread.h" #include "free-space-cache.h" #include "inode-map.h" +#include "qgroup.h" /* * backref_node, mapping_node and tree_block start with this @@ -235,12 +236,12 @@ static void backref_cache_cleanup(struct backref_cache *cache) cache->last_trans = 0; for (i = 0; i < BTRFS_MAX_LEVEL; i++) - BUG_ON(!list_empty(&cache->pending[i])); - BUG_ON(!list_empty(&cache->changed)); - BUG_ON(!list_empty(&cache->detached)); - BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); - BUG_ON(cache->nr_nodes); - BUG_ON(cache->nr_edges); + ASSERT(list_empty(&cache->pending[i])); + ASSERT(list_empty(&cache->changed)); + ASSERT(list_empty(&cache->detached)); + ASSERT(RB_EMPTY_ROOT(&cache->rb_root)); + ASSERT(!cache->nr_nodes); + ASSERT(!cache->nr_edges); } static struct backref_node *alloc_backref_node(struct backref_cache *cache) @@ -336,8 +337,9 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) rb_node); if (bnode->root) fs_info = 
bnode->root->fs_info; - btrfs_panic(fs_info, errno, "Inconsistency in backref cache " - "found at offset %llu", bytenr); + btrfs_panic(fs_info, errno, + "Inconsistency in backref cache found at offset %llu", + bytenr); } /* @@ -922,9 +924,16 @@ again: path2->slots[level]--; eb = path2->nodes[level]; - WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != - cur->bytenr); - + if (btrfs_node_blockptr(eb, path2->slots[level]) != + cur->bytenr) { + btrfs_err(root->fs_info, + "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)", + cur->bytenr, level - 1, root->objectid, + node_key->objectid, node_key->type, + node_key->offset); + err = -ENOENT; + goto out; + } lower = cur; need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { @@ -1171,8 +1180,12 @@ out: lower = list_entry(useless.next, struct backref_node, list); list_del_init(&lower->list); + if (lower == node) + node = NULL; free_backref_node(cache, lower); } + + free_backref_node(cache, node); return ERR_PTR(err); } ASSERT(!node || !node->detached); @@ -1291,9 +1304,9 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); if (rb_node) { - btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " - "for start=%llu while inserting into relocation " - "tree", node->bytenr); + btrfs_panic(root->fs_info, -EEXIST, + "Duplicate root found for start=%llu while inserting into relocation tree", + node->bytenr); kfree(node); return -EEXIST; } @@ -1719,7 +1732,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, btrfs_header_owner(leaf), key.objectid, key.offset); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); break; } @@ -1727,7 +1740,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, parent, btrfs_header_owner(leaf), key.objectid, key.offset); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); break; } } @@ -2345,6 +2358,10 @@ void free_reloc_roots(struct list_head *list) while (!list_empty(list)) { reloc_root = list_entry(list->next, struct btrfs_root, root_list); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + reloc_root->node = NULL; + reloc_root->commit_root = NULL; __del_reloc_root(reloc_root); } } @@ -2604,25 +2621,28 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, trans->block_rsv = rc->block_rsv; rc->reserved_bytes += num_bytes; + + /* + * We are under a transaction here so we can only do limited flushing. + * If we get an enospc just kick back -EAGAIN so we know to drop the + * transaction and try to refill when we can flush all the things. + */ ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, - BTRFS_RESERVE_FLUSH_ALL); + BTRFS_RESERVE_FLUSH_LIMIT); if (ret) { - if (ret == -EAGAIN) { - tmp = rc->extent_root->nodesize * - RELOCATION_RESERVED_NODES; - while (tmp <= rc->reserved_bytes) - tmp <<= 1; - /* - * only one thread can access block_rsv at this point, - * so we don't need hold lock to protect block_rsv. - * we expand more reservation size here to allow enough - * space for relocation and we will return earlier in - * enospc case. 
- */ - rc->block_rsv->size = tmp + rc->extent_root->nodesize * - RELOCATION_RESERVED_NODES; - } - return ret; + tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES; + while (tmp <= rc->reserved_bytes) + tmp <<= 1; + /* + * only one thread can access block_rsv at this point, + * so we don't need hold lock to protect block_rsv. + * we expand more reservation size here to allow enough + * space for relocation and we will return eailer in + * enospc case. + */ + rc->block_rsv->size = tmp + rc->extent_root->nodesize * + RELOCATION_RESERVED_NODES; + return -EAGAIN; } return 0; @@ -2678,11 +2698,15 @@ static int do_relocation(struct btrfs_trans_handle *trans, if (!upper->eb) { ret = btrfs_search_slot(trans, root, key, path, 0, 1); - if (ret < 0) { - err = ret; + if (ret) { + if (ret < 0) + err = ret; + else + err = -ENOENT; + + btrfs_release_path(path); break; } - BUG_ON(ret > 0); if (!upper->eb) { upper->eb = path->nodes[upper->level]; @@ -3030,15 +3054,19 @@ int prealloc_file_extent_cluster(struct inode *inode, u64 num_bytes; int nr = 0; int ret = 0; + u64 prealloc_start = cluster->start - offset; + u64 prealloc_end = cluster->end - offset; + u64 cur_offset; BUG_ON(cluster->start != cluster->boundary[0]); inode_lock(inode); - ret = btrfs_check_data_free_space(inode, cluster->start, - cluster->end + 1 - cluster->start); + ret = btrfs_check_data_free_space(inode, prealloc_start, + prealloc_end + 1 - prealloc_start); if (ret) goto out; + cur_offset = prealloc_start; while (nr < cluster->nr) { start = cluster->boundary[nr] - offset; if (nr + 1 < cluster->nr) @@ -3048,16 +3076,21 @@ int prealloc_file_extent_cluster(struct inode *inode, lock_extent(&BTRFS_I(inode)->io_tree, start, end); num_bytes = end + 1 - start; + if (cur_offset < start) + btrfs_free_reserved_data_space(inode, cur_offset, + start - cur_offset); ret = btrfs_prealloc_file_range(inode, 0, start, num_bytes, num_bytes, end + 1, &alloc_hint); + cur_offset = end + 1; unlock_extent(&BTRFS_I(inode)->io_tree, start, end); if (ret) break; nr++; } - btrfs_free_reserved_data_space(inode, cluster->start, - cluster->end + 1 - cluster->start); + if (cur_offset < prealloc_end) + btrfs_free_reserved_data_space(inode, cur_offset, + prealloc_end + 1 - cur_offset); out: inode_unlock(inode); return ret; @@ -3186,7 +3219,7 @@ static int relocate_file_extent_cluster(struct inode *inode, nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL, 0); set_page_dirty(page); unlock_extent(&BTRFS_I(inode)->io_tree, @@ -3871,6 +3904,7 @@ static noinline_for_stack int prepare_to_relocate(struct reloc_control *rc) { struct btrfs_trans_handle *trans; + int ret; rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, BTRFS_BLOCK_RSV_TEMP); @@ -3885,6 +3919,11 @@ int prepare_to_relocate(struct reloc_control *rc) rc->reserved_bytes = 0; rc->block_rsv->size = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES; + ret = btrfs_block_rsv_refill(rc->extent_root, + rc->block_rsv, rc->block_rsv->size, + BTRFS_RESERVE_FLUSH_ALL); + if (ret) + return ret; rc->create_reloc_tree = 1; set_reloc_control(rc); @@ -3903,6 +3942,90 @@ int prepare_to_relocate(struct reloc_control *rc) return 0; } +/* + * Qgroup fixer for data chunk relocation. 
+ * The data relocation is done in the following steps + * 1) Copy data extents into data reloc tree + * 2) Create tree reloc tree (special snapshot) for related subvolumes + * 3) Modify file extents in tree reloc tree + * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks + * + * The problem is, data and tree reloc tree are not accounted to qgroup, + * and 4) will only inform qgroup to track tree blocks change, not file extents + * in the tree blocks. + * + * The good news is, related data extents are all in data reloc tree, so we + * only need to inform qgroup to track all file extents in data reloc tree + * before the transaction commits. + */ +static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, + struct reloc_control *rc) +{ + struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; + struct inode *inode = rc->data_inode; + struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; + struct btrfs_path *path; + struct btrfs_key key; + int ret = 0; + + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) + return 0; + + /* + * The qgroup fix is only valid for the stage where we update data + * pointers. + * For MOVING_DATA stage, we will miss the timing of swapping tree + * blocks, and won't fix it. + */ + if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + key.objectid = btrfs_ino(inode); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); + while (1) { + struct btrfs_file_extent_item *fi; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid > btrfs_ino(inode)) + break; + if (key.type != BTRFS_EXTENT_DATA_KEY) + goto next; + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(path->nodes[0], fi) != + BTRFS_FILE_EXTENT_REG) + goto next; + ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, + btrfs_file_extent_disk_bytenr(path->nodes[0], fi), + btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), + GFP_NOFS); + if (ret < 0) + break; +next: + ret = btrfs_next_item(data_reloc_root, path); + if (ret < 0) + break; + if (ret > 0) { + ret = 0; + break; + } + } + unlock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); +out: + btrfs_free_path(path); + return ret; +} + static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; @@ -4089,10 +4212,18 @@ restart: /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { err = PTR_ERR(trans); - else - btrfs_commit_transaction(trans, rc->extent_root); + goto out_free; + } + ret = qgroup_fix_relocated_data_extents(trans, rc); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + if (!err) + err = ret; + goto out_free; + } + btrfs_commit_transaction(trans, rc->extent_root); out_free: btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); btrfs_free_path(path); @@ -4250,8 +4381,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) goto out; } - btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", - rc->block_group->key.objectid, rc->block_group->flags); + btrfs_info(extent_root->fs_info, + "relocating block group %llu flags %llu", + rc->block_group->key.objectid, rc->block_group->flags);
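qgroup_fix_relocated_data_extents() above is the usual btrfs item walk: position with btrfs_search_slot(), advance with btrfs_next_item(), stop once the key's objectid moves past the inode, and skip anything that is not a regular EXTENT_DATA item. Below is a toy userspace model of that filtering walk over a pre-sorted list of keys; the inode number, bytenr values and record function are invented, though 108 is the real BTRFS_EXTENT_DATA_KEY value:

#include <stdio.h>
#include <stddef.h>

#define EXTENT_DATA_KEY 108   /* BTRFS_EXTENT_DATA_KEY */

struct item_key {
        unsigned long long objectid;  /* inode number */
        unsigned char type;
        unsigned long long offset;    /* file offset */
};

static void record_for_qgroup(unsigned long long bytenr, unsigned long long len)
{
        printf("qgroup: mark extent %llu+%llu dirty\n", bytenr, len);
}

int main(void)
{
        /* a toy, already-sorted "tree": items of inode 257, then inode 258 */
        struct item_key items[] = {
                { 257, 96, 0 },               /* unrelated item type: skipped */
                { 257, EXTENT_DATA_KEY, 0 },
                { 257, EXTENT_DATA_KEY, 4096 },
                { 258, EXTENT_DATA_KEY, 0 },  /* next inode: the walk stops here */
        };
        unsigned long long ino = 257;

        for (size_t i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
                if (items[i].objectid > ino)
                        break;                /* walked past the target inode */
                if (items[i].type != EXTENT_DATA_KEY)
                        continue;             /* only file extent items matter */
                /* disk bytenr/len are invented stand-ins here */
                record_for_qgroup(1048576 + items[i].offset, 4096);
        }
        return 0;
}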
btrfs_wait_block_group_reservations(rc->block_group); btrfs_wait_nocow_writers(rc->block_group); @@ -4455,10 +4587,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { err = PTR_ERR(trans); - else - err = btrfs_commit_transaction(trans, rc->extent_root); + goto out_free; + } + err = qgroup_fix_relocated_data_extents(trans, rc); + if (err < 0) { + btrfs_abort_transaction(trans, err); + goto out_free; + } + err = btrfs_commit_transaction(trans, rc->extent_root); out_free: kfree(rc); out: @@ -4643,7 +4781,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, if (rc->merge_reloc_tree) { ret = btrfs_block_rsv_migrate(&pending->block_rsv, rc->block_rsv, - rc->nodes_relocated); + rc->nodes_relocated, 1); if (ret) return ret; } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index f1c30861d062..edae751e870c 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -46,12 +46,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot, != btrfs_root_generation_v2(item)) { if (btrfs_root_generation_v2(item) != 0) { btrfs_warn(eb->fs_info, - "mismatching " - "generation and generation_v2 " - "found in root item. This root " - "was probably mounted with an " - "older kernel. Resetting all " - "new fields."); + "mismatching generation and generation_v2 found in root item. This root was probably mounted with an older kernel. Resetting all new fields."); } need_reset = 1; } @@ -150,14 +145,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - btrfs_crit(root->fs_info, "unable to update root key %llu %u %llu", - key->objectid, key->type, key->offset); + btrfs_crit(root->fs_info, + "unable to update root key %llu %u %llu", + key->objectid, key->type, key->offset); BUG_ON(1); } @@ -176,20 +172,20 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } ret = btrfs_del_item(trans, root, path); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } btrfs_release_path(path); ret = btrfs_insert_empty_item(trans, root, path, key, sizeof(*item)); if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } l = path->nodes[0]; @@ -272,6 +268,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) root_key.objectid = key.offset; key.offset++; + /* + * The root might have been inserted already, as before we look + * for orphan roots, log replay might have happened, which + * triggers a transaction commit and qgroup accounting, which + * in turn reads and inserts fs roots while doing backref + * walking. 
+ */ + root = btrfs_lookup_fs_root(tree_root->fs_info, + root_key.objectid); + if (root) { + WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, + &root->state)); + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); + continue; + } + root = btrfs_read_fs_root(tree_root, &root_key); err = PTR_ERR_OR_ZERO(root); if (err && err != -ENOENT) { @@ -285,8 +298,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) if (IS_ERR(trans)) { err = PTR_ERR(trans); btrfs_handle_fs_error(tree_root->fs_info, err, - "Failed to start trans to delete " - "orphan item"); + "Failed to start trans to delete orphan item"); break; } err = btrfs_del_orphan_item(trans, tree_root, @@ -294,8 +306,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) btrfs_end_transaction(trans, tree_root); if (err) { btrfs_handle_fs_error(tree_root->fs_info, err, - "Failed to delete root orphan " - "item"); + "Failed to delete root orphan item"); break; } continue; @@ -310,16 +321,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); err = btrfs_insert_fs_root(root->fs_info, root); - /* - * The root might have been inserted already, as before we look - * for orphan roots, log replay might have happened, which - * triggers a transaction commit and qgroup accounting, which - * in turn reads and inserts fs roots while doing backref - * walking. - */ - if (err == -EEXIST) - err = 0; if (err) { + BUG_ON(err == -EEXIST); btrfs_free_fs_root(root); break; } @@ -448,7 +451,7 @@ again: ret = btrfs_insert_empty_item(trans, tree_root, path, &key, sizeof(*ref) + name_len); if (ret) { - btrfs_abort_transaction(trans, tree_root, ret); + btrfs_abort_transaction(trans, ret); btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 46d847f66e4b..fffb9ab8526e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -575,23 +575,25 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, * hold all of the paths here */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) - btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev " - "%s, sector %llu, root %llu, inode %llu, offset %llu, " - "length %llu, links %u (path: %s)", swarn->errstr, - swarn->logical, rcu_str_deref(swarn->dev->name), - (unsigned long long)swarn->sector, root, inum, offset, - min(isize - offset, (u64)PAGE_SIZE), nlink, - (char *)(unsigned long)ipath->fspath->val[i]); + btrfs_warn_in_rcu(fs_info, + "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)", + swarn->errstr, swarn->logical, + rcu_str_deref(swarn->dev->name), + (unsigned long long)swarn->sector, + root, inum, offset, + min(isize - offset, (u64)PAGE_SIZE), nlink, + (char *)(unsigned long)ipath->fspath->val[i]); free_ipath(ipath); return 0; err: - btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev " - "%s, sector %llu, root %llu, inode %llu, offset %llu: path " - "resolving failed with ret=%d", swarn->errstr, - swarn->logical, rcu_str_deref(swarn->dev->name), - (unsigned long long)swarn->sector, root, inum, offset, ret); + btrfs_warn_in_rcu(fs_info, + "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", + swarn->errstr, swarn->logical, + rcu_str_deref(swarn->dev->name), + (unsigned long long)swarn->sector, + root, inum, offset, ret); free_ipath(ipath); return 0; @@ -645,9 +647,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) 
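The root-tree.c hunk above reorders btrfs_find_orphan_roots(): look the root up in the in-memory cache first and read it from disk only on a miss, instead of reading unconditionally and forgiving -EEXIST at insert time, which is why the insert failure can now be treated as a bug. A self-contained sketch of that lookup-before-read pattern; this toy is single-threaded, whereas btrfs relies on its own locking to make check-then-insert safe:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct root { unsigned long long id; };

#define MAX_ROOTS 8
static struct root *cache[MAX_ROOTS];   /* stands in for the fs_roots radix tree */

static struct root *cache_lookup(unsigned long long id)
{
        for (int i = 0; i < MAX_ROOTS; i++)
                if (cache[i] && cache[i]->id == id)
                        return cache[i];
        return NULL;
}

static int cache_insert(struct root *r)
{
        if (cache_lookup(r->id))
                return -EEXIST;
        for (int i = 0; i < MAX_ROOTS; i++) {
                if (!cache[i]) {
                        cache[i] = r;
                        return 0;
                }
        }
        return -ENOSPC;
}

static struct root *read_root(unsigned long long id)
{
        struct root *r = malloc(sizeof(*r));  /* stands in for btrfs_read_fs_root() */
        if (r)
                r->id = id;
        return r;
}

static struct root *get_root(unsigned long long id)
{
        struct root *r = cache_lookup(id);
        if (r)
                return r;       /* already inserted, e.g. earlier by log replay */
        r = read_root(id);
        if (!r)
                return NULL;
        if (cache_insert(r)) {  /* -EEXIST is impossible after the lookup above */
                free(r);
                return NULL;
        }
        return r;
}

int main(void)
{
        get_root(5);
        get_root(5);  /* second call is served from the cache */
        printf("root 5 cached: %s\n", cache_lookup(5) ? "yes" : "no");
        return 0;
}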
item_size, &ref_root, &ref_level); btrfs_warn_in_rcu(fs_info, - "%s at logical %llu on dev %s, " - "sector %llu: metadata %s (level %d) in tree " - "%llu", errstr, swarn.logical, + "%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu", + errstr, swarn.logical, rcu_str_deref(dev->name), (unsigned long long)swarn.sector, ref_level ? "node" : "leaf", @@ -1504,8 +1505,9 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, sblock->no_io_error_seen = 0; } else { bio->bi_iter.bi_sector = page->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (btrfsic_submit_bio_wait(READ, bio)) + if (btrfsic_submit_bio_wait(bio)) sblock->no_io_error_seen = 0; } @@ -1573,8 +1575,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, if (!page_bad->dev->bdev) { btrfs_warn_rl(sblock_bad->sctx->dev_root->fs_info, - "scrub_repair_page_from_good_copy(bdev == NULL) " - "is unexpected"); + "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected"); return -EIO; } @@ -1583,6 +1584,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; bio->bi_bdev = page_bad->dev->bdev; bio->bi_iter.bi_sector = page_bad->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); if (PAGE_SIZE != ret) { @@ -1590,7 +1592,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; } - if (btrfsic_submit_bio_wait(WRITE, bio)) { + if (btrfsic_submit_bio_wait(bio)) { btrfs_dev_stat_inc_and_print(page_bad->dev, BTRFS_DEV_STAT_WRITE_ERRS); btrfs_dev_replace_stats_inc( @@ -1684,6 +1686,7 @@ again: bio->bi_end_io = scrub_wr_bio_end_io; bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); sbio->err = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical_for_dev_replace || @@ -1731,7 +1734,7 @@ static void scrub_wr_submit(struct scrub_ctx *sctx) * orders the requests before sending them to the driver which * doubled the write performance on spinning disks when measured * with Linux 3.5 */ - btrfsic_submit_bio(WRITE, sbio->bio); + btrfsic_submit_bio(sbio->bio); } static void scrub_wr_bio_end_io(struct bio *bio) @@ -2041,7 +2044,7 @@ static void scrub_submit(struct scrub_ctx *sctx) sbio = sctx->bios[sctx->curr]; sctx->curr = -1; scrub_pending_bio_inc(sctx); - btrfsic_submit_bio(READ, sbio->bio); + btrfsic_submit_bio(sbio->bio); } static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, @@ -2088,6 +2091,7 @@ again: bio->bi_end_io = scrub_bio_end_io; bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_READ, 0); sbio->err = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical || @@ -2957,7 +2961,8 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, (key.objectid < logic_start || key.objectid + bytes > logic_start + map->stripe_len)) { - btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu", + btrfs_err(fs_info, + "scrub: tree block %llu spanning stripes, ignored. logical=%llu", key.objectid, logic_start); spin_lock(&sctx->stat_lock); sctx->stat.uncorrectable_errors++; @@ -3308,8 +3313,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, key.objectid + bytes > logical + map->stripe_len)) { btrfs_err(fs_info, - "scrub: tree block %llu spanning " - "stripes, ignored. 
logical=%llu", + "scrub: tree block %llu spanning stripes, ignored. logical=%llu", key.objectid, logical); spin_lock(&sctx->stat_lock); sctx->stat.uncorrectable_errors++; @@ -3582,6 +3586,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, */ scrub_pause_on(fs_info); ret = btrfs_inc_block_group_ro(root, cache); + if (!ret && is_dev_replace) { + /* + * If we are doing a device replace wait for any tasks + * that started dellaloc right before we set the block + * group to RO mode, as they might have just allocated + * an extent from it or decided they could do a nocow + * write. And if any such tasks did that, wait for their + * ordered extents to complete and then commit the + * current transaction, so that we can later see the new + * extent items in the extent tree - the ordered extents + * create delayed data references (for cow writes) when + * they complete, which will be run and insert the + * corresponding extent items into the extent tree when + * we commit the transaction they used when running + * inode.c:btrfs_finish_ordered_io(). We later use + * the commit root of the extent tree to find extents + * to copy from the srcdev into the tgtdev, and we don't + * want to miss any new extents. + */ + btrfs_wait_block_group_reservations(cache); + btrfs_wait_nocow_writers(cache); + ret = btrfs_wait_ordered_roots(fs_info, -1, + cache->key.objectid, + cache->key.offset); + if (ret > 0) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + ret = PTR_ERR(trans); + else + ret = btrfs_commit_transaction(trans, + root); + if (ret) { + scrub_pause_off(fs_info); + btrfs_put_block_group(cache); + break; + } + } + } scrub_pause_off(fs_info); if (ret == 0) { @@ -3596,15 +3640,18 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, */ ro_set = 0; } else { - btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n", + btrfs_warn(fs_info, + "failed setting block group ro, ret=%d\n", ret); btrfs_put_block_group(cache); break; } + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); dev_replace->cursor_right = found_key.offset + length; dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, found_key.offset, cache, is_dev_replace); @@ -3640,6 +3687,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_off(fs_info); + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); + dev_replace->cursor_left = dev_replace->cursor_right; + dev_replace->item_needs_writeback = 1; + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); + if (ro_set) btrfs_dec_block_group_ro(root, cache); @@ -3677,9 +3729,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, ret = -ENOMEM; break; } - - dev_replace->cursor_left = dev_replace->cursor_right; - dev_replace->item_needs_writeback = 1; skip: key.offset = found_key.offset + length; btrfs_release_path(path); @@ -3737,27 +3786,27 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, if (fs_info->scrub_workers_refcnt == 0) { if (is_dev_replace) fs_info->scrub_workers = - btrfs_alloc_workqueue("scrub", flags, + btrfs_alloc_workqueue(fs_info, "scrub", flags, 1, 4); else fs_info->scrub_workers = - btrfs_alloc_workqueue("scrub", flags, + btrfs_alloc_workqueue(fs_info, "scrub", flags, max_active, 4); if (!fs_info->scrub_workers) goto fail_scrub_workers; fs_info->scrub_wr_completion_workers = - btrfs_alloc_workqueue("scrubwrc", flags, + 
btrfs_alloc_workqueue(fs_info, "scrubwrc", flags, max_active, 2); if (!fs_info->scrub_wr_completion_workers) goto fail_scrub_wr_completion_workers; fs_info->scrub_nocow_workers = - btrfs_alloc_workqueue("scrubnc", flags, 1, 0); + btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0); if (!fs_info->scrub_nocow_workers) goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = - btrfs_alloc_workqueue("scrubparity", flags, + btrfs_alloc_workqueue(fs_info, "scrubparity", flags, max_active, 2); if (!fs_info->scrub_parity_workers) goto fail_scrub_parity_workers; @@ -3812,9 +3861,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { /* not supported for data w/o checksums */ - btrfs_err(fs_info, - "scrub: size assumption sectorsize != PAGE_SIZE " - "(%d != %lu) fails", + btrfs_err_rl(fs_info, + "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails", fs_info->chunk_root->sectorsize, PAGE_SIZE); return -EINVAL; } @@ -3827,8 +3875,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, * would exhaust the array bounds of pagev member in * struct scrub_block */ - btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize " - "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails", + btrfs_err(fs_info, + "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails", fs_info->chunk_root->nodesize, SCRUB_MAX_PAGES_PER_BLOCK, fs_info->chunk_root->sectorsize, @@ -4154,10 +4202,10 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) ret = iterate_inodes_from_logical(logical, fs_info, path, record_inode_for_nocow, nocow_ctx); if (ret != 0 && ret != -ENOENT) { - btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, " - "phys %llu, len %llu, mir %u, ret %d", - logical, physical_for_dev_replace, len, mirror_num, - ret); + btrfs_warn(fs_info, + "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d", + logical, physical_for_dev_replace, len, mirror_num, + ret); not_written = 1; goto out; } @@ -4392,6 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); ret = bio_add_page(bio, page, PAGE_SIZE, 0); if (ret != PAGE_SIZE) { leave_with_eio: @@ -4400,7 +4449,7 @@ leave_with_eio: return -EIO; } - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) + if (btrfsic_submit_bio_wait(bio)) goto leave_with_eio; bio_put(bio); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index b71dd298385c..01bc36cec26e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -36,10 +36,6 @@ #include "transaction.h" #include "compression.h" -static int g_verbose = 0; - -#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__) - /* * A fs_path is a helper to dynamically build path names with unknown size. * It reallocates the internal buffer on demand. 
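The send.c comment above describes fs_path, a string builder that starts small and reallocates its buffer on demand. Here is a hedged userspace sketch of the same idea; the struct layout and helper names are mine, not the kernel's:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct path_buf {
        char *buf;
        size_t len;
        size_t cap;
        char inline_buf[32];  /* short paths never allocate */
};

static void path_init(struct path_buf *p)
{
        p->buf = p->inline_buf;
        p->cap = sizeof(p->inline_buf);
        p->len = 0;
        p->buf[0] = '\0';
}

static int path_ensure(struct path_buf *p, size_t extra)
{
        if (p->len + extra + 1 <= p->cap)
                return 0;
        size_t ncap = p->cap;
        while (p->len + extra + 1 > ncap)
                ncap *= 2;            /* grow geometrically, like the kernel helper */
        char *n = malloc(ncap);
        if (!n)
                return -1;
        memcpy(n, p->buf, p->len + 1);
        if (p->buf != p->inline_buf)
                free(p->buf);
        p->buf = n;
        p->cap = ncap;
        return 0;
}

static int path_add(struct path_buf *p, const char *name)
{
        size_t l = strlen(name);
        if (path_ensure(p, l + 1))    /* room for '/' + name */
                return -1;
        p->buf[p->len++] = '/';
        memcpy(p->buf + p->len, name, l + 1);
        p->len += l;
        return 0;
}

int main(void)
{
        struct path_buf p;
        path_init(&p);
        path_add(&p, "some-directory-with-a-long-name");
        path_add(&p, "file");         /* forces a reallocation past 32 bytes */
        printf("%s\n", p.buf);
        if (p.buf != p.inline_buf)
                free(p.buf);
        return 0;
}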
@@ -231,7 +227,6 @@ struct pending_dir_move { u64 parent_ino; u64 ino; u64 gen; - bool is_orphan; struct list_head update_refs; }; @@ -274,6 +269,39 @@ struct name_cache_entry { char name[]; }; +static void inconsistent_snapshot_error(struct send_ctx *sctx, + enum btrfs_compare_tree_result result, + const char *what) +{ + const char *result_string; + + switch (result) { + case BTRFS_COMPARE_TREE_NEW: + result_string = "new"; + break; + case BTRFS_COMPARE_TREE_DELETED: + result_string = "deleted"; + break; + case BTRFS_COMPARE_TREE_CHANGED: + result_string = "updated"; + break; + case BTRFS_COMPARE_TREE_SAME: + ASSERT(0); + result_string = "unchanged"; + break; + default: + ASSERT(0); + result_string = "unexpected"; + } + + btrfs_err(sctx->send_root->fs_info, + "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu", + result_string, what, sctx->cmp_key->objectid, + sctx->send_root->root_key.objectid, + (sctx->parent_root ? + sctx->parent_root->root_key.objectid : 0)); +} + static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); static struct waiting_dir_move * @@ -695,9 +723,10 @@ static int send_cmd(struct send_ctx *sctx) static int send_rename(struct send_ctx *sctx, struct fs_path *from, struct fs_path *to) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret; -verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start); + btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start); ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME); if (ret < 0) @@ -719,9 +748,10 @@ out: static int send_link(struct send_ctx *sctx, struct fs_path *path, struct fs_path *lnk) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret; -verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start); + btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start); ret = begin_cmd(sctx, BTRFS_SEND_C_LINK); if (ret < 0) @@ -742,9 +772,10 @@ out: */ static int send_unlink(struct send_ctx *sctx, struct fs_path *path) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret; -verbose_printk("btrfs: send_unlink %s\n", path->start); + btrfs_debug(fs_info, "send_unlink %s", path->start); ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK); if (ret < 0) @@ -764,9 +795,10 @@ out: */ static int send_rmdir(struct send_ctx *sctx, struct fs_path *path) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret; -verbose_printk("btrfs: send_rmdir %s\n", path->start); + btrfs_debug(fs_info, "send_rmdir %s", path->start); ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR); if (ret < 0) @@ -1281,6 +1313,7 @@ static int find_extent_clone(struct send_ctx *sctx, u64 ino_size, struct clone_root **found) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret; int extent_type; u64 logical; @@ -1339,10 +1372,10 @@ static int find_extent_clone(struct send_ctx *sctx, } logical = disk_byte + btrfs_file_extent_offset(eb, fi); - down_read(&sctx->send_root->fs_info->commit_root_sem); - ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, + down_read(&fs_info->commit_root_sem); + ret = extent_from_logical(fs_info, disk_byte, tmp_path, &found_key, &flags); - up_read(&sctx->send_root->fs_info->commit_root_sem); + up_read(&fs_info->commit_root_sem); btrfs_release_path(tmp_path); if (ret < 0) @@ -1397,7 +1430,7 @@ static int find_extent_clone(struct send_ctx *sctx, extent_item_pos = logical - found_key.objectid; else extent_item_pos = 0; - ret = 
iterate_extent_inodes(sctx->send_root->fs_info, + ret = iterate_extent_inodes(fs_info, found_key.objectid, extent_item_pos, 1, __iterate_backrefs, backref_ctx); @@ -1407,20 +1440,18 @@ static int find_extent_clone(struct send_ctx *sctx, if (!backref_ctx->found_itself) { /* found a bug in backref code? */ ret = -EIO; - btrfs_err(sctx->send_root->fs_info, "did not find backref in " - "send_root. inode=%llu, offset=%llu, " - "disk_byte=%llu found extent=%llu", - ino, data_offset, disk_byte, found_key.objectid); + btrfs_err(fs_info, + "did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu", + ino, data_offset, disk_byte, found_key.objectid); goto out; } -verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " - "ino=%llu, " - "num_bytes=%llu, logical=%llu\n", - data_offset, ino, num_bytes, logical); + btrfs_debug(fs_info, + "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu", + data_offset, ino, num_bytes, logical); if (!backref_ctx->found) - verbose_printk("btrfs: no clones found\n"); + btrfs_debug(fs_info, "no clones found"); cur_clone_root = NULL; for (i = 0; i < sctx->clone_roots_cnt; i++) { @@ -1861,7 +1892,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, * was already unlinked/moved, so we can safely assume that we will not * overwrite anything at this point in time. */ - if (other_inode > sctx->send_progress) { + if (other_inode > sctx->send_progress || + is_waiting_for_move(sctx, other_inode)) { ret = get_inode_info(sctx->parent_root, other_inode, NULL, who_gen, NULL, NULL, NULL, NULL); if (ret < 0) @@ -2390,10 +2422,11 @@ out: static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret = 0; struct fs_path *p; -verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); + btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size); p = fs_path_alloc(); if (!p) @@ -2419,10 +2452,11 @@ out: static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret = 0; struct fs_path *p; -verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); + btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode); p = fs_path_alloc(); if (!p) @@ -2448,10 +2482,12 @@ out: static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret = 0; struct fs_path *p; -verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); + btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu", + ino, uid, gid); p = fs_path_alloc(); if (!p) @@ -2478,6 +2514,7 @@ out: static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret = 0; struct fs_path *p = NULL; struct btrfs_inode_item *ii; @@ -2486,7 +2523,7 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) struct btrfs_key key; int slot; -verbose_printk("btrfs: send_utimes %llu\n", ino); + btrfs_debug(fs_info, "send_utimes %llu", ino); p = fs_path_alloc(); if (!p) @@ -2502,6 +2539,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); + if (ret > 0) + ret = -ENOENT; if (ret < 0) goto out; @@ -2538,6 +2577,7 @@ out: */ static int send_create_inode(struct send_ctx *sctx, u64 ino) { 
+ struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret = 0; struct fs_path *p; int cmd; @@ -2545,7 +2585,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino) u64 mode; u64 rdev; -verbose_printk("btrfs: send_create_inode %llu\n", ino); + btrfs_debug(fs_info, "send_create_inode %llu", ino); p = fs_path_alloc(); if (!p) @@ -2947,6 +2987,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, } if (loc.objectid > send_progress) { + struct orphan_dir_info *odi; + + odi = get_orphan_dir_info(sctx, dir); + free_orphan_dir_info(sctx, odi); ret = 0; goto out; } @@ -3047,7 +3091,6 @@ static int add_pending_dir_move(struct send_ctx *sctx, pm->parent_ino = parent_ino; pm->ino = ino; pm->gen = ino_gen; - pm->is_orphan = is_orphan; INIT_LIST_HEAD(&pm->list); INIT_LIST_HEAD(&pm->update_refs); RB_CLEAR_NODE(&pm->node); @@ -3113,6 +3156,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, return NULL; } +static int path_loop(struct send_ctx *sctx, struct fs_path *name, + u64 ino, u64 gen, u64 *ancestor_ino) +{ + int ret = 0; + u64 parent_inode = 0; + u64 parent_gen = 0; + u64 start_ino = ino; + + *ancestor_ino = 0; + while (ino != BTRFS_FIRST_FREE_OBJECTID) { + fs_path_reset(name); + + if (is_waiting_for_rm(sctx, ino)) + break; + if (is_waiting_for_move(sctx, ino)) { + if (*ancestor_ino == 0) + *ancestor_ino = ino; + ret = get_first_ref(sctx->parent_root, ino, + &parent_inode, &parent_gen, name); + } else { + ret = __get_cur_name_and_parent(sctx, ino, gen, + &parent_inode, + &parent_gen, name); + if (ret > 0) { + ret = 0; + break; + } + } + if (ret < 0) + break; + if (parent_inode == start_ino) { + ret = 1; + if (*ancestor_ino == 0) + *ancestor_ino = ino; + break; + } + ino = parent_inode; + gen = parent_gen; + } + return ret; +} + static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) { struct fs_path *from_path = NULL; @@ -3123,6 +3208,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) u64 parent_ino, parent_gen; struct waiting_dir_move *dm = NULL; u64 rmdir_ino = 0; + u64 ancestor; + bool is_orphan; int ret; name = fs_path_alloc(); @@ -3135,9 +3222,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); rmdir_ino = dm->rmdir_ino; + is_orphan = dm->orphanized; free_waiting_dir_move(sctx, dm); - if (pm->is_orphan) { + if (is_orphan) { ret = gen_unique_name(sctx, pm->ino, pm->gen, from_path); } else { @@ -3155,6 +3243,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) goto out; sctx->send_progress = sctx->cur_ino + 1; + ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); + if (ret < 0) + goto out; + if (ret) { + LIST_HEAD(deleted_refs); + ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); + ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, + &pm->update_refs, &deleted_refs, + is_orphan); + if (ret < 0) + goto out; + if (rmdir_ino) { + dm = get_waiting_dir_move(sctx, pm->ino); + ASSERT(dm); + dm->rmdir_ino = rmdir_ino; + } + goto out; + } fs_path_reset(name); to_path = name; name = NULL; @@ -3174,7 +3280,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) /* already deleted */ goto finish; } - ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); + ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino); if (ret < 0) goto out; if (!ret) @@ -3204,8 +3310,18 @@ finish: * and old parent(s). 
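path_loop() above protects apply_dir_move() from rename cycles: it climbs the chain of parents involved in delayed moves and reports whether the climb arrives back at the starting inode, in which case the move is re-queued below the detected ancestor via add_pending_dir_move(). The cycle test reduces to the following toy; the parent table is invented, and the depth cap stands in for the guarantees the real code gets from its pending-move bookkeeping:

#include <stdio.h>

/* parent[i] is the parent directory of inode i; 0 means "reached the root" */
static const unsigned parent[] = { 0, 0, 5, 2, 3, 4 };  /* 2->5->4->3->2 is a cycle */

static unsigned parent_of(unsigned ino)
{
        return ino < sizeof(parent) / sizeof(parent[0]) ? parent[ino] : 0;
}

static int path_loops(unsigned start)
{
        unsigned ino = parent_of(start);

        for (int depth = 0; ino != 0 && depth < 64; depth++) {
                if (ino == start)
                        return 1;   /* climbed back to where we started */
                ino = parent_of(ino);
        }
        return 0;
}

int main(void)
{
        printf("inode 5 loops: %d\n", path_loops(5));  /* 1 */
        printf("inode 1 loops: %d\n", path_loops(1));  /* 0: its parent is the root */
        return 0;
}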
*/ list_for_each_entry(cur, &pm->update_refs, list) { - if (cur->dir == rmdir_ino) + /* + * The parent inode might have been deleted in the send snapshot + */ + ret = get_inode_info(sctx->send_root, cur->dir, NULL, + NULL, NULL, NULL, NULL, NULL); + if (ret == -ENOENT) { + ret = 0; continue; + } + if (ret < 0) + goto out; + ret = send_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; @@ -3325,6 +3441,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, u64 left_gen; u64 right_gen; int ret = 0; + struct waiting_dir_move *wdm; if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) return 0; @@ -3383,7 +3500,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, goto out; } - if (is_waiting_for_move(sctx, di_key.objectid)) { + wdm = get_waiting_dir_move(sctx, di_key.objectid); + if (wdm && !wdm->orphanized) { ret = add_pending_dir_move(sctx, sctx->cur_ino, sctx->cur_inode_gen, @@ -3470,7 +3588,8 @@ static int wait_for_parent_move(struct send_ctx *sctx, ret = is_ancestor(sctx->parent_root, sctx->cur_ino, sctx->cur_inode_gen, ino, path_before); - break; + if (ret) + break; } fs_path_reset(path_before); @@ -3524,6 +3643,7 @@ out: */ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret = 0; struct recorded_ref *cur; struct recorded_ref *cur2; @@ -3536,7 +3656,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) u64 last_dir_ino_rm = 0; bool can_rename = true; -verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); + btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino); /* * This should never happen as the root dir always has the same ref @@ -3643,11 +3763,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; if (ret) { struct name_cache_entry *nce; + struct waiting_dir_move *wdm; ret = orphanize_inode(sctx, ow_inode, ow_gen, cur->full_path); if (ret < 0) goto out; + + /* + * If ow_inode has its rename operation delayed + * make sure that its orphanized name is used in + * the source path when performing its rename + * operation. + */ + if (is_waiting_for_move(sctx, ow_inode)) { + wdm = get_waiting_dir_move(sctx, + ow_inode); + ASSERT(wdm); + wdm->orphanized = true; + } + /* * Make sure we clear our orphanized inode's * name from the name cache. This is because the @@ -3663,6 +3798,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); name_cache_delete(sctx, nce); kfree(nce); } + + /* + * ow_inode might currently be an ancestor of + * cur_ino, therefore compute valid_path (the + * current path of cur_ino) again because it + * might contain the pre-orphanization name of + * ow_inode, which is no longer valid. + */ + fs_path_reset(valid_path); + ret = get_cur_path(sctx, sctx->cur_ino, + sctx->cur_inode_gen, valid_path); + if (ret < 0) + goto out; } else { ret = send_unlink(sctx, cur->full_path); if (ret < 0) @@ -4126,10 +4274,12 @@ static int process_all_refs(struct send_ctx *sctx, } btrfs_release_path(path); + /* + * We don't actually care about pending_move as we are simply + * re-creating this inode and will be rename'ing it into place once we + * rename the parent directory. + */ ret = process_recorded_refs(sctx, &pending_move); - /* Only applicable to an incremental send. 
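The update_refs loop above now probes each parent directory before sending utimes, since a parent can be absent from the send snapshot: -ENOENT means skip, any other negative value is a real failure. The control flow in isolation; the lookup stub and the deleted inode number are stand-ins:

#include <errno.h>
#include <stdio.h>

static int lookup_inode(unsigned long long ino)
{
        return ino == 300 ? -ENOENT : 0;  /* pretend inode 300 was deleted */
}

static int send_utimes_stub(unsigned long long ino)
{
        printf("utimes for inode %llu\n", ino);
        return 0;
}

static int update_parent_dirs(const unsigned long long *dirs, int n)
{
        for (int i = 0; i < n; i++) {
                int ret = lookup_inode(dirs[i]);
                if (ret == -ENOENT)
                        continue;     /* gone in this snapshot: nothing to touch */
                if (ret < 0)
                        return ret;   /* any other failure aborts the send */
                ret = send_utimes_stub(dirs[i]);
                if (ret < 0)
                        return ret;
        }
        return 0;
}

int main(void)
{
        const unsigned long long dirs[] = { 256, 300, 260 };
        return update_parent_dirs(dirs, 3);
}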
*/ - ASSERT(pending_move == 0); - out: btrfs_free_path(path); return ret; @@ -4185,7 +4335,7 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, int ret; struct send_ctx *sctx = ctx; struct fs_path *p; - posix_acl_xattr_header dummy_acl; + struct posix_acl_xattr_header dummy_acl; p = fs_path_alloc(); if (!p) @@ -4254,12 +4404,8 @@ static int process_new_xattr(struct send_ctx *sctx) static int process_deleted_xattr(struct send_ctx *sctx) { - int ret; - - ret = iterate_dir_item(sctx->parent_root, sctx->right_path, - sctx->cmp_key, __process_deleted_xattr, sctx); - - return ret; + return iterate_dir_item(sctx->parent_root, sctx->right_path, + sctx->cmp_key, __process_deleted_xattr, sctx); } struct find_xattr_ctx { @@ -4520,6 +4666,7 @@ out: */ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) { + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; int ret = 0; struct fs_path *p; ssize_t num_read = 0; @@ -4528,7 +4675,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) if (!p) return -ENOMEM; -verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); + btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); num_read = fill_read_buf(sctx, offset, len); if (num_read <= 0) { @@ -4570,10 +4717,10 @@ static int send_clone(struct send_ctx *sctx, struct fs_path *p; u64 gen; -verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " - "clone_inode=%llu, clone_offset=%llu\n", offset, len, - clone_root->root->objectid, clone_root->ino, - clone_root->offset); + btrfs_debug(sctx->send_root->fs_info, + "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu", + offset, len, clone_root->root->objectid, clone_root->ino, + clone_root->offset); p = fs_path_alloc(); if (!p) @@ -5602,7 +5749,10 @@ static int changed_ref(struct send_ctx *sctx, { int ret = 0; - BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + if (sctx->cur_ino != sctx->cmp_key->objectid) { + inconsistent_snapshot_error(sctx, result, "reference"); + return -EIO; + } if (!sctx->cur_inode_new_gen && sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { @@ -5627,7 +5777,10 @@ static int changed_xattr(struct send_ctx *sctx, { int ret = 0; - BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + if (sctx->cur_ino != sctx->cmp_key->objectid) { + inconsistent_snapshot_error(sctx, result, "xattr"); + return -EIO; + } if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { if (result == BTRFS_COMPARE_TREE_NEW) @@ -5651,7 +5804,10 @@ static int changed_extent(struct send_ctx *sctx, { int ret = 0; - BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + if (sctx->cur_ino != sctx->cmp_key->objectid) { + inconsistent_snapshot_error(sctx, result, "extent"); + return -EIO; + } if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { if (result != BTRFS_COMPARE_TREE_DELETED) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4e59a91a11e0..74ed5aae6cea 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -151,12 +151,11 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function vaf.fmt = fmt; vaf.va = &args; - printk(KERN_CRIT - "BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n", + pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n", sb->s_id, function, line, errno, errstr, &vaf); va_end(args); } else { - printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n", + pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n", sb->s_id, function, line, errno, errstr); } #endif @@ 
-184,6 +183,22 @@ static const char * const logtypes[] = { "debug", }; + +/* + * Use one ratelimit state per log level so that a flood of less important + * messages doesn't cause more important ones to be dropped. + */ +static struct ratelimit_state printk_limits[] = { + RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100), + RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100), + RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100), + RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100), + RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100), + RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100), + RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100), + RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100), +}; + void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) { struct super_block *sb = fs_info->sb; @@ -192,6 +207,7 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) va_list args; const char *type = logtypes[4]; int kern_level; + struct ratelimit_state *ratelimit; va_start(args, fmt); @@ -202,13 +218,18 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) lvl[size] = '\0'; fmt += size; type = logtypes[kern_level - '0']; - } else + ratelimit = &printk_limits[kern_level - '0']; + } else { *lvl = '\0'; + /* Default to debug output */ + ratelimit = &printk_limits[7]; + } vaf.fmt = fmt; vaf.va = &args; - printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf); + if (__ratelimit(ratelimit)) + printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf); va_end(args); } @@ -229,26 +250,28 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) */ __cold void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root, const char *function, + const char *function, unsigned int line, int errno) { + struct btrfs_fs_info *fs_info = trans->fs_info; + trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. 
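The printk_limits table above gives every log level its own ratelimit state, so a flood at one level can only exhaust that level's budget. A compact userspace model of per-level throttling; the burst of 100 mirrors the patch, while the 5-second window merely approximates DEFAULT_RATELIMIT_INTERVAL:

#include <stdio.h>
#include <time.h>

#define LEVELS 8
#define BURST 100          /* matches the patch's per-level burst of 100 */
#define INTERVAL 5         /* seconds */

struct ratelimit {
        time_t window;
        int seen;
};

static struct ratelimit limits[LEVELS];  /* one independent state per level */

static int allow(int level)
{
        struct ratelimit *r = &limits[level];
        time_t now = time(NULL);

        if (now - r->window >= INTERVAL) {   /* new window: reset the budget */
                r->window = now;
                r->seen = 0;
        }
        return r->seen++ < BURST;
}

static void log_msg(int level, const char *msg)
{
        if (allow(level))
                printf("<%d> %s\n", level, msg);
}

int main(void)
{
        for (int i = 0; i < 200; i++)
                log_msg(7, "debug spam");   /* only the first 100 get through */
        log_msg(2, "critical event");       /* unaffected by the debug flood */
        return 0;
}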
*/ - if (!trans->blocks_used && list_empty(&trans->new_bgs)) { + if (!trans->dirty && list_empty(&trans->new_bgs)) { const char *errstr; errstr = btrfs_decode_error(errno); - btrfs_warn(root->fs_info, + btrfs_warn(fs_info, "%s:%d: Aborting unused transaction(%s).", function, line, errstr); return; } ACCESS_ONCE(trans->transaction->aborted) = errno; /* Wake up anybody who may be waiting on this transaction */ - wake_up(&root->fs_info->transaction_wait); - wake_up(&root->fs_info->transaction_blocked_wait); - __btrfs_handle_fs_error(root->fs_info, function, line, errno, NULL); + wake_up(&fs_info->transaction_wait); + wake_up(&fs_info->transaction_blocked_wait); + __btrfs_handle_fs_error(fs_info, function, line, errno, NULL); } /* * __btrfs_panic decodes unexpected, fatal errors from the caller, @@ -432,27 +455,30 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, */ break; case Opt_nodatasum: - btrfs_set_and_info(root, NODATASUM, + btrfs_set_and_info(info, NODATASUM, "setting nodatasum"); break; case Opt_datasum: - if (btrfs_test_opt(root, NODATASUM)) { - if (btrfs_test_opt(root, NODATACOW)) - btrfs_info(root->fs_info, "setting datasum, datacow enabled"); + if (btrfs_test_opt(info, NODATASUM)) { + if (btrfs_test_opt(info, NODATACOW)) + btrfs_info(root->fs_info, + "setting datasum, datacow enabled"); else - btrfs_info(root->fs_info, "setting datasum"); + btrfs_info(root->fs_info, + "setting datasum"); } btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); break; case Opt_nodatacow: - if (!btrfs_test_opt(root, NODATACOW)) { - if (!btrfs_test_opt(root, COMPRESS) || - !btrfs_test_opt(root, FORCE_COMPRESS)) { + if (!btrfs_test_opt(info, NODATACOW)) { + if (!btrfs_test_opt(info, COMPRESS) || + !btrfs_test_opt(info, FORCE_COMPRESS)) { btrfs_info(root->fs_info, "setting nodatacow, compression disabled"); } else { - btrfs_info(root->fs_info, "setting nodatacow"); + btrfs_info(root->fs_info, + "setting nodatacow"); } } btrfs_clear_opt(info->mount_opt, COMPRESS); @@ -461,7 +487,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, btrfs_set_opt(info->mount_opt, NODATASUM); break; case Opt_datacow: - btrfs_clear_and_info(root, NODATACOW, + btrfs_clear_and_info(info, NODATACOW, "setting datacow"); break; case Opt_compress_force: @@ -470,10 +496,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, /* Fallthrough */ case Opt_compress: case Opt_compress_type: - saved_compress_type = btrfs_test_opt(root, COMPRESS) ? + saved_compress_type = btrfs_test_opt(info, + COMPRESS) ? 
info->compress_type : BTRFS_COMPRESS_NONE; saved_compress_force = - btrfs_test_opt(root, FORCE_COMPRESS); + btrfs_test_opt(info, FORCE_COMPRESS); if (token == Opt_compress || token == Opt_compress_force || strcmp(args[0].from, "zlib") == 0) { @@ -513,10 +540,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, */ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); } - if ((btrfs_test_opt(root, COMPRESS) && + if ((btrfs_test_opt(info, COMPRESS) && (info->compress_type != saved_compress_type || compress_force != saved_compress_force)) || - (!btrfs_test_opt(root, COMPRESS) && + (!btrfs_test_opt(info, COMPRESS) && no_compress == 1)) { btrfs_info(root->fs_info, "%s %s compression", @@ -526,25 +553,25 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, compress_force = false; break; case Opt_ssd: - btrfs_set_and_info(root, SSD, + btrfs_set_and_info(info, SSD, "use ssd allocation scheme"); break; case Opt_ssd_spread: - btrfs_set_and_info(root, SSD_SPREAD, + btrfs_set_and_info(info, SSD_SPREAD, "use spread ssd allocation scheme"); btrfs_set_opt(info->mount_opt, SSD); break; case Opt_nossd: - btrfs_set_and_info(root, NOSSD, + btrfs_set_and_info(info, NOSSD, "not using ssd allocation scheme"); btrfs_clear_opt(info->mount_opt, SSD); break; case Opt_barrier: - btrfs_clear_and_info(root, NOBARRIER, + btrfs_clear_and_info(info, NOBARRIER, "turning on barriers"); break; case Opt_nobarrier: - btrfs_set_and_info(root, NOBARRIER, + btrfs_set_and_info(info, NOBARRIER, "turning off barriers"); break; case Opt_thread_pool: @@ -583,8 +610,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, info->alloc_start = memparse(num, NULL); mutex_unlock(&info->chunk_mutex); kfree(num); - btrfs_info(root->fs_info, "allocations start at %llu", - info->alloc_start); + btrfs_info(root->fs_info, + "allocations start at %llu", + info->alloc_start); } else { ret = -ENOMEM; goto out; @@ -604,24 +632,24 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, root->fs_info->sb->s_flags &= ~MS_POSIXACL; break; case Opt_notreelog: - btrfs_set_and_info(root, NOTREELOG, + btrfs_set_and_info(info, NOTREELOG, "disabling tree log"); break; case Opt_treelog: - btrfs_clear_and_info(root, NOTREELOG, + btrfs_clear_and_info(info, NOTREELOG, "enabling tree log"); break; case Opt_norecovery: case Opt_nologreplay: - btrfs_set_and_info(root, NOLOGREPLAY, + btrfs_set_and_info(info, NOLOGREPLAY, "disabling log replay at mount time"); break; case Opt_flushoncommit: - btrfs_set_and_info(root, FLUSHONCOMMIT, + btrfs_set_and_info(info, FLUSHONCOMMIT, "turning on flush-on-commit"); break; case Opt_noflushoncommit: - btrfs_clear_and_info(root, FLUSHONCOMMIT, + btrfs_clear_and_info(info, FLUSHONCOMMIT, "turning off flush-on-commit"); break; case Opt_ratio: @@ -638,11 +666,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, } break; case Opt_discard: - btrfs_set_and_info(root, DISCARD, + btrfs_set_and_info(info, DISCARD, "turning on discard"); break; case Opt_nodiscard: - btrfs_clear_and_info(root, DISCARD, + btrfs_clear_and_info(info, DISCARD, "turning off discard"); break; case Opt_space_cache: @@ -651,12 +679,13 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, strcmp(args[0].from, "v1") == 0) { btrfs_clear_opt(root->fs_info->mount_opt, FREE_SPACE_TREE); - btrfs_set_and_info(root, SPACE_CACHE, + btrfs_set_and_info(info, SPACE_CACHE, "enabling disk space caching"); } else if (strcmp(args[0].from, "v2") == 0) { btrfs_clear_opt(root->fs_info->mount_opt, 
SPACE_CACHE); - btrfs_set_and_info(root, FREE_SPACE_TREE, + btrfs_set_and_info(info, + FREE_SPACE_TREE, "enabling free space tree"); } else { ret = -EINVAL; @@ -667,12 +696,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); break; case Opt_no_space_cache: - if (btrfs_test_opt(root, SPACE_CACHE)) { - btrfs_clear_and_info(root, SPACE_CACHE, + if (btrfs_test_opt(info, SPACE_CACHE)) { + btrfs_clear_and_info(info, + SPACE_CACHE, "disabling disk space caching"); } - if (btrfs_test_opt(root, FREE_SPACE_TREE)) { - btrfs_clear_and_info(root, FREE_SPACE_TREE, + if (btrfs_test_opt(info, FREE_SPACE_TREE)) { + btrfs_clear_and_info(info, + FREE_SPACE_TREE, "disabling free space tree"); } break; @@ -685,7 +716,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, "disabling inode map caching"); break; case Opt_clear_cache: - btrfs_set_and_info(root, CLEAR_CACHE, + btrfs_set_and_info(info, CLEAR_CACHE, "force clearing of disk cache"); break; case Opt_user_subvol_rm_allowed: @@ -698,11 +729,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG); break; case Opt_defrag: - btrfs_set_and_info(root, AUTO_DEFRAG, + btrfs_set_and_info(info, AUTO_DEFRAG, "enabling auto defrag"); break; case Opt_nodefrag: - btrfs_clear_and_info(root, AUTO_DEFRAG, + btrfs_clear_and_info(info, AUTO_DEFRAG, "disabling auto defrag"); break; case Opt_recovery: @@ -734,8 +765,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, goto out; } else if (intarg >= 0) { info->check_integrity_print_mask = intarg; - btrfs_info(root->fs_info, "check_integrity_print_mask 0x%x", - info->check_integrity_print_mask); + btrfs_info(root->fs_info, + "check_integrity_print_mask 0x%x", + info->check_integrity_print_mask); } else { ret = -EINVAL; goto out; @@ -766,19 +798,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, intarg = 0; ret = match_int(&args[0], &intarg); if (ret < 0) { - btrfs_err(root->fs_info, "invalid commit interval"); + btrfs_err(root->fs_info, + "invalid commit interval"); ret = -EINVAL; goto out; } if (intarg > 0) { if (intarg > 300) { - btrfs_warn(root->fs_info, "excessive commit interval %d", - intarg); + btrfs_warn(root->fs_info, + "excessive commit interval %d", + intarg); } info->commit_interval = intarg; } else { - btrfs_info(root->fs_info, "using default commit interval %ds", - BTRFS_DEFAULT_COMMIT_INTERVAL); + btrfs_info(root->fs_info, + "using default commit interval %ds", + BTRFS_DEFAULT_COMMIT_INTERVAL); info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; } break; @@ -799,7 +834,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options, break; #endif case Opt_err: - btrfs_info(root->fs_info, "unrecognized mount option '%s'", p); + btrfs_info(root->fs_info, + "unrecognized mount option '%s'", p); ret = -EINVAL; goto out; default: @@ -810,22 +846,22 @@ check: /* * Extra check for current option against current flag */ - if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { + if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { btrfs_err(root->fs_info, "nologreplay must be used with ro mount option"); ret = -EINVAL; } out: if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) && - !btrfs_test_opt(root, FREE_SPACE_TREE) && - !btrfs_test_opt(root, CLEAR_CACHE)) { + !btrfs_test_opt(info, FREE_SPACE_TREE) && + !btrfs_test_opt(info, CLEAR_CACHE)) { btrfs_err(root->fs_info, "cannot disable free space tree"); 
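The mechanical root-to-info conversion running through these option handlers reflects that the mount option bits live in btrfs_fs_info::mount_opt, so the helpers are per-filesystem bit operations. Reduced to its core the pattern looks like this; names are shortened and the log-only-on-change behavior mirrors btrfs_set_and_info()/btrfs_clear_and_info() only roughly:

#include <stdio.h>

enum {
        OPT_SPACE_CACHE     = 1u << 0,
        OPT_FREE_SPACE_TREE = 1u << 1,
        OPT_CLEAR_CACHE     = 1u << 2,
};

struct fsinfo {
        unsigned int mount_opt;  /* plays the role of fs_info->mount_opt */
};

#define test_opt(fs, o)  (!!((fs)->mount_opt & (o)))

static void set_and_info(struct fsinfo *fs, unsigned int opt, const char *msg)
{
        if (!test_opt(fs, opt))
                printf("%s\n", msg);     /* log only on an actual state change */
        fs->mount_opt |= opt;
}

static void clear_and_info(struct fsinfo *fs, unsigned int opt, const char *msg)
{
        if (test_opt(fs, opt))
                printf("%s\n", msg);
        fs->mount_opt &= ~opt;
}

int main(void)
{
        struct fsinfo fs = { 0 };

        set_and_info(&fs, OPT_SPACE_CACHE, "enabling disk space caching");
        set_and_info(&fs, OPT_SPACE_CACHE, "enabling disk space caching"); /* silent */
        clear_and_info(&fs, OPT_SPACE_CACHE, "disabling disk space caching");
        printf("space_cache=%d\n", test_opt(&fs, OPT_SPACE_CACHE));
        return 0;
}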
ret = -EINVAL; } - if (!ret && btrfs_test_opt(root, SPACE_CACHE)) + if (!ret && btrfs_test_opt(info, SPACE_CACHE)) btrfs_info(root->fs_info, "disk space caching is enabled"); - if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE)) + if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE)) btrfs_info(root->fs_info, "using free space tree"); kfree(orig); return ret; @@ -888,9 +924,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, } break; case Opt_subvolrootid: - printk(KERN_WARNING - "BTRFS: 'subvolrootid' mount option is deprecated and has " - "no effect\n"); + pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n"); break; case Opt_device: device_name = match_strdup(&args[0]); @@ -1114,7 +1148,7 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_iflags |= SB_I_CGROUPWB; err = open_ctree(sb, fs_devices, (char *)data); if (err) { - printk(KERN_ERR "BTRFS: open_ctree failed\n"); + btrfs_err(fs_info, "open_ctree failed"); return err; } @@ -1149,7 +1183,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; - trace_btrfs_sync_fs(wait); + trace_btrfs_sync_fs(fs_info, wait); if (!wait) { filemap_flush(fs_info->btree_inode->i_mapping); @@ -1192,13 +1226,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) struct btrfs_root *root = info->tree_root; char *compress_type; - if (btrfs_test_opt(root, DEGRADED)) + if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); - if (btrfs_test_opt(root, NODATASUM)) + if (btrfs_test_opt(info, NODATASUM)) seq_puts(seq, ",nodatasum"); - if (btrfs_test_opt(root, NODATACOW)) + if (btrfs_test_opt(info, NODATACOW)) seq_puts(seq, ",nodatacow"); - if (btrfs_test_opt(root, NOBARRIER)) + if (btrfs_test_opt(info, NOBARRIER)) seq_puts(seq, ",nobarrier"); if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) seq_printf(seq, ",max_inline=%llu", info->max_inline); @@ -1207,56 +1241,56 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); - if (btrfs_test_opt(root, COMPRESS)) { + if (btrfs_test_opt(info, COMPRESS)) { if (info->compress_type == BTRFS_COMPRESS_ZLIB) compress_type = "zlib"; else compress_type = "lzo"; - if (btrfs_test_opt(root, FORCE_COMPRESS)) + if (btrfs_test_opt(info, FORCE_COMPRESS)) seq_printf(seq, ",compress-force=%s", compress_type); else seq_printf(seq, ",compress=%s", compress_type); } - if (btrfs_test_opt(root, NOSSD)) + if (btrfs_test_opt(info, NOSSD)) seq_puts(seq, ",nossd"); - if (btrfs_test_opt(root, SSD_SPREAD)) + if (btrfs_test_opt(info, SSD_SPREAD)) seq_puts(seq, ",ssd_spread"); - else if (btrfs_test_opt(root, SSD)) + else if (btrfs_test_opt(info, SSD)) seq_puts(seq, ",ssd"); - if (btrfs_test_opt(root, NOTREELOG)) + if (btrfs_test_opt(info, NOTREELOG)) seq_puts(seq, ",notreelog"); - if (btrfs_test_opt(root, NOLOGREPLAY)) + if (btrfs_test_opt(info, NOLOGREPLAY)) seq_puts(seq, ",nologreplay"); - if (btrfs_test_opt(root, FLUSHONCOMMIT)) + if (btrfs_test_opt(info, FLUSHONCOMMIT)) seq_puts(seq, ",flushoncommit"); - if (btrfs_test_opt(root, DISCARD)) + if (btrfs_test_opt(info, DISCARD)) seq_puts(seq, ",discard"); if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) seq_puts(seq, ",noacl"); - if (btrfs_test_opt(root, SPACE_CACHE)) + if (btrfs_test_opt(info, SPACE_CACHE)) seq_puts(seq, ",space_cache"); - else if 
(btrfs_test_opt(root, FREE_SPACE_TREE)) + else if (btrfs_test_opt(info, FREE_SPACE_TREE)) seq_puts(seq, ",space_cache=v2"); else seq_puts(seq, ",nospace_cache"); - if (btrfs_test_opt(root, RESCAN_UUID_TREE)) + if (btrfs_test_opt(info, RESCAN_UUID_TREE)) seq_puts(seq, ",rescan_uuid_tree"); - if (btrfs_test_opt(root, CLEAR_CACHE)) + if (btrfs_test_opt(info, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); - if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) + if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED)) seq_puts(seq, ",user_subvol_rm_allowed"); - if (btrfs_test_opt(root, ENOSPC_DEBUG)) + if (btrfs_test_opt(info, ENOSPC_DEBUG)) seq_puts(seq, ",enospc_debug"); - if (btrfs_test_opt(root, AUTO_DEFRAG)) + if (btrfs_test_opt(info, AUTO_DEFRAG)) seq_puts(seq, ",autodefrag"); - if (btrfs_test_opt(root, INODE_MAP_CACHE)) + if (btrfs_test_opt(info, INODE_MAP_CACHE)) seq_puts(seq, ",inode_cache"); - if (btrfs_test_opt(root, SKIP_BALANCE)) + if (btrfs_test_opt(info, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) + if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) seq_puts(seq, ",check_int_data"); - else if (btrfs_test_opt(root, CHECK_INTEGRITY)) + else if (btrfs_test_opt(info, CHECK_INTEGRITY)) seq_puts(seq, ",check_int"); if (info->check_integrity_print_mask) seq_printf(seq, ",check_int_print_mask=%d", @@ -1265,14 +1299,14 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) if (info->metadata_ratio) seq_printf(seq, ",metadata_ratio=%d", info->metadata_ratio); - if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) + if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR)) seq_puts(seq, ",fatal_errors=panic"); if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) seq_printf(seq, ",commit=%d", info->commit_interval); #ifdef CONFIG_BTRFS_DEBUG - if (btrfs_test_opt(root, FRAGMENT_DATA)) + if (btrfs_test_opt(info, FRAGMENT_DATA)) seq_puts(seq, ",fragment=data"); - if (btrfs_test_opt(root, FRAGMENT_METADATA)) + if (btrfs_test_opt(info, FRAGMENT_METADATA)) seq_puts(seq, ",fragment=metadata"); #endif seq_printf(seq, ",subvolid=%llu", @@ -1412,12 +1446,13 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, if (!IS_ERR(root)) { struct super_block *s = root->d_sb; + struct btrfs_fs_info *fs_info = btrfs_sb(s); struct inode *root_inode = d_inode(root); u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid; ret = 0; if (!is_subvolume_inode(root_inode)) { - pr_err("BTRFS: '%s' is not a valid subvolume\n", + btrfs_err(fs_info, "'%s' is not a valid subvolume", subvol_name); ret = -EINVAL; } @@ -1427,8 +1462,9 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, * subvolume which was passed by ID is renamed and * another subvolume is renamed over the old location. 
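mount_subvol() above double-checks the result of the path walk: the dentry must be a subvolume root, and when subvolid= was also given it must name the same subvolume, because either can go stale if subvolumes are renamed concurrently. The two checks in isolation; the struct and values are invented for the sketch:

#include <errno.h>
#include <stdio.h>

struct mounted_subvol {
        unsigned long long id;
        int is_subvol_root;
};

static int check_subvol(const struct mounted_subvol *s, const char *name,
                        unsigned long long wanted_id)
{
        if (!s->is_subvol_root) {
                fprintf(stderr, "'%s' is not a valid subvolume\n", name);
                return -EINVAL;
        }
        /* wanted_id == 0 means the caller mounted by name only */
        if (wanted_id && s->id != wanted_id) {
                fprintf(stderr, "subvol '%s' does not match subvolid %llu\n",
                        name, wanted_id);
                return -EINVAL;
        }
        return 0;
}

int main(void)
{
        struct mounted_subvol s = { 257, 1 };

        check_subvol(&s, "snap", 0);    /* ok: mounted by name */
        check_subvol(&s, "snap", 256);  /* mismatch: rename race detected */
        return 0;
}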
*/ - pr_err("BTRFS: subvol '%s' does not match subvolid %llu\n", - subvol_name, subvol_objectid); + btrfs_err(fs_info, + "subvol '%s' does not match subvolid %llu", + subvol_name, subvol_objectid); ret = -EINVAL; } if (ret) { @@ -1802,11 +1838,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_info(fs_info, "creating UUID tree"); ret = btrfs_create_uuid_tree(fs_info); if (ret) { - btrfs_warn(fs_info, "failed to create the UUID tree %d", ret); + btrfs_warn(fs_info, + "failed to create the UUID tree %d", + ret); goto restore; } } sb->s_flags &= ~MS_RDONLY; + + set_bit(BTRFS_FS_OPEN, &fs_info->flags); } out: wake_up_process(fs_info->transaction_kthread); @@ -2028,9 +2068,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) * chunk). * * If metadata is exhausted, f_bavail will be 0. - * - * FIXME: not accurate for mixed block groups, total and free/used are ok, - * available appears slightly larger. */ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { @@ -2214,6 +2251,13 @@ static int btrfs_freeze(struct super_block *sb) struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_sb(sb)->tree_root; + root->fs_info->fs_frozen = 1; + /* + * We don't need a barrier here, we'll wait for any transaction that + * could be in progress on other threads (and do delayed iputs that + * we want to avoid on a frozen filesystem), or do the commit + * ourselves. + */ trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { /* no transaction, don't bother */ @@ -2224,6 +2268,14 @@ static int btrfs_freeze(struct super_block *sb) return btrfs_commit_transaction(trans, root); } +static int btrfs_unfreeze(struct super_block *sb) +{ + struct btrfs_root *root = btrfs_sb(sb)->tree_root; + + root->fs_info->fs_frozen = 0; + return 0; +} + static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); @@ -2272,6 +2324,7 @@ static const struct super_operations btrfs_super_ops = { .statfs = btrfs_statfs, .remount_fs = btrfs_remount, .freeze_fs = btrfs_freeze, + .unfreeze_fs = btrfs_unfreeze, }; static const struct file_operations btrfs_ctl_fops = { @@ -2303,7 +2356,7 @@ static void btrfs_interface_exit(void) static void btrfs_print_mod_info(void) { - printk(KERN_INFO "Btrfs loaded" + pr_info("Btrfs loaded, crc32c=%s" #ifdef CONFIG_BTRFS_DEBUG ", debug=on" #endif @@ -2313,36 +2366,8 @@ static void btrfs_print_mod_info(void) #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY ", integrity-checker=on" #endif - "\n"); -} - -static int btrfs_run_sanity_tests(void) -{ - int ret; - - ret = btrfs_init_test_fs(); - if (ret) - return ret; - - ret = btrfs_test_free_space_cache(); - if (ret) - goto out; - ret = btrfs_test_extent_buffer_operations(); - if (ret) - goto out; - ret = btrfs_test_extent_io(); - if (ret) - goto out; - ret = btrfs_test_inodes(); - if (ret) - goto out; - ret = btrfs_test_qgroups(); - if (ret) - goto out; - ret = btrfs_test_free_space_tree(); -out: - btrfs_destroy_test_fs(); - return ret; + "\n", + btrfs_crc32c_impl()); } static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 4879656bda3c..1f157fba8940 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -77,7 +77,7 @@ static int can_modify_feature(struct btrfs_feature_attr *fa) clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR; break; default: - printk(KERN_WARNING "btrfs: sysfs: unknown feature set %d\n", + pr_warn("btrfs: sysfs: unknown feature set %d\n", 
fa->feature_set); return 0; } @@ -326,6 +326,7 @@ SPACE_INFO_ATTR(bytes_used); SPACE_INFO_ATTR(bytes_pinned); SPACE_INFO_ATTR(bytes_reserved); SPACE_INFO_ATTR(bytes_may_use); +SPACE_INFO_ATTR(bytes_readonly); SPACE_INFO_ATTR(disk_used); SPACE_INFO_ATTR(disk_total); BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned); @@ -337,6 +338,7 @@ static struct attribute *space_info_attrs[] = { BTRFS_ATTR_PTR(bytes_pinned), BTRFS_ATTR_PTR(bytes_reserved), BTRFS_ATTR_PTR(bytes_may_use), + BTRFS_ATTR_PTR(bytes_readonly), BTRFS_ATTR_PTR(disk_used), BTRFS_ATTR_PTR(disk_total), BTRFS_ATTR_PTR(total_bytes_pinned), @@ -428,7 +430,8 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); + return snprintf(buf, PAGE_SIZE, "%u\n", + fs_info->super_copy->sectorsize); } BTRFS_ATTR(sectorsize, btrfs_sectorsize_show); @@ -438,7 +441,8 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); + return snprintf(buf, PAGE_SIZE, "%u\n", + fs_info->super_copy->sectorsize); } BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); @@ -834,9 +838,18 @@ static int btrfs_init_debugfs(void) if (!btrfs_debugfs_root_dentry) return -ENOMEM; - debugfs_create_u64("test", S_IRUGO | S_IWUGO, btrfs_debugfs_root_dentry, + /* + * Example code, how to export data through debugfs. + * + * file: /sys/kernel/debug/btrfs/test + * contents of: btrfs_debugfs_test + */ +#ifdef CONFIG_BTRFS_DEBUG + debugfs_create_u64("test", S_IRUGO | S_IWUSR, btrfs_debugfs_root_dentry, &btrfs_debugfs_test); #endif + +#endif return 0; } diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index f54bf450bad3..bf62ad919a95 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -54,7 +54,7 @@ struct inode *btrfs_new_test_inode(void) return new_inode(test_mnt->mnt_sb); } -int btrfs_init_test_fs(void) +static int btrfs_init_test_fs(void) { int ret; @@ -68,12 +68,12 @@ int btrfs_init_test_fs(void) if (IS_ERR(test_mnt)) { printk(KERN_ERR "btrfs: cannot mount test file system\n"); unregister_filesystem(&test_type); - return ret; + return PTR_ERR(test_mnt); } return 0; } -void btrfs_destroy_test_fs(void) +static void btrfs_destroy_test_fs(void) { kern_unmount(test_mnt); unregister_filesystem(&test_type); @@ -128,14 +128,27 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) extent_io_tree_init(&fs_info->freed_extents[0], NULL); extent_io_tree_init(&fs_info->freed_extents[1], NULL); fs_info->pinned_extents = &fs_info->freed_extents[0]; + set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); + + test_mnt->mnt_sb->s_fs_info = fs_info; + return fs_info; } -static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) +void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) { struct radix_tree_iter iter; void **slot; + if (!fs_info) + return; + + if (WARN_ON(!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, + &fs_info->fs_state))) + return; + + test_mnt->mnt_sb->s_fs_info = NULL; + spin_lock(&fs_info->buffer_lock); radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { struct extent_buffer *eb; @@ -167,15 +180,16 @@ void btrfs_free_dummy_root(struct btrfs_root *root) { if (!root) return; + /* Will be freed by btrfs_free_fs_roots */ + if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) + return; if (root->node) 
free_extent_buffer(root->node); - if (root->fs_info) - btrfs_free_dummy_fs_info(root->fs_info); kfree(root); } struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length) +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize) { struct btrfs_block_group_cache *cache; @@ -192,8 +206,8 @@ btrfs_alloc_dummy_block_group(unsigned long length) cache->key.objectid = 0; cache->key.offset = length; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; - cache->full_stripe_len = 4096; + cache->sectorsize = sectorsize; + cache->full_stripe_len = sectorsize; INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); @@ -220,3 +234,46 @@ void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans) INIT_LIST_HEAD(&trans->qgroup_ref_list); trans->type = __TRANS_DUMMY; } + +int btrfs_run_sanity_tests(void) +{ + int ret, i; + u32 sectorsize, nodesize; + u32 test_sectorsize[] = { + PAGE_SIZE, + }; + ret = btrfs_init_test_fs(); + if (ret) + return ret; + for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) { + sectorsize = test_sectorsize[i]; + for (nodesize = sectorsize; + nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE; + nodesize <<= 1) { + pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n", + sectorsize, nodesize); + ret = btrfs_test_free_space_cache(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_buffer_operations(sectorsize, + nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_io(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_inodes(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_qgroups(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_free_space_tree(sectorsize, nodesize); + if (ret) + goto out; + } + } +out: + btrfs_destroy_test_fs(); + return ret; +} diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 054b8c73c951..b17ffbe8f9f3 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -20,56 +20,29 @@ #define __BTRFS_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +int btrfs_run_sanity_tests(void); #define test_msg(fmt, ...) 
pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__) struct btrfs_root; struct btrfs_trans_handle; -int btrfs_test_free_space_cache(void); -int btrfs_test_extent_buffer_operations(void); -int btrfs_test_extent_io(void); -int btrfs_test_inodes(void); -int btrfs_test_qgroups(void); -int btrfs_test_free_space_tree(void); -int btrfs_init_test_fs(void); -void btrfs_destroy_test_fs(void); +int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize); +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize); +int btrfs_test_inodes(u32 sectorsize, u32 nodesize); +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize); +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize); struct inode *btrfs_new_test_inode(void); struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void); +void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info); void btrfs_free_dummy_root(struct btrfs_root *root); struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length); +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize); void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache); void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans); #else -static inline int btrfs_test_free_space_cache(void) -{ - return 0; -} -static inline int btrfs_test_extent_buffer_operations(void) -{ - return 0; -} -static inline int btrfs_init_test_fs(void) -{ - return 0; -} -static inline void btrfs_destroy_test_fs(void) -{ -} -static inline int btrfs_test_extent_io(void) -{ - return 0; -} -static inline int btrfs_test_inodes(void) -{ - return 0; -} -static inline int btrfs_test_qgroups(void) -{ - return 0; -} -static inline int btrfs_test_free_space_tree(void) +static inline int btrfs_run_sanity_tests(void) { return 0; } diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index f51963a8f929..199569174637 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -22,10 +22,11 @@ #include "../extent_io.h" #include "../disk-io.h" -static int test_btrfs_split_item(void) +static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) { - struct btrfs_path *path; - struct btrfs_root *root; + struct btrfs_fs_info *fs_info; + struct btrfs_path *path = NULL; + struct btrfs_root *root = NULL; struct extent_buffer *eb; struct btrfs_item *item; char *value = "mary had a little lamb"; @@ -40,20 +41,28 @@ static int test_btrfs_split_item(void) test_msg("Running btrfs_split_item tests\n"); - root = btrfs_alloc_dummy_root(); + fs_info = btrfs_alloc_dummy_fs_info(); + if (!fs_info) { + test_msg("Could not allocate fs_info\n"); + return -ENOMEM; + } + + root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Could not allocate root\n"); - return PTR_ERR(root); + ret = PTR_ERR(root); + goto out; } path = btrfs_alloc_path(); if (!path) { test_msg("Could not allocate path\n"); - kfree(root); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096); + path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize, + nodesize); if (!eb) { test_msg("Could not allocate dummy buffer\n"); ret = -ENOMEM; @@ -218,12 +227,13 @@ static int test_btrfs_split_item(void) } out: btrfs_free_path(path); - kfree(root); + btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); return ret; } -int btrfs_test_extent_buffer_operations(void) +int btrfs_test_extent_buffer_operations(u32 
sectorsize, u32 nodesize) { - test_msg("Running extent buffer operation tests"); - return test_btrfs_split_item(); + test_msg("Running extent buffer operation tests\n"); + return test_btrfs_split_item(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 55724607f79b..caad80bb9bd0 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/sizes.h> #include "btrfs-tests.h" +#include "../ctree.h" #include "../extent_io.h" #define PROCESS_UNLOCK (1 << 0) @@ -65,7 +66,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, return count; } -static int test_find_delalloc(void) +static int test_find_delalloc(u32 sectorsize) { struct inode *inode; struct extent_io_tree tmp; @@ -113,7 +114,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, 4095, NULL); + set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL); start = 0; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -122,9 +123,9 @@ static int test_find_delalloc(void) test_msg("Should have found at least one delalloc\n"); goto out_bits; } - if (start != 0 || end != 4095) { - test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n", - start, end); + if (start != 0 || end != (sectorsize - 1)) { + test_msg("Expected start 0 end %u, got start %llu end %llu\n", + sectorsize - 1, start, end); goto out_bits; } unlock_extent(&tmp, start, end); @@ -144,7 +145,7 @@ static int test_find_delalloc(void) test_msg("Couldn't find the locked page\n"); goto out_bits; } - set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL); + set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -172,7 +173,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - test_start = max_bytes + 4096; + test_start = max_bytes + sectorsize; locked_page = find_lock_page(inode->i_mapping, test_start >> PAGE_SHIFT); if (!locked_page) { @@ -272,10 +273,37 @@ out: return ret; } +static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb, + unsigned long len) +{ + unsigned long i; + + for (i = 0; i < len * BITS_PER_BYTE; i++) { + int bit, bit1; + + bit = !!test_bit(i, bitmap); + bit1 = !!extent_buffer_test_bit(eb, 0, i); + if (bit1 != bit) { + test_msg("Bits do not match\n"); + return -EINVAL; + } + + bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE, + i % BITS_PER_BYTE); + if (bit1 != bit) { + test_msg("Offset bits do not match\n"); + return -EINVAL; + } + } + return 0; +} + static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, unsigned long len) { - unsigned long i, x; + unsigned long i, j; + u32 x; + int ret; memset(bitmap, 0, len); memset_extent_buffer(eb, 0, 0, len); @@ -286,37 +314,45 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, bitmap_set(bitmap, 0, len * BITS_PER_BYTE); extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + ret = check_eb_bitmap(bitmap, eb, len); + if (ret) { test_msg("Setting all bits failed\n"); - return -EINVAL; + return ret; } bitmap_clear(bitmap, 0, len * BITS_PER_BYTE); extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + ret = check_eb_bitmap(bitmap, eb, len); + if (ret) { 
test_msg("Clearing all bits failed\n"); - return -EINVAL; + return ret; } - bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Setting straddling pages failed\n"); - return -EINVAL; - } + /* Straddling pages test */ + if (len > PAGE_SIZE) { + bitmap_set(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + ret = check_eb_bitmap(bitmap, eb, len); + if (ret) { + test_msg("Setting straddling pages failed\n"); + return ret; + } - bitmap_set(bitmap, 0, len * BITS_PER_BYTE); - bitmap_clear(bitmap, - (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); - extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Clearing straddling pages failed\n"); - return -EINVAL; + bitmap_set(bitmap, 0, len * BITS_PER_BYTE); + bitmap_clear(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + ret = check_eb_bitmap(bitmap, eb, len); + if (ret) { + test_msg("Clearing straddling pages failed\n"); + return ret; + } } /* @@ -324,42 +360,43 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, * something repetitive that could miss some hypothetical off-by-n bug. */ x = 0; - for (i = 0; i < len / sizeof(long); i++) { - x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffUL; - bitmap[i] = x; - } - write_extent_buffer(eb, bitmap, 0, len); - - for (i = 0; i < len * BITS_PER_BYTE; i++) { - int bit, bit1; - - bit = !!test_bit(i, bitmap); - bit1 = !!extent_buffer_test_bit(eb, 0, i); - if (bit1 != bit) { - test_msg("Testing bit pattern failed\n"); - return -EINVAL; + bitmap_clear(bitmap, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE); + for (i = 0; i < len * BITS_PER_BYTE / 32; i++) { + x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU; + for (j = 0; j < 32; j++) { + if (x & (1U << j)) { + bitmap_set(bitmap, i * 32 + j, 1); + extent_buffer_bitmap_set(eb, 0, i * 32 + j, 1); + } } + } - bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE, - i % BITS_PER_BYTE); - if (bit1 != bit) { - test_msg("Testing bit pattern with offset failed\n"); - return -EINVAL; - } + ret = check_eb_bitmap(bitmap, eb, len); + if (ret) { + test_msg("Random bit pattern failed\n"); + return ret; } return 0; } -static int test_eb_bitmaps(void) +static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) { - unsigned long len = PAGE_SIZE * 4; + unsigned long len; unsigned long *bitmap; struct extent_buffer *eb; int ret; test_msg("Running extent buffer bitmap tests\n"); + /* + * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than + * BTRFS_MAX_METADATA_BLOCKSIZE. + */ + len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE) + ? 
sectorsize * 4 : sectorsize; + bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { test_msg("Couldn't allocate test bitmap\n"); @@ -379,7 +416,7 @@ static int test_eb_bitmaps(void) /* Do it over again with an extent buffer which isn't page-aligned. */ free_extent_buffer(eb); - eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len); + eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); if (!eb) { test_msg("Couldn't allocate test extent buffer\n"); kfree(bitmap); @@ -393,17 +430,17 @@ out: return ret; } -int btrfs_test_extent_io(void) +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { int ret; test_msg("Running extent I/O tests\n"); - ret = test_find_delalloc(); + ret = test_find_delalloc(sectorsize); if (ret) goto out; - ret = test_eb_bitmaps(); + ret = test_eb_bitmaps(sectorsize, nodesize); out: test_msg("Extent I/O tests finished\n"); return ret; diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 0eeb8f3d6b67..3221c8dee272 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -22,7 +22,7 @@ #include "../disk-io.h" #include "../free-space-cache.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) /* * This test just does basic sanity checking, making sure we can add an extent @@ -99,7 +99,8 @@ static int test_extents(struct btrfs_block_group_cache *cache) return 0; } -static int test_bitmaps(struct btrfs_block_group_cache *cache) +static int test_bitmaps(struct btrfs_block_group_cache *cache, + u32 sectorsize) { u64 next_bitmap_offset; int ret; @@ -139,7 +140,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) * The first bitmap we have starts at offset 0 so the next one is just * at the end of the first bitmap. */ - next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + next_bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); /* Test a bit straddling two bitmaps */ ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M, @@ -167,9 +168,10 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) } /* This is the high grade jackassery */ -static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) +static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache, + u32 sectorsize) { - u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); int ret; test_msg("Running bitmap and extent tests\n"); @@ -401,7 +403,8 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache) * requests. */ static int -test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) +test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache, + u32 sectorsize) { int ret; u64 offset; @@ -539,7 +542,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. 
*/ - ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096); + ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -597,8 +600,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) { - test_msg("Cache free space is not 1Mb + 4Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", sectorsize); return -EINVAL; } @@ -611,22 +614,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 4Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is a sectorsize free space region in a bitmap. + * Confirm. + */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 4096) { - test_msg("Cache free space is not 4Kb\n"); + if (cache->free_space_ctl->free_space != sectorsize) { + test_msg("Cache free space is not %u\n", sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 4096, 0, + 0, sectorsize, 0, &max_extent_size); if (offset != (SZ_128M + SZ_16M)) { - test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n", - offset); + test_msg("Failed to allocate %u, returned offset : %llu\n", + sectorsize, offset); return -EINVAL; } @@ -733,7 +739,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, SZ_32M, 8192); + ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -757,7 +763,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) /* * Confirm that our extent entry didn't stole all free space from the - * bitmap, because of the small 8Kb free space region. + * bitmap, because of the small 2 * sectorsize free space region. */ ret = check_num_extents_and_bitmaps(cache, 2, 1); if (ret) @@ -783,8 +789,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) { - test_msg("Cache free space is not 1Mb + 8Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize); return -EINVAL; } @@ -796,21 +802,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 8Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is 2 * sectorsize free space region + * in a bitmap. Confirm. 
+ */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 8192) { - test_msg("Cache free space is not 8Kb\n"); + if (cache->free_space_ctl->free_space != 2 * sectorsize) { + test_msg("Cache free space is not %u\n", 2 * sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 8192, 0, + 0, 2 * sectorsize, 0, &max_extent_size); if (offset != SZ_32M) { - test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n", + test_msg("Failed to allocate %u, offset: %llu\n", + 2 * sectorsize, offset); return -EINVAL; } @@ -825,29 +835,38 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return 0; } -int btrfs_test_free_space_cache(void) +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) { + struct btrfs_fs_info *fs_info; struct btrfs_block_group_cache *cache; struct btrfs_root *root = NULL; int ret = -ENOMEM; test_msg("Running btrfs free space cache tests\n"); - cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024); + /* + * For ppc64 (with 64k page size), bytes per bitmap might be + * larger than 1G. To make bitmap test available in ppc64, + * alloc dummy block group whose size cross bitmaps. + */ + cache = btrfs_alloc_dummy_block_group(BITS_PER_BITMAP * sectorsize + + PAGE_SIZE, sectorsize); if (!cache) { test_msg("Couldn't run the tests\n"); return 0; } - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - ret = PTR_ERR(root); + fs_info = btrfs_alloc_dummy_fs_info(); + if (!fs_info) { + ret = -ENOMEM; goto out; } - root->fs_info = btrfs_alloc_dummy_fs_info(); - if (!root->fs_info) + root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto out; + } root->fs_info->extent_root = root; cache->fs_info = root->fs_info; @@ -855,17 +874,18 @@ int btrfs_test_free_space_cache(void) ret = test_extents(cache); if (ret) goto out; - ret = test_bitmaps(cache); + ret = test_bitmaps(cache, sectorsize); if (ret) goto out; - ret = test_bitmaps_and_extents(cache); + ret = test_bitmaps_and_extents(cache, sectorsize); if (ret) goto out; - ret = test_steal_space_from_bitmap_to_extent(cache); + ret = test_steal_space_from_bitmap_to_extent(cache, sectorsize); out: btrfs_free_dummy_block_group(cache); btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); test_msg("Free space cache tests finished\n"); return ret; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 7cea4462acd5..6e144048a72e 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../disk-io.h" @@ -23,20 +24,15 @@ #include "../transaction.h" struct free_space_extent { - u64 start, length; + u64 start; + u64 length; }; -/* - * The test cases align their operations to this in order to hit some of the - * edge cases in the bitmap code. 
- */ -#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096) - static int __check_free_space_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, struct btrfs_path *path, - struct free_space_extent *extents, + const struct free_space_extent * const extents, unsigned int num_extents) { struct btrfs_free_space_info *info; @@ -125,7 +121,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, struct btrfs_path *path, - struct free_space_extent *extents, + const struct free_space_extent * const extents, unsigned int num_extents) { struct btrfs_free_space_info *info; @@ -167,9 +163,10 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans, static int test_empty_block_group(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { + const struct free_space_extent extents[] = { {cache->key.objectid, cache->key.offset}, }; @@ -180,9 +177,10 @@ static int test_empty_block_group(struct btrfs_trans_handle *trans, static int test_remove_all(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = {}; + const struct free_space_extent extents[] = {}; int ret; ret = __remove_from_free_space_tree(trans, fs_info, cache, path, @@ -200,16 +198,17 @@ static int test_remove_all(struct btrfs_trans_handle *trans, static int test_remove_beginning(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { - {cache->key.objectid + BITMAP_RANGE, - cache->key.offset - BITMAP_RANGE}, + const struct free_space_extent extents[] = { + {cache->key.objectid + alignment, + cache->key.offset - alignment}, }; int ret; ret = __remove_from_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid, BITMAP_RANGE); + cache->key.objectid, alignment); if (ret) { test_msg("Could not remove free space\n"); return ret; @@ -223,17 +222,18 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans, static int test_remove_end(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { - {cache->key.objectid, cache->key.offset - BITMAP_RANGE}, + const struct free_space_extent extents[] = { + {cache->key.objectid, cache->key.offset - alignment}, }; int ret; ret = __remove_from_free_space_tree(trans, fs_info, cache, path, cache->key.objectid + - cache->key.offset - BITMAP_RANGE, - BITMAP_RANGE); + cache->key.offset - alignment, + alignment); if (ret) { test_msg("Could not remove free space\n"); return ret; @@ -246,18 +246,19 @@ static int test_remove_end(struct btrfs_trans_handle *trans, static int test_remove_middle(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { - {cache->key.objectid, BITMAP_RANGE}, - {cache->key.objectid + 2 * BITMAP_RANGE, - cache->key.offset - 
2 * BITMAP_RANGE}, + const struct free_space_extent extents[] = { + {cache->key.objectid, alignment}, + {cache->key.objectid + 2 * alignment, + cache->key.offset - 2 * alignment}, }; int ret; ret = __remove_from_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + BITMAP_RANGE, - BITMAP_RANGE); + cache->key.objectid + alignment, + alignment); if (ret) { test_msg("Could not remove free space\n"); return ret; @@ -270,10 +271,11 @@ static int test_remove_middle(struct btrfs_trans_handle *trans, static int test_merge_left(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { - {cache->key.objectid, 2 * BITMAP_RANGE}, + const struct free_space_extent extents[] = { + {cache->key.objectid, 2 * alignment}, }; int ret; @@ -286,15 +288,15 @@ static int test_merge_left(struct btrfs_trans_handle *trans, } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid, BITMAP_RANGE); + cache->key.objectid, alignment); if (ret) { test_msg("Could not add free space\n"); return ret; } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + BITMAP_RANGE, - BITMAP_RANGE); + cache->key.objectid + alignment, + alignment); if (ret) { test_msg("Could not add free space\n"); return ret; @@ -307,10 +309,11 @@ static int test_merge_left(struct btrfs_trans_handle *trans, static int test_merge_right(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { - {cache->key.objectid + BITMAP_RANGE, 2 * BITMAP_RANGE}, + const struct free_space_extent extents[] = { + {cache->key.objectid + alignment, 2 * alignment}, }; int ret; @@ -323,16 +326,16 @@ static int test_merge_right(struct btrfs_trans_handle *trans, } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + 2 * BITMAP_RANGE, - BITMAP_RANGE); + cache->key.objectid + 2 * alignment, + alignment); if (ret) { test_msg("Could not add free space\n"); return ret; } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + BITMAP_RANGE, - BITMAP_RANGE); + cache->key.objectid + alignment, + alignment); if (ret) { test_msg("Could not add free space\n"); return ret; @@ -345,10 +348,11 @@ static int test_merge_right(struct btrfs_trans_handle *trans, static int test_merge_both(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { - {cache->key.objectid, 3 * BITMAP_RANGE}, + const struct free_space_extent extents[] = { + {cache->key.objectid, 3 * alignment}, }; int ret; @@ -361,23 +365,23 @@ static int test_merge_both(struct btrfs_trans_handle *trans, } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid, BITMAP_RANGE); + cache->key.objectid, alignment); if (ret) { test_msg("Could not add free space\n"); return ret; } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + 2 * BITMAP_RANGE, - BITMAP_RANGE); + cache->key.objectid + 2 * alignment, + alignment); if (ret) { test_msg("Could not add free space\n"); return ret; } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + BITMAP_RANGE, - 
BITMAP_RANGE); + cache->key.objectid + alignment, + alignment); if (ret) { test_msg("Could not add free space\n"); return ret; @@ -390,12 +394,13 @@ static int test_merge_both(struct btrfs_trans_handle *trans, static int test_merge_none(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache, - struct btrfs_path *path) + struct btrfs_path *path, + u32 alignment) { - struct free_space_extent extents[] = { - {cache->key.objectid, BITMAP_RANGE}, - {cache->key.objectid + 2 * BITMAP_RANGE, BITMAP_RANGE}, - {cache->key.objectid + 4 * BITMAP_RANGE, BITMAP_RANGE}, + const struct free_space_extent extents[] = { + {cache->key.objectid, alignment}, + {cache->key.objectid + 2 * alignment, alignment}, + {cache->key.objectid + 4 * alignment, alignment}, }; int ret; @@ -408,23 +413,23 @@ static int test_merge_none(struct btrfs_trans_handle *trans, } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid, BITMAP_RANGE); + cache->key.objectid, alignment); if (ret) { test_msg("Could not add free space\n"); return ret; } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + 4 * BITMAP_RANGE, - BITMAP_RANGE); + cache->key.objectid + 4 * alignment, + alignment); if (ret) { test_msg("Could not add free space\n"); return ret; } ret = __add_to_free_space_tree(trans, fs_info, cache, path, - cache->key.objectid + 2 * BITMAP_RANGE, - BITMAP_RANGE); + cache->key.objectid + 2 * alignment, + alignment); if (ret) { test_msg("Could not add free space\n"); return ret; @@ -437,27 +442,30 @@ static int test_merge_none(struct btrfs_trans_handle *trans, typedef int (*test_func_t)(struct btrfs_trans_handle *, struct btrfs_fs_info *, struct btrfs_block_group_cache *, - struct btrfs_path *); + struct btrfs_path *, + u32 alignment); -static int run_test(test_func_t test_func, int bitmaps) +static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, + u32 nodesize, u32 alignment) { + struct btrfs_fs_info *fs_info; struct btrfs_root *root = NULL; struct btrfs_block_group_cache *cache = NULL; struct btrfs_trans_handle trans; struct btrfs_path *path = NULL; int ret; - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - test_msg("Couldn't allocate dummy root\n"); - ret = PTR_ERR(root); + fs_info = btrfs_alloc_dummy_fs_info(); + if (!fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + ret = -ENOMEM; goto out; } - root->fs_info = btrfs_alloc_dummy_fs_info(); - if (!root->fs_info) { - test_msg("Couldn't allocate dummy fs info\n"); - ret = -ENOMEM; + root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); + if (IS_ERR(root)) { + test_msg("Couldn't allocate dummy root\n"); + ret = PTR_ERR(root); goto out; } @@ -466,7 +474,8 @@ static int run_test(test_func_t test_func, int bitmaps) root->fs_info->free_space_root = root; root->fs_info->tree_root = root; - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, + nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -474,9 +483,9 @@ static int run_test(test_func_t test_func, int bitmaps) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE); + cache = btrfs_alloc_dummy_block_group(8 * alignment, sectorsize); if (!cache) { test_msg("Couldn't allocate dummy block group cache\n"); ret = -ENOMEM; @@ 
-510,7 +519,7 @@ static int run_test(test_func_t test_func, int bitmaps) } } - ret = test_func(&trans, root->fs_info, cache, path); + ret = test_func(&trans, root->fs_info, cache, path, alignment); if (ret) goto out; @@ -531,20 +540,34 @@ out: btrfs_free_path(path); btrfs_free_dummy_block_group(cache); btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); return ret; } -static int run_test_both_formats(test_func_t test_func) +static int run_test_both_formats(test_func_t test_func, u32 sectorsize, + u32 nodesize, u32 alignment) { + int test_ret = 0; int ret; - ret = run_test(test_func, 0); - if (ret) - return ret; - return run_test(test_func, 1); + ret = run_test(test_func, 0, sectorsize, nodesize, alignment); + if (ret) { + test_msg("%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u\n", + test_func, sectorsize, nodesize, alignment); + test_ret = ret; + } + + ret = run_test(test_func, 1, sectorsize, nodesize, alignment); + if (ret) { + test_msg("%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u\n", + test_func, sectorsize, nodesize, alignment); + test_ret = ret; + } + + return test_ret; } -int btrfs_test_free_space_tree(void) +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize) { test_func_t tests[] = { test_empty_block_group, @@ -557,16 +580,30 @@ int btrfs_test_free_space_tree(void) test_merge_both, test_merge_none, }; + u32 bitmap_alignment; + int test_ret = 0; int i; + /* + * Align some operations to a page to flush out bugs in the extent + * buffer bitmap handling of highmem. + */ + bitmap_alignment = BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE; + test_msg("Running free space tree tests\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { - int ret = run_test_both_formats(tests[i]); - if (ret) { - test_msg("%pf failed\n", tests[i]); - return ret; - } + int ret; + + ret = run_test_both_formats(tests[i], sectorsize, nodesize, + sectorsize); + if (ret) + test_ret = ret; + + ret = run_test_both_formats(tests[i], sectorsize, nodesize, + bitmap_alignment); + if (ret) + test_ret = ret; } - return 0; + return test_ret; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 8a25fe8b7c45..0bf46808ce8f 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. 
*/ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../btrfs_inode.h" @@ -86,19 +87,19 @@ static void insert_inode_item_key(struct btrfs_root *root) * diagram of how the extents will look though this may not be possible we still * want to make sure everything acts normally (the last number is not inclusive) * - * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288] - * [hole ][inline][ hole ][ regular ][regular1 split][ hole ] + * [0 - 5][5 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291] + * [hole ][inline][hole but no extent][ hole ][ regular ][regular1 split] * - * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056] - * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ] + * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ] + * [ hole ][regular1 split][ prealloc ][ prealloc1 ][prealloc1 written] * - * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ] - * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent] + * [36867 - 45059][45059 - 53251][53251 - 57347][57347 - 61443][61443- 69635] + * [ prealloc1 ][ compressed ][ compressed1 ][ regular ][ compressed1] * - * [81920-86016] - * [ regular ] + * [69635-73731][ 73731 - 86019 ][86019-90115] + * [ regular ][ hole but no extent][ regular ] */ -static void setup_file_extents(struct btrfs_root *root) +static void setup_file_extents(struct btrfs_root *root, u32 sectorsize) { int slot = 0; u64 disk_bytenr = SZ_1M; @@ -119,7 +120,7 @@ static void setup_file_extents(struct btrfs_root *root) insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0, slot); slot++; - offset = 4096; + offset = sectorsize; /* Now another hole */ insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, @@ -128,100 +129,108 @@ static void setup_file_extents(struct btrfs_root *root) offset += 4; /* Now for a regular extent */ - insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - disk_bytenr += 4096; - offset += 4095; + disk_bytenr += sectorsize; + offset += sectorsize - 1; /* * Now for 3 extents that were split from a hole punch so we test * offsets properly. 
*/ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG, - 0, slot); + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, 0, 0, + BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now for a unwritten prealloc extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; + offset += sectorsize; /* * We want to jack up disk_bytenr a little more so the em stuff doesn't * merge our records. */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* * Now for a partially written prealloc extent, basically the same as * the hole punch example above. Ram_bytes never changes when you mark * extents written btw. */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); + offset += sectorsize; + insert_extent(root, offset, sectorsize, 4 * sectorsize, sectorsize, + disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, + slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now a normal compressed extent */ - insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, 2 * sectorsize, 2 * sectorsize, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; + offset += 2 * sectorsize; /* No merges */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* Now a split compressed extent */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096, + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, + disk_bytenr + sectorsize, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, 
disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; - disk_bytenr += 8192; + offset += 2 * sectorsize; + disk_bytenr += 2 * sectorsize; /* Now extents that have a hole but no hole extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 16384; - disk_bytenr += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + offset += 4 * sectorsize; + disk_bytenr += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); } static unsigned long prealloc_only = 0; static unsigned long compressed_only = 0; static unsigned long vacancy_only = 0; -static noinline int test_btrfs_get_extent(void) +static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) { + struct btrfs_fs_info *fs_info = NULL; struct inode *inode = NULL; struct btrfs_root *root = NULL; struct extent_map *em = NULL; @@ -240,23 +249,19 @@ static noinline int test_btrfs_get_extent(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - test_msg("Couldn't allocate root\n"); + fs_info = btrfs_alloc_dummy_fs_info(); + if (!fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); goto out; } - /* - * We do this since btrfs_get_extent wants to assign em->bdev to - * root->fs_info->fs_devices->latest_bdev. - */ - root->fs_info = btrfs_alloc_dummy_fs_info(); - if (!root->fs_info) { - test_msg("Couldn't allocate dummy fs info\n"); + root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -273,7 +278,7 @@ static noinline int test_btrfs_get_extent(void) /* First with no extents */ BTRFS_I(inode)->root = root; - em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0); if (IS_ERR(em)) { em = NULL; test_msg("Got an error when we shouldn't have\n"); @@ -295,7 +300,7 @@ static noinline int test_btrfs_get_extent(void) * setup_file_extents, so if you change anything there you need to * update the comment and update the expected values below. 
*/ - setup_file_extents(root); + setup_file_extents(root, sectorsize); em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); if (IS_ERR(em)) { @@ -318,7 +323,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -327,7 +332,8 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected an inline, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4091) { + + if (em->start != offset || em->len != (sectorsize - 5)) { test_msg("Unexpected extent wanted start %llu len 1, got start " "%llu len %llu\n", offset, em->start, em->len); goto out; @@ -344,7 +350,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -366,7 +372,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Regular extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -375,7 +381,7 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4095) { + if (em->start != offset || em->len != sectorsize - 1) { test_msg("Unexpected extent wanted start %llu len 4095, got " "start %llu len %llu\n", offset, em->start, em->len); goto out; @@ -393,7 +399,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are split extents */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -402,9 +408,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -421,7 +428,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -430,9 +437,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -442,7 
+450,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -451,9 +459,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -475,7 +484,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -484,9 +493,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -503,7 +513,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -512,9 +522,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -532,7 +543,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -541,9 +552,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -564,7 +576,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; 
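/*
 * The test_btrfs_get_extent() hunks around this point all repeat one
 * verification pattern: map the next file range, assert (start, len, flags)
 * against sectorsize-scaled expectations instead of hardcoded 4096/8192
 * values, then advance offset by em->len and free the map. Below is a
 * condensed, userspace-only sketch of that pattern; "struct em_stub" and
 * check_em() are pared-down stand-ins for the kernel's extent_map handling,
 * not the real btrfs definitions.
 */
#include <stdint.h>
#include <stdio.h>

struct em_stub {		/* stand-in for struct extent_map */
	uint64_t start;
	uint64_t len;
	unsigned long flags;
};

/* Return 0 when the mapped extent matches the expectation, -1 otherwise. */
static int check_em(const struct em_stub *em, uint64_t exp_start,
		    uint64_t exp_len, unsigned long exp_flags)
{
	if (em->start != exp_start || em->len != exp_len) {
		fprintf(stderr,
			"unexpected extent: wanted start %llu len %llu, got start %llu len %llu\n",
			(unsigned long long)exp_start,
			(unsigned long long)exp_len,
			(unsigned long long)em->start,
			(unsigned long long)em->len);
		return -1;
	}
	if (em->flags != exp_flags) {
		fprintf(stderr, "unexpected flags %lu, wanted %lu\n",
			em->flags, exp_flags);
		return -1;
	}
	return 0;
}

int main(void)
{
	uint32_t sectorsize = 4096;	/* the tests now parameterize this */
	struct em_stub em = { 0, 2 * (uint64_t)sectorsize, 0 };
	uint64_t offset = 0;

	if (check_em(&em, offset, 2 * (uint64_t)sectorsize, 0))
		return 1;
	offset = em.start + em.len;	/* walk to the next extent */
	printf("next lookup at offset %llu\n", (unsigned long long)offset);
	return 0;
}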
free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -573,9 +585,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -598,7 +611,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Now for the compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -607,9 +620,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -631,7 +645,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Split compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -640,9 +654,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -665,7 +680,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -674,9 +689,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -691,7 +707,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); 
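/*
 * Earlier in this diff, __test_eb_bitmaps() in extent-io-tests.c stops
 * memcmp()ing raw bytes and instead drives a plain bitmap and the extent
 * buffer from the same linear congruential generator, comparing bit by bit.
 * A self-contained sketch of that generator and the mirrored set/compare
 * idea follows; two byte arrays stand in for the bitmap/extent-buffer pair.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LEN 64			/* bytes per bitmap in this sketch */

static void set_bit8(uint8_t *map, unsigned long nr)
{
	map[nr / 8] |= (uint8_t)(1u << (nr % 8));
}

int main(void)
{
	uint8_t a[LEN], b[LEN];
	uint32_t x = 0;
	unsigned long i, j;

	memset(a, 0, LEN);
	memset(b, 0, LEN);

	/* Same recurrence as the test: x = (0x19660d * x + 0x3c6ef35f) mod 2^32 */
	for (i = 0; i < LEN * 8 / 32; i++) {
		x = (uint32_t)((0x19660dULL * x + 0x3c6ef35fULL) & 0xffffffffULL);
		for (j = 0; j < 32; j++) {
			if (x & (1u << j)) {
				set_bit8(a, i * 32 + j);
				/* the kernel test sets the extent buffer bit here */
				set_bit8(b, i * 32 + j);
			}
		}
	}

	for (i = 0; i < LEN * 8; i++) {
		int bit_a = !!(a[i / 8] & (1u << (i % 8)));
		int bit_b = !!(b[i / 8] & (1u << (i % 8)));

		if (bit_a != bit_b) {
			fprintf(stderr, "bits do not match at %lu\n", i);
			return 1;
		}
	}
	printf("random bit pattern matches\n");
	return 0;
}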
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -701,9 +717,10 @@ static noinline int test_btrfs_get_extent(void) disk_bytenr, em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -725,7 +742,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -734,9 +751,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -765,9 +783,10 @@ static noinline int test_btrfs_get_extent(void) * length of the actual hole, if this changes we'll have to change this * test. */ - if (em->start != offset || em->len != 12288) { - test_msg("Unexpected extent wanted start %llu len 12288, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 3 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 3 * sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -783,7 +802,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -792,9 +811,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -812,11 +832,13 @@ out: free_extent_map(em); iput(inode); btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); return ret; } -static int test_hole_first(void) +static int test_hole_first(u32 sectorsize, u32 nodesize) { + struct btrfs_fs_info *fs_info = NULL; struct inode *inode = NULL; struct btrfs_root *root = NULL; struct extent_map *em = NULL; @@ -832,19 +854,19 @@ static int test_hole_first(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - 
test_msg("Couldn't allocate root\n"); + fs_info = btrfs_alloc_dummy_fs_info(); + if (!fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); goto out; } - root->fs_info = btrfs_alloc_dummy_fs_info(); - if (!root->fs_info) { - test_msg("Couldn't allocate dummy fs info\n"); + root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -861,9 +883,9 @@ static int test_hole_first(void) * btrfs_get_extent. */ insert_inode_item_key(root); - insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096, - BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0); + insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1); + em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -872,9 +894,10 @@ static int test_hole_first(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != 0 || em->len != 4096) { - test_msg("Unexpected extent wanted start 0 len 4096, got start " - "%llu len %llu\n", em->start, em->len); + if (em->start != 0 || em->len != sectorsize) { + test_msg("Unexpected extent wanted start 0 len %u, " + "got start %llu len %llu\n", + sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -884,18 +907,19 @@ static int test_hole_first(void) } free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0); + em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; } - if (em->block_start != 4096) { + if (em->block_start != sectorsize) { test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != 4096 || em->len != 4096) { - test_msg("Unexpected extent wanted start 4096 len 4096, got " - "start %llu len %llu\n", em->start, em->len); + if (em->start != sectorsize || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %u len %u, " + "got start %llu len %llu\n", + sectorsize, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -909,11 +933,13 @@ out: free_extent_map(em); iput(inode); btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); return ret; } -static int test_extent_accounting(void) +static int test_extent_accounting(u32 sectorsize, u32 nodesize) { + struct btrfs_fs_info *fs_info = NULL; struct inode *inode = NULL; struct btrfs_root *root = NULL; int ret = -ENOMEM; @@ -924,15 +950,15 @@ static int test_extent_accounting(void) return ret; } - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - test_msg("Couldn't allocate root\n"); + fs_info = btrfs_alloc_dummy_fs_info(); + if (!fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); goto out; } - root->fs_info = btrfs_alloc_dummy_fs_info(); - if (!root->fs_info) { - test_msg("Couldn't allocate dummy fs info\n"); + root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); goto out; } @@ -942,7 +968,7 @@ static int test_extent_accounting(void) /* [BTRFS_MAX_EXTENT_SIZE] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, - NULL); 
+ NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -954,10 +980,11 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4k] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, - BTRFS_MAX_EXTENT_SIZE + 4095, NULL); + BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, + NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -969,10 +996,10 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ + /* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_KERNEL); @@ -987,11 +1014,12 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4K] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, - NULL); + (BTRFS_MAX_EXTENT_SIZE >> 1) + + sectorsize - 1, + NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1004,16 +1032,17 @@ static int test_extent_accounting(void) } /* - * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize] * * I'm artificially adding 2 to outstanding_extents because in the * buffered IO case we'd add things up as we go, but I don't feel like * doing that here, this isn't the interesting case we want to test. */ BTRFS_I(inode)->outstanding_extents += 2; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, - (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, - NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, + (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, + NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1025,10 +1054,13 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ + /* + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize] + */ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1042,8 +1074,8 @@ static int test_extent_accounting(void) /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, - BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, NULL, GFP_KERNEL); @@ -1063,8 +1095,9 @@ static int test_extent_accounting(void) * might fail and I'd rather satisfy my paranoia at this point. 
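The hunks above rewrite test_extent_accounting() so every range is expressed in sectorsize units rather than a hard-coded 4096, while the extent count it verifies still falls out of BTRFS_MAX_EXTENT_SIZE. As a rough userspace model of that arithmetic (the constant matches the kernel's 128MiB definition; the helper name is ours, not kernel API):

	/* Userspace model of the extent-count arithmetic exercised by
	 * this test. BTRFS_MAX_EXTENT_SIZE mirrors the kernel value;
	 * outstanding_extents() is an illustrative helper. */
	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	#define BTRFS_MAX_EXTENT_SIZE (128ULL * 1024 * 1024)

	/* A delalloc range of 'len' bytes is accounted as one
	 * outstanding extent per BTRFS_MAX_EXTENT_SIZE chunk. */
	static uint64_t outstanding_extents(uint64_t len)
	{
		return (len + BTRFS_MAX_EXTENT_SIZE - 1) / BTRFS_MAX_EXTENT_SIZE;
	}

	int main(void)
	{
		uint32_t sectorsize = 4096;	/* any supported sector size */

		/* [BTRFS_MAX_EXTENT_SIZE] -> 1 extent */
		assert(outstanding_extents(BTRFS_MAX_EXTENT_SIZE) == 1);
		/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] -> 2 extents */
		assert(outstanding_extents(BTRFS_MAX_EXTENT_SIZE + sectorsize) == 2);
		printf("accounting model OK\n");
		return 0;
	}

Expressing the lengths in sectorsize units is what lets the same assertions hold on 4K and 64K page machines.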
*/ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1100,10 +1133,11 @@ out: NULL, GFP_KERNEL); iput(inode); btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); return ret; } -int btrfs_test_inodes(void) +int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { int ret; @@ -1112,13 +1146,13 @@ int btrfs_test_inodes(void) set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); test_msg("Running btrfs_get_extent tests\n"); - ret = test_btrfs_get_extent(); + ret = test_btrfs_get_extent(sectorsize, nodesize); if (ret) return ret; test_msg("Running hole first btrfs_get_extent test\n"); - ret = test_hole_first(); + ret = test_hole_first(sectorsize, nodesize); if (ret) return ret; test_msg("Running outstanding_extents tests\n"); - return test_extent_accounting(); + return test_extent_accounting(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 8aa4ded31326..ca7cb5e6d385 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../transaction.h" @@ -216,7 +217,8 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, return ret; } -static int test_no_shared_qgroup(struct btrfs_root *root) +static int test_no_shared_qgroup(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -227,7 +229,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root) btrfs_init_dummy_trans(&trans); test_msg("Qgroup basic add\n"); - ret = btrfs_create_qgroup(NULL, fs_info, 5); + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; @@ -238,18 +240,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root) * we can only call btrfs_qgroup_account_extent() directly to test * quota. 
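btrfs_test_inodes() and the qgroup tests below now take a (sectorsize, nodesize) pair instead of assuming 4K everywhere. A minimal sketch of what a caller sweeping several sector sizes could look like, assuming a made-up dummy_test and an illustrative size list (the real harness lives in btrfs-tests.c):

	/* Parameterized test runner in the style introduced above;
	 * run_test and dummy_test are stand-ins, not kernel symbols. */
	#include <stdint.h>
	#include <stdio.h>

	typedef int (*test_fn)(uint32_t sectorsize, uint32_t nodesize);

	static int run_test(test_fn fn, uint32_t sectorsize, uint32_t nodesize)
	{
		int ret = fn(sectorsize, nodesize);
		if (ret)
			fprintf(stderr, "test failed: sectorsize %u nodesize %u\n",
				sectorsize, nodesize);
		return ret;
	}

	static int dummy_test(uint32_t sectorsize, uint32_t nodesize)
	{
		/* A real test would build extents in sectorsize units and
		 * tree blocks in nodesize units. */
		return (nodesize % sectorsize) ? -1 : 0;
	}

	int main(void)
	{
		/* Illustrative set only: one entry per page size of interest. */
		static const uint32_t sectorsizes[] = { 4096, 65536 };
		uint32_t nodesize = 16384;

		for (unsigned i = 0; i < sizeof(sectorsizes) / sizeof(*sectorsizes); i++) {
			uint32_t ns = nodesize > sectorsizes[i] ? nodesize : sectorsizes[i];
			if (run_test(dummy_test, sectorsizes[i], ns))
				return 1;
		}
		return 0;
	}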
*/ - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -257,32 +260,33 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } old_roots = NULL; new_roots = NULL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_item(root, 4096, 4096); + ret = remove_extent_item(root, nodesize, nodesize); if (ret) return -EINVAL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -290,14 +294,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -310,7 +314,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root) * right, also remove one of the roots and make sure the exclusive count is * adjusted properly. */ -static int test_multiple_refs(struct btrfs_root *root) +static int test_multiple_refs(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -322,25 +327,29 @@ static int test_multiple_refs(struct btrfs_root *root) test_msg("Qgroup multiple refs test\n"); - /* We have 5 created already from the previous test */ - ret = btrfs_create_qgroup(NULL, fs_info, 256); + /* + * We have BTRFS_FS_TREE_OBJECTID created already from the + * previous test. 
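The magic qgroup ids 5 and 256 in the old test are really BTRFS_FS_TREE_OBJECTID and BTRFS_FIRST_FREE_OBJECTID, which is what these hunks switch to. For reference, a level-0 qgroup simply reuses the subvolume objectid; the sketch below shows the usual (level << 48 | objectid) composition, with the constant values copied from the kernel headers and the helper being illustrative:

	#include <inttypes.h>
	#include <stdio.h>

	#define BTRFS_FS_TREE_OBJECTID    5ULL   /* the default subvolume */
	#define BTRFS_FIRST_FREE_OBJECTID 256ULL /* first user subvolume id */

	/* A level-0 qgroup is named after the subvolume it tracks:
	 * qgroupid = (level << 48) | objectid. */
	static uint64_t qgroupid(uint16_t level, uint64_t objectid)
	{
		return ((uint64_t)level << 48) | objectid;
	}

	int main(void)
	{
		printf("fs tree qgroup:      %" PRIu64 "\n",
		       qgroupid(0, BTRFS_FS_TREE_OBJECTID));
		printf("first subvol qgroup: %" PRIu64 "\n",
		       qgroupid(0, BTRFS_FIRST_FREE_OBJECTID));
		return 0;
	}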
+ */ + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -348,30 +357,32 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = add_tree_ref(root, 4096, 4096, 0, 256); + ret = add_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -379,35 +390,38 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_ref(root, 4096, 4096, 0, 256); + ret = remove_extent_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -415,19 +429,21 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, 
nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -435,24 +451,26 @@ static int test_multiple_refs(struct btrfs_root *root) return 0; } -int btrfs_test_qgroups(void) +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) { + struct btrfs_fs_info *fs_info = NULL; struct btrfs_root *root; struct btrfs_root *tmp_root; int ret = 0; - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - test_msg("Couldn't allocate root\n"); - return PTR_ERR(root); + fs_info = btrfs_alloc_dummy_fs_info(); + if (!fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + return -ENOMEM; } - root->fs_info = btrfs_alloc_dummy_fs_info(); - if (!root->fs_info) { - test_msg("Couldn't allocate dummy fs info\n"); - ret = -ENOMEM; + root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); + ret = PTR_ERR(root); goto out; } + /* We are using this root as our extent root */ root->fs_info->extent_root = root; @@ -462,13 +480,14 @@ int btrfs_test_qgroups(void) */ root->fs_info->tree_root = root; root->fs_info->quota_root = root; - root->fs_info->quota_enabled = 1; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); /* * Can't use bytenr 0, some things freak out * *cough*backref walking code*cough* */ - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, nodesize, + nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -476,16 +495,16 @@ int btrfs_test_qgroups(void) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); goto out; } - tmp_root->root_key.objectid = 5; + tmp_root->root_key.objectid = BTRFS_FS_TREE_OBJECTID; root->fs_info->fs_root = tmp_root; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { @@ -493,14 +512,14 @@ int btrfs_test_qgroups(void) goto out; } - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); goto out; } - tmp_root->root_key.objectid = 256; + tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { test_msg("Couldn't insert fs root %d\n", ret); @@ -508,11 +527,12 @@ int btrfs_test_qgroups(void) } test_msg("Running qgroup tests\n"); - ret = test_no_shared_qgroup(root); + ret = test_no_shared_qgroup(root, sectorsize, nodesize); if (ret) goto out; - ret = test_multiple_refs(root); + ret = test_multiple_refs(root, sectorsize, nodesize); out: btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f6e24cb423ae..9517de0e668c 
100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -65,8 +65,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) BUG_ON(!list_empty(&transaction->list)); WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); if (transaction->delayed_refs.pending_csums) - printk(KERN_ERR "pending csums is %llu\n", - transaction->delayed_refs.pending_csums); + btrfs_err(transaction->fs_info, + "pending csums is %llu", + transaction->delayed_refs.pending_csums); while (!list_empty(&transaction->pending_chunks)) { struct extent_map *em; @@ -245,6 +246,7 @@ loop: return -EROFS; } + cur_trans->fs_info = fs_info; atomic_set(&cur_trans->num_writers, 1); extwriter_counter_init(cur_trans, type); init_waitqueue_head(&cur_trans->writer_wait); @@ -272,11 +274,9 @@ loop: */ smp_mb(); if (!list_empty(&fs_info->tree_mod_seq_list)) - WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when " - "creating a fresh transaction\n"); + WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when creating a fresh transaction\n"); if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) - WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when " - "creating a fresh transaction\n"); + WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when creating a fresh transaction\n"); atomic64_set(&fs_info->tree_mod_seq, 0); spin_lock_init(&cur_trans->delayed_refs.lock); @@ -441,7 +441,7 @@ static void wait_current_trans(struct btrfs_root *root) static int may_wait_transaction(struct btrfs_root *root, int type) { - if (root->fs_info->log_root_recovering) + if (test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) return 0; if (type == TRANS_USERSPACE) @@ -549,11 +549,8 @@ again: } } while (ret == -EBUSY); - if (ret < 0) { - /* We must get the transaction if we are JOIN_NOLOCK. 
*/ - BUG_ON(type == TRANS_JOIN_NOLOCK); + if (ret < 0) goto join_fail; - } cur_trans = root->fs_info->running_transaction; @@ -561,6 +558,7 @@ again: h->transaction = cur_trans; h->root = root; h->use_count = 1; + h->fs_info = root->fs_info; h->type = type; h->can_flush_pending_bgs = true; @@ -818,6 +816,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; + u64 transid = trans->transid; unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; @@ -905,7 +904,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (must_run_delayed_refs) { - btrfs_async_run_delayed_refs(root, cur, + btrfs_async_run_delayed_refs(root, cur, transid, must_run_delayed_refs == 1); } return err; @@ -991,7 +990,6 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_state *cached_state = NULL; u64 start = 0; u64 end; - struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); bool errors = false; while (!find_first_extent_bit(dirty_pages, start, &start, &end, @@ -1023,17 +1021,17 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { if ((mark & EXTENT_DIRTY) && - test_and_clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, - &btree_ino->runtime_flags)) + test_and_clear_bit(BTRFS_FS_LOG1_ERR, + &root->fs_info->flags)) errors = true; if ((mark & EXTENT_NEW) && - test_and_clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, - &btree_ino->runtime_flags)) + test_and_clear_bit(BTRFS_FS_LOG2_ERR, + &root->fs_info->flags)) errors = true; } else { - if (test_and_clear_bit(BTRFS_INODE_BTREE_ERR, - &btree_ino->runtime_flags)) + if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, + &root->fs_info->flags)) errors = true; } @@ -1298,11 +1296,11 @@ int btrfs_defrag_root(struct btrfs_root *root) btrfs_btree_balance_dirty(info->tree_root); cond_resched(); - if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) + if (btrfs_fs_closing(info) || ret != -EAGAIN) break; - if (btrfs_defrag_cancelled(root->fs_info)) { - pr_debug("BTRFS: defrag_root cancelled\n"); + if (btrfs_defrag_cancelled(info)) { + btrfs_debug(info, "defrag_root cancelled"); ret = -EAGAIN; break; } @@ -1311,11 +1309,6 @@ int btrfs_defrag_root(struct btrfs_root *root) return ret; } -/* Bisesctability fixup, remove in 4.8 */ -#ifndef btrfs_std_error -#define btrfs_std_error btrfs_handle_fs_error -#endif - /* * Do all special snapshot related qgroup dirty hack. * @@ -1338,7 +1331,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, * kick in anyway. 
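A recurring change in this file is replacing standalone status fields (log_root_recovering, quota_enabled, the per-btree-inode LOG1/LOG2 error bits) with test_bit()/set_bit()/clear_bit() on a single fs_info->flags word. A small userspace model of the same pattern using C11 atomics, with illustrative bit names:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	enum {
		FS_QUOTA_ENABLED,	/* mirrors BTRFS_FS_QUOTA_ENABLED */
		FS_LOG_RECOVERING,	/* mirrors BTRFS_FS_LOG_RECOVERING */
	};

	struct fs_info {
		atomic_ulong flags;	/* all status bits in one word */
	};

	static void set_flag(struct fs_info *fs, int bit)
	{
		atomic_fetch_or(&fs->flags, 1UL << bit);
	}

	static void clear_flag(struct fs_info *fs, int bit)
	{
		atomic_fetch_and(&fs->flags, ~(1UL << bit));
	}

	static bool test_flag(struct fs_info *fs, int bit)
	{
		return atomic_load(&fs->flags) & (1UL << bit);
	}

	int main(void)
	{
		struct fs_info fs = { .flags = 0 };

		set_flag(&fs, FS_QUOTA_ENABLED);
		if (!test_flag(&fs, FS_QUOTA_ENABLED))
			return 1;	/* quota disabled: nothing to account */
		clear_flag(&fs, FS_QUOTA_ENABLED);
		printf("flag word now %#lx\n", atomic_load(&fs.flags));
		return 0;
	}

Packing the flags into one word also gives atomic test-and-clear semantics (see the BTRFS_FS_LOG1_ERR handling above) without taking a lock.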
*/ mutex_lock(&fs_info->qgroup_ioctl_lock); - if (!fs_info->quota_enabled) { + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { mutex_unlock(&fs_info->qgroup_ioctl_lock); return 0; } @@ -1385,7 +1378,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, switch_commit_roots(trans->transaction, fs_info); ret = btrfs_write_and_wait_transaction(trans, src); if (ret) - btrfs_std_error(fs_info, ret, + btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction for qgroup"); out: @@ -1477,7 +1470,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent_root = BTRFS_I(parent_inode)->root; record_root_in_trans(trans, parent_root, 0); - cur_time = current_fs_time(parent_inode->i_sb); + cur_time = current_time(parent_inode); /* * insert the directory item @@ -1495,7 +1488,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto dir_item_existed; } else if (IS_ERR(dir_item)) { ret = PTR_ERR(dir_item); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } btrfs_release_path(path); @@ -1508,7 +1501,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, */ ret = btrfs_run_delayed_items(trans, root); if (ret) { /* Transaction aborted */ - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -1547,7 +1540,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) { btrfs_tree_unlock(old); free_extent_buffer(old); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -1558,7 +1551,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_tree_unlock(old); free_extent_buffer(old); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } /* see comments in should_cow_block() */ @@ -1572,7 +1565,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_tree_unlock(tmp); free_extent_buffer(tmp); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -1584,7 +1577,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_ino(parent_inode), index, dentry->d_name.name, dentry->d_name.len); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -1592,19 +1585,19 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } ret = btrfs_reloc_post_snapshot(trans, pending); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -1626,23 +1619,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, /* We have check then name at the beginning, so it is impossible. 
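Nearly every error path in this function now calls btrfs_abort_transaction(trans, ret) without the root argument: the handle carries its btrfs_fs_info directly (the transaction.h hunk further down adds the field). A minimal model of that refactor, with illustrative types:

	#include <stdio.h>

	struct fs_info { const char *label; };

	struct trans_handle {
		struct fs_info *fs_info;	/* set once at transaction start */
		int aborted;
	};

	/* Before: abort_transaction(trans, root, err) reached fs_info
	 * through whichever root the caller happened to pass. After:
	 * one fewer parameter, and no way to pass a mismatched root. */
	static void abort_transaction(struct trans_handle *trans, int err)
	{
		trans->aborted = err;
		fprintf(stderr, "%s: transaction aborted: %d\n",
			trans->fs_info->label, err);
	}

	int main(void)
	{
		struct fs_info fs = { .label = "fs1" };
		struct trans_handle trans = { .fs_info = &fs };

		abort_transaction(&trans, -5 /* -EIO */);
		return trans.aborted ? 0 : 1;
	}

Dropping the parameter also removes a class of bugs where the root passed to the abort did not match the transaction, which is exactly what some of these call sites used to do (passing 'log' or 'extent_root' instead of the root the handle was started on).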
*/ BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = - current_fs_time(parent_inode->i_sb); + current_time(parent_inode); ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b, BTRFS_UUID_KEY_SUBVOL, objectid); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) { @@ -1651,14 +1644,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, BTRFS_UUID_KEY_RECEIVED_SUBVOL, objectid); if (ret && ret != -EEXIST) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } } ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto fail; } @@ -1713,9 +1706,9 @@ static void update_super_roots(struct btrfs_root *root) super->root = root_item->bytenr; super->generation = root_item->generation; super->root_level = root_item->level; - if (btrfs_test_opt(root, SPACE_CACHE)) + if (btrfs_test_opt(root->fs_info, SPACE_CACHE)) super->cache_generation = root_item->generation; - if (root->fs_info->update_uuid_tree_gen) + if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &root->fs_info->flags)) super->uuid_tree_generation = root_item->generation; } @@ -1854,7 +1847,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, WARN_ON(trans->use_count > 1); - btrfs_abort_transaction(trans, root, err); + btrfs_abort_transaction(trans, err); spin_lock(&root->fs_info->trans_lock); @@ -1899,14 +1892,14 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { - if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) return btrfs_start_delalloc_roots(fs_info, 1, -1); return 0; } static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) { - if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); } @@ -1922,7 +1915,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_transaction *prev_trans = NULL; - struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); int ret; /* Stop the commit early if ->aborted is set */ @@ -2216,8 +2208,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_update_commit_device_size(root->fs_info); btrfs_update_commit_device_bytes_used(root, cur_trans); - clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); - clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + clear_bit(BTRFS_FS_LOG1_ERR, &root->fs_info->flags); + clear_bit(BTRFS_FS_LOG2_ERR, &root->fs_info->flags); btrfs_trans_release_chunk_metadata(trans); @@ -2281,8 +2273,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); + /* + * If fs has been frozen, we can not handle delayed iputs, 
otherwise + * it'll result in deadlock about SB_FREEZE_FS. + */ if (current != root->fs_info->transaction_kthread && - current != root->fs_info->cleaner_kthread) + current != root->fs_info->cleaner_kthread && + !root->fs_info->fs_frozen) btrfs_run_delayed_iputs(root); return ret; @@ -2326,7 +2323,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) list_del_init(&root->root_list); spin_unlock(&fs_info->trans_lock); - pr_debug("BTRFS: cleaner removing %llu\n", root->objectid); + btrfs_debug(fs_info, "cleaner removing %llu", root->objectid); btrfs_kill_all_delayed_nodes(root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9fe0ec2bf0fe..6cf0d37d4f76 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -82,6 +82,7 @@ struct btrfs_transaction { spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; + struct btrfs_fs_info *fs_info; }; #define __TRANS_FREEZABLE (1U << 0) @@ -110,7 +111,6 @@ struct btrfs_trans_handle { u64 chunk_bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; - unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; @@ -121,6 +121,7 @@ struct btrfs_trans_handle { bool can_flush_pending_bgs; bool reloc_reserved; bool sync; + bool dirty; unsigned int type; /* * this root is only needed to validate that the root passed to @@ -128,6 +129,7 @@ struct btrfs_trans_handle { * Subvolume quota depends on this */ struct btrfs_root *root; + struct btrfs_fs_info *fs_info; struct seq_list delayed_ref_elem; struct list_head qgroup_ref_list; struct list_head new_bgs; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b7665af471d8..528cae123dc9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -27,6 +27,7 @@ #include "backref.h" #include "hash.h" #include "compression.h" +#include "qgroup.h" /* magic values for the inode_only field in btrfs_log_inode: * @@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; offset = key->offset - btrfs_file_extent_offset(eb, item); + /* + * Manually record dirty extent, as here we did a shallow + * file extent item copy and skip normal backref update, + * but modifying extent tree all by ourselves. 
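The pattern this comment describes is record-now, account-later: remember the (disk_bytenr, num_bytes) pair the shallow copy touched, and let qgroup accounting process it at commit time. A toy userspace version of such a deferred-record list, much simpler than the kernel's ulist-based implementation and purely illustrative:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct dirty_extent {
		uint64_t bytenr;	/* extent disk start */
		uint64_t num_bytes;	/* extent length */
		struct dirty_extent *next;
	};

	/* Queue one extent for later qgroup accounting. */
	static int record_dirty_extent(struct dirty_extent **head,
				       uint64_t bytenr, uint64_t num_bytes)
	{
		struct dirty_extent *de = malloc(sizeof(*de));
		if (!de)
			return -12;	/* -ENOMEM */
		de->bytenr = bytenr;
		de->num_bytes = num_bytes;
		de->next = *head;
		*head = de;
		return 0;
	}

	int main(void)
	{
		struct dirty_extent *dirty = NULL;

		/* Replaying one extent: remember it, account it later. */
		if (record_dirty_extent(&dirty, 13631488, 4096))
			return 1;
		for (struct dirty_extent *de = dirty; de; de = de->next)
			printf("dirty: bytenr %llu len %llu\n",
			       (unsigned long long)de->bytenr,
			       (unsigned long long)de->num_bytes);
		free(dirty);
		return 0;
	}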
+ * So need to manually record dirty extent for qgroup, + * as the owner of the file extent changed from log tree + * (doesn't affect qgroup) to fs/file tree(affects qgroup) + */ + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, + btrfs_file_extent_disk_bytenr(eb, item), + btrfs_file_extent_disk_num_bytes(eb, item), + GFP_NOFS); + if (ret < 0) + goto out; + if (ins.objectid > 0) { u64 csum_start; u64 csum_end; @@ -2422,8 +2438,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); if (*level == 1) { ret = wc->process_func(root, next, wc, ptr_gen); @@ -2757,7 +2773,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, while (1) { int batch = atomic_read(&root->log_batch); /* when we're on an ssd, just kick the log commit out */ - if (!btrfs_test_opt(root, SSD) && + if (!btrfs_test_opt(root->fs_info, SSD) && test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) { mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); @@ -2788,7 +2804,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); if (ret) { blk_finish_plug(&plug); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); btrfs_free_logged_extents(log, log_transid); btrfs_set_log_full_commit(root->fs_info, trans); mutex_unlock(&root->log_mutex); @@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ mutex_unlock(&root->log_mutex); - btrfs_init_log_ctx(&root_log_ctx); + btrfs_init_log_ctx(&root_log_ctx, NULL); mutex_lock(&log_root_tree->log_mutex); atomic_inc(&log_root_tree->log_batch); @@ -2838,7 +2854,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, btrfs_set_log_full_commit(root->fs_info, trans); if (ret != -ENOSPC) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); mutex_unlock(&log_root_tree->log_mutex); goto out; } @@ -2851,6 +2867,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { blk_finish_plug(&plug); + list_del_init(&root_log_ctx.list); mutex_unlock(&log_root_tree->log_mutex); ret = root_log_ctx.log_ret; goto out; @@ -2898,7 +2915,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, blk_finish_plug(&plug); if (ret) { btrfs_set_log_full_commit(root->fs_info, trans); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; @@ -2934,7 +2951,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = write_ctree_super(trans, root->fs_info->tree_root, 1); if (ret) { btrfs_set_log_full_commit(root->fs_info, trans); - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out_wake_log_root; } @@ -2991,7 +3008,7 @@ static void free_log_tree(struct btrfs_trans_handle *trans, ret = walk_log_tree(trans, log, &wc); /* I don't think this can happen but just in case */ if (ret) - btrfs_abort_transaction(trans, log, ret); + btrfs_abort_transaction(trans, ret); while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, @@ -3160,7 +3177,7 @@ out_unlock: btrfs_set_log_full_commit(root->fs_info, trans); ret = 0; } else if (ret < 0) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, 
ret); btrfs_end_log_trans(root); @@ -3193,7 +3210,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, btrfs_set_log_full_commit(root->fs_info, trans); ret = 0; } else if (ret < 0 && ret != -ENOENT) - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); btrfs_end_log_trans(root); return ret; @@ -3944,7 +3961,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans, * i_mapping flags, so that the next fsync won't get * an outdated io error too. */ - btrfs_inode_check_errors(inode); + filemap_check_errors(inode->i_mapping); *ordered_io_error = true; break; } @@ -4181,7 +4198,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, * without writing to the log tree and the fsync must report the * file data write error and not commit the current transaction. */ - ret = btrfs_inode_check_errors(inode); + ret = filemap_check_errors(inode->i_mapping); if (ret) ctx->io_err = ret; process: @@ -4469,7 +4486,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, static int btrfs_check_ref_name_override(struct extent_buffer *eb, const int slot, const struct btrfs_key *key, - struct inode *inode) + struct inode *inode, + u64 *other_ino) { int ret; struct btrfs_path *search_path; @@ -4528,7 +4546,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, search_path, parent, name, this_name_len, 0); if (di && !IS_ERR(di)) { - ret = 1; + struct btrfs_key di_key; + + btrfs_dir_item_key_to_cpu(search_path->nodes[0], + di, &di_key); + if (di_key.type == BTRFS_INODE_ITEM_KEY) { + ret = 1; + *other_ino = di_key.objectid; + } else { + ret = -EAGAIN; + } goto out; } else if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -4703,6 +4730,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ins_nr = 0; ret = btrfs_search_forward(root, &min_key, path, trans->transid); + if (ret < 0) { + err = ret; + goto out_unlock; + } if (ret != 0) break; again: @@ -4718,16 +4749,72 @@ again: if ((min_key.type == BTRFS_INODE_REF_KEY || min_key.type == BTRFS_INODE_EXTREF_KEY) && BTRFS_I(inode)->generation == trans->transid) { + u64 other_ino = 0; + ret = btrfs_check_ref_name_override(path->nodes[0], path->slots[0], - &min_key, inode); + &min_key, inode, + &other_ino); if (ret < 0) { err = ret; goto out_unlock; - } else if (ret > 0) { - err = 1; - btrfs_set_log_full_commit(root->fs_info, trans); - goto out_unlock; + } else if (ret > 0 && ctx && + other_ino != btrfs_ino(ctx->inode)) { + struct btrfs_key inode_key; + struct inode *other_inode; + + if (ins_nr > 0) { + ins_nr++; + } else { + ins_nr = 1; + ins_start_slot = path->slots[0]; + } + ret = copy_items(trans, inode, dst_path, path, + &last_extent, ins_start_slot, + ins_nr, inode_only, + logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; + btrfs_release_path(path); + inode_key.objectid = other_ino; + inode_key.type = BTRFS_INODE_ITEM_KEY; + inode_key.offset = 0; + other_inode = btrfs_iget(root->fs_info->sb, + &inode_key, root, + NULL); + /* + * If the other inode that had a conflicting dir + * entry was deleted in the current transaction, + * we don't need to do more work nor fallback to + * a transaction commit. + */ + if (IS_ERR(other_inode) && + PTR_ERR(other_inode) == -ENOENT) { + goto next_key; + } else if (IS_ERR(other_inode)) { + err = PTR_ERR(other_inode); + goto out_unlock; + } + /* + * We are safe logging the other inode without + * acquiring its i_mutex as long as we log with + * the LOG_INODE_EXISTS mode. 
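In other words: on a name collision, log the owner of the conflicting name too, but only with enough metadata (LOG_INODE_EXISTS) to keep the log consistent. A compact sketch of that decision, with made-up stub types standing in for struct inode and the logging call:

	#include <stdbool.h>
	#include <stdio.h>

	enum log_mode { LOG_INODE_ALL, LOG_INODE_EXISTS };

	struct inode_stub {
		unsigned long long ino;
		bool deleted;		/* gone in the current transaction? */
	};

	static int log_inode(struct inode_stub *inode, enum log_mode mode)
	{
		printf("logging inode %llu in %s mode\n", inode->ino,
		       mode == LOG_INODE_EXISTS ? "EXISTS" : "ALL");
		return 0;
	}

	/* Returns 0 on success, <0 on error; never asks the caller to
	 * commit the whole transaction just because of a name clash. */
	static int handle_name_clash(struct inode_stub *self,
				     struct inode_stub *other)
	{
		if (!other || other->deleted)
			return 0;	/* clash already resolved, move on */
		if (other->ino == self->ino)
			return 0;	/* same inode, nothing to do */
		return log_inode(other, LOG_INODE_EXISTS);
	}

	int main(void)
	{
		struct inode_stub a = { .ino = 257 }, b = { .ino = 258 };
		return handle_name_clash(&a, &b);
	}

The win is that an fsync no longer degrades into a full transaction commit just because a dentry name was recycled across inodes within the same transaction.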
We're safe against + * concurrent renames of the other inode as well + * because during a rename we pin the log and + * update the log with the new name before we + * unpin it. + */ + err = btrfs_log_inode(trans, root, other_inode, + LOG_INODE_EXISTS, + 0, LLONG_MAX, ctx); + iput(other_inode); + if (err) + goto out_unlock; + else + goto next_key; } } @@ -4795,7 +4882,7 @@ next_slot: ins_nr = 0; } btrfs_release_path(path); - +next_key: if (min_key.offset < (u64)-1) { min_key.offset++; } else if (min_key.type < max_key.type) { @@ -4989,8 +5076,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) break; - if (IS_ROOT(parent)) + if (IS_ROOT(parent)) { + inode = d_inode(parent); + if (btrfs_must_commit_transaction(trans, inode)) + ret = 1; break; + } parent = dget_parent(parent); dput(old_parent); @@ -5301,7 +5392,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, sb = inode->i_sb; - if (btrfs_test_opt(root, NOTREELOG)) { + if (btrfs_test_opt(root->fs_info, NOTREELOG)) { ret = 1; goto end_no_trans; } @@ -5488,7 +5579,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) if (!path) return -ENOMEM; - fs_info->log_root_recovering = 1; + set_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags); trans = btrfs_start_transaction(fs_info->tree_root, 0); if (IS_ERR(trans)) { @@ -5501,8 +5592,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ret = walk_log_tree(trans, log_root_tree, &wc); if (ret) { - btrfs_handle_fs_error(fs_info, ret, "Failed to pin buffers while " - "recovering log root tree."); + btrfs_handle_fs_error(fs_info, ret, + "Failed to pin buffers while recovering log root tree."); goto error; } @@ -5548,8 +5639,8 @@ again: free_extent_buffer(log->node); free_extent_buffer(log->commit_root); kfree(log); - btrfs_handle_fs_error(fs_info, ret, "Couldn't read target root " - "for tree log recovery."); + btrfs_handle_fs_error(fs_info, ret, + "Couldn't read target root for tree log recovery."); goto error; } @@ -5598,7 +5689,7 @@ again: free_extent_buffer(log_root_tree->node); log_root_tree->log_root = NULL; - fs_info->log_root_recovering = 0; + clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags); kfree(log_root_tree); return 0; diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index a9f1b75d080d..ab858e31ccbc 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -30,15 +30,18 @@ struct btrfs_log_ctx { int log_transid; int io_err; bool log_new_dentries; + struct inode *inode; struct list_head list; }; -static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) +static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, + struct inode *inode) { ctx->log_ret = 0; ctx->log_transid = 0; ctx->io_err = 0; ctx->log_new_dentries = false; + ctx->inode = inode; INIT_LIST_HEAD(&ctx->list); } diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 778282944530..7fc89e4adb41 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -69,8 +69,9 @@ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid, ret = -ENOENT; if (!IS_ALIGNED(item_size, sizeof(u64))) { - btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!", - (unsigned long)item_size); + btrfs_warn(uuid_root->fs_info, + "uuid item with illegal size %lu!", + (unsigned long)item_size); goto out; } while (item_size) { @@ -137,10 +138,10 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, offset = btrfs_item_ptr_offset(eb, slot); offset 
+= btrfs_item_size_nr(eb, slot) - sizeof(subid_le); } else if (ret < 0) { - btrfs_warn(uuid_root->fs_info, "insert uuid item failed %d " - "(0x%016llx, 0x%016llx) type %u!", - ret, (unsigned long long)key.objectid, - (unsigned long long)key.offset, type); + btrfs_warn(uuid_root->fs_info, + "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!", + ret, (unsigned long long)key.objectid, + (unsigned long long)key.offset, type); goto out; } @@ -184,8 +185,8 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1); if (ret < 0) { - btrfs_warn(uuid_root->fs_info, "error %d while searching for uuid item!", - ret); + btrfs_warn(uuid_root->fs_info, + "error %d while searching for uuid item!", ret); goto out; } if (ret > 0) { @@ -198,8 +199,9 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, offset = btrfs_item_ptr_offset(eb, slot); item_size = btrfs_item_size_nr(eb, slot); if (!IS_ALIGNED(item_size, sizeof(u64))) { - btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!", - (unsigned long)item_size); + btrfs_warn(uuid_root->fs_info, + "uuid item with illegal size %lu!", + (unsigned long)item_size); ret = -ENOENT; goto out; } @@ -299,8 +301,9 @@ again_search_slot: offset = btrfs_item_ptr_offset(leaf, slot); item_size = btrfs_item_size_nr(leaf, slot); if (!IS_ALIGNED(item_size, sizeof(u64))) { - btrfs_warn(fs_info, "uuid item with illegal size %lu!", - (unsigned long)item_size); + btrfs_warn(fs_info, + "uuid item with illegal size %lu!", + (unsigned long)item_size); goto skip; } while (item_size) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bdc62561ede8..71a60cc01451 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -140,7 +140,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root); static void __btrfs_reset_dev_stats(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); -static void btrfs_close_one_device(struct btrfs_device *device); DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -462,7 +461,7 @@ loop_lock: sync_pending = 0; } - btrfsic_submit_bio(cur->bi_rw, cur); + btrfsic_submit_bio(cur); num_run++; batch_run++; @@ -835,10 +834,6 @@ static void __free_device(struct work_struct *work) struct btrfs_device *device; device = container_of(work, struct btrfs_device, rcu_work); - - if (device->bdev) - blkdev_put(device->bdev, device->mode); - rcu_string_free(device->name); kfree(device); } @@ -853,19 +848,81 @@ static void free_device(struct rcu_head *head) schedule_work(&device->rcu_work); } +static void btrfs_close_bdev(struct btrfs_device *device) +{ + if (device->bdev && device->writeable) { + sync_blockdev(device->bdev); + invalidate_bdev(device->bdev); + } + + if (device->bdev) + blkdev_put(device->bdev, device->mode); +} + +static void btrfs_prepare_close_one_device(struct btrfs_device *device) +{ + struct btrfs_fs_devices *fs_devices = device->fs_devices; + struct btrfs_device *new_device; + struct rcu_string *name; + + if (device->bdev) + fs_devices->open_devices--; + + if (device->writeable && + device->devid != BTRFS_DEV_REPLACE_DEVID) { + list_del_init(&device->dev_alloc_list); + fs_devices->rw_devices--; + } + + if (device->missing) + fs_devices->missing_devices--; + + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ + + /* Safe because we are under uuid_mutex */ + if 
(device->name) { + name = rcu_string_strdup(device->name->str, GFP_NOFS); + BUG_ON(!name); /* -ENOMEM */ + rcu_assign_pointer(new_device->name, name); + } + + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; +} + static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device, *tmp; + struct list_head pending_put; + + INIT_LIST_HEAD(&pending_put); if (--fs_devices->opened > 0) return 0; mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) { - btrfs_close_one_device(device); + btrfs_prepare_close_one_device(device); + list_add(&device->dev_list, &pending_put); } mutex_unlock(&fs_devices->device_list_mutex); + /* + * btrfs_show_devname() is using the device_list_mutex, + * sometimes call to blkdev_put() leads vfs calling + * into this func. So do put outside of device_list_mutex, + * as of now. + */ + while (!list_empty(&pending_put)) { + device = list_first_entry(&pending_put, + struct btrfs_device, dev_list); + list_del(&device->dev_list); + btrfs_close_bdev(device); + call_rcu(&device->rcu, free_device); + } + WARN_ON(fs_devices->open_devices); WARN_ON(fs_devices->rw_devices); fs_devices->opened = 0; @@ -1097,12 +1154,12 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, ret = device_list_add(path, disk_super, devid, fs_devices_ret); if (ret > 0) { if (disk_super->label[0]) { - printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); + pr_info("BTRFS: device label %s ", disk_super->label); } else { - printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); + pr_info("BTRFS: device fsid %pU ", disk_super->fsid); } - printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); + pr_cont("devid %llu transid %llu %s\n", devid, transid, path); ret = 0; } if (!ret && fs_devices_ret) @@ -1803,7 +1860,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) u64 num_devices; int ret = 0; bool clear_super = false; - char *dev_name = NULL; mutex_lock(&uuid_mutex); @@ -1839,11 +1895,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) list_del_init(&device->dev_alloc_list); device->fs_devices->rw_devices--; unlock_chunks(root); - dev_name = kstrdup(device->name->str, GFP_KERNEL); - if (!dev_name) { - ret = -ENOMEM; - goto error_undo; - } clear_super = true; } @@ -1893,12 +1944,21 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); } - call_rcu(&device->rcu, free_device); - num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + /* + * at this point, the device is zero sized and detached from + * the devices list. All that's left is to zero out the old + * supers and free the device. + */ + if (device->writeable) + btrfs_scratch_superblocks(device->bdev, device->name->str); + + btrfs_close_bdev(device); + call_rcu(&device->rcu, free_device); + if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; fs_devices = root->fs_info->fs_devices; @@ -1917,24 +1977,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) root->fs_info->num_tolerated_disk_barrier_failures = btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); - /* - * at this point, the device is zero sized. 
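The pending_put list introduced above is a classic two-phase teardown: unlink everything while holding device_list_mutex, then do the blocking blkdev_put()-style work after dropping the lock, since blkdev_put() can lead back into btrfs_show_devname(), which takes the same mutex. A userspace pthread model of the shape of that fix, with illustrative types:

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct device {
		int id;
		struct device *next;
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct device *devices;

	static void close_bdev(struct device *dev)
	{
		/* Stand-in for sync_blockdev()/blkdev_put(): may block
		 * and may call back into code that takes list_lock. */
		printf("closing device %d outside the lock\n", dev->id);
	}

	static void close_all_devices(void)
	{
		struct device *pending = NULL;

		pthread_mutex_lock(&list_lock);
		while (devices) {		/* phase 1: unlink under lock */
			struct device *dev = devices;
			devices = dev->next;
			dev->next = pending;
			pending = dev;
		}
		pthread_mutex_unlock(&list_lock);

		while (pending) {		/* phase 2: blocking work unlocked */
			struct device *dev = pending;
			pending = dev->next;
			close_bdev(dev);
			free(dev);
		}
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++) {
			struct device *dev = malloc(sizeof(*dev));
			if (!dev)
				return 1;
			dev->id = i;
			dev->next = devices;
			devices = dev;
		}
		close_all_devices();
		return 0;
	}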
We want to - * remove it from the devices list and zero out the old super - */ - if (clear_super) { - struct block_device *bdev; - - bdev = blkdev_get_by_path(dev_name, FMODE_READ | FMODE_EXCL, - root->fs_info->bdev_holder); - if (!IS_ERR(bdev)) { - btrfs_scratch_superblocks(bdev, dev_name); - blkdev_put(bdev, FMODE_READ | FMODE_EXCL); - } - } - out: - kfree(dev_name); - mutex_unlock(&uuid_mutex); return ret; @@ -1986,6 +2029,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, /* zero out the old super if it is writable */ btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } + + btrfs_close_bdev(srcdev); + call_rcu(&srcdev->rcu, free_device); /* @@ -2041,6 +2087,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, * the device_list_mutex lock. */ btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); + + btrfs_close_bdev(tgtdev); call_rcu(&tgtdev->rcu, free_device); } @@ -2399,14 +2447,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = init_first_rw_device(trans, root, device); unlock_chunks(root); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto error_trans; } } ret = btrfs_add_device(trans, root, device); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto error_trans; } @@ -2415,7 +2463,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = btrfs_finish_sprout(trans, root); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto error_trans; } @@ -2444,9 +2492,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = btrfs_relocate_sys_chunks(root); if (ret < 0) btrfs_handle_fs_error(root->fs_info, ret, - "Failed to relocate sys chunks after " - "device initialization. This can be fixed " - "using the \"btrfs balance\" command."); + "Failed to relocate sys chunks after device initialization. 
This can be fixed using the \"btrfs balance\" command."); trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) == -ENOENT) @@ -2505,7 +2551,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { if (device->bdev == bdev) { - btrfs_err(fs_info, "target device is in the filesystem!"); + btrfs_err(fs_info, + "target device is in the filesystem!"); ret = -EEXIST; goto error; } @@ -2514,7 +2561,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, if (i_size_read(bdev->bd_inode) < btrfs_device_get_total_bytes(srcdev)) { - btrfs_err(fs_info, "target device is smaller than source device!"); + btrfs_err(fs_info, + "target device is smaller than source device!"); ret = -EINVAL; goto error; } @@ -2761,6 +2809,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 dev_extent_len = 0; u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; int i, ret = 0; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; /* Just in case */ root = root->fs_info->chunk_root; @@ -2787,13 +2836,20 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, check_system_chunk(trans, extent_root, map->type); unlock_chunks(root->fs_info->chunk_root); + /* + * Take the device list mutex to prevent races with the final phase of + * a device replace operation that replaces the device object associated + * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()). + */ + mutex_lock(&fs_devices->device_list_mutex); for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; ret = btrfs_free_dev_extent(trans, device, map->stripes[i].physical, &dev_extent_len); if (ret) { - btrfs_abort_transaction(trans, root, ret); + mutex_unlock(&fs_devices->device_list_mutex); + btrfs_abort_transaction(trans, ret); goto out; } @@ -2811,14 +2867,17 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, if (map->stripes[i].dev) { ret = btrfs_update_device(trans, map->stripes[i].dev); if (ret) { - btrfs_abort_transaction(trans, root, ret); + mutex_unlock(&fs_devices->device_list_mutex); + btrfs_abort_transaction(trans, ret); goto out; } } } + mutex_unlock(&fs_devices->device_list_mutex); + ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -2827,14 +2886,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); if (ret) { - btrfs_abort_transaction(trans, root, ret); + btrfs_abort_transaction(trans, ret); goto out; } } ret = btrfs_remove_block_group(trans, extent_root, chunk_offset, em); if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); + btrfs_abort_transaction(trans, ret); goto out; } @@ -2891,7 +2950,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) * chunk tree entries */ ret = btrfs_remove_chunk(trans, root, chunk_offset); - btrfs_end_transaction(trans, root); + btrfs_end_transaction(trans, extent_root); return ret; } @@ -3410,7 +3469,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) u64 size_to_free; u64 chunk_type; struct btrfs_chunk *chunk; - struct btrfs_path *path; + struct btrfs_path *path = NULL; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_trans_handle *trans; @@ -3444,13 +3503,33 @@ static 
int __btrfs_balance(struct btrfs_fs_info *fs_info) ret = btrfs_shrink_device(device, old_size - size_to_free); if (ret == -ENOSPC) break; - BUG_ON(ret); + if (ret) { + /* btrfs_shrink_device never returns ret > 0 */ + WARN_ON(ret > 0); + goto error; + } trans = btrfs_start_transaction(dev_root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + btrfs_info_in_rcu(fs_info, + "resize: unable to start transaction after shrinking device %s (error %d), old size %llu, new size %llu", + rcu_str_deref(device->name), ret, + old_size, old_size - size_to_free); + goto error; + } ret = btrfs_grow_device(trans, device, old_size); - BUG_ON(ret); + if (ret) { + btrfs_end_transaction(trans, dev_root); + /* btrfs_grow_device never returns ret > 0 */ + WARN_ON(ret > 0); + btrfs_info_in_rcu(fs_info, + "resize: unable to grow device after shrinking device %s (error %d), old size %llu, new size %llu", + rcu_str_deref(device->name), ret, + old_size, old_size - size_to_free); + goto error; + } btrfs_end_transaction(trans, dev_root); } @@ -3617,7 +3696,7 @@ error: btrfs_free_path(path); if (enospc_errors) { btrfs_info(fs_info, "%d enospc errors during balance", - enospc_errors); + enospc_errors); if (!ret) ret = -ENOSPC; } @@ -3711,8 +3790,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if (!(bctl->flags & BTRFS_BALANCE_DATA) || !(bctl->flags & BTRFS_BALANCE_METADATA) || memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { - btrfs_err(fs_info, "with mixed groups data and " - "metadata balance options must be the same"); + btrfs_err(fs_info, + "with mixed groups data and metadata balance options must be the same"); ret = -EINVAL; goto out; } @@ -3734,23 +3813,23 @@ int btrfs_balance(struct btrfs_balance_control *bctl, allowed |= (BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_RAID6); if (validate_convert_profile(&bctl->data, allowed)) { - btrfs_err(fs_info, "unable to start balance with target " - "data profile %llu", - bctl->data.target); + btrfs_err(fs_info, + "unable to start balance with target data profile %llu", + bctl->data.target); ret = -EINVAL; goto out; } if (validate_convert_profile(&bctl->meta, allowed)) { btrfs_err(fs_info, - "unable to start balance with target metadata profile %llu", - bctl->meta.target); + "unable to start balance with target metadata profile %llu", + bctl->meta.target); ret = -EINVAL; goto out; } if (validate_convert_profile(&bctl->sys, allowed)) { btrfs_err(fs_info, - "unable to start balance with target system profile %llu", - bctl->sys.target); + "unable to start balance with target system profile %llu", + bctl->sys.target); ret = -EINVAL; goto out; } @@ -3770,10 +3849,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (fs_info->avail_metadata_alloc_bits & allowed) && !(bctl->meta.target & allowed))) { if (bctl->flags & BTRFS_BALANCE_FORCE) { - btrfs_info(fs_info, "force reducing metadata integrity"); + btrfs_info(fs_info, + "force reducing metadata integrity"); } else { - btrfs_err(fs_info, "balance will reduce metadata " - "integrity, use force if you want this"); + btrfs_err(fs_info, + "balance will reduce metadata integrity, use force if you want this"); ret = -EINVAL; goto out; } @@ -3783,8 +3863,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) < btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) { btrfs_warn(fs_info, - "metadata profile 0x%llx has lower redundancy than data profile 0x%llx", - bctl->meta.target, bctl->data.target); + 
"metadata profile 0x%llx has lower redundancy than data profile 0x%llx", + bctl->meta.target, bctl->data.target); } if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { @@ -3874,7 +3954,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) } spin_unlock(&fs_info->balance_lock); - if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { + if (btrfs_test_opt(fs_info, SKIP_BALANCE)) { btrfs_info(fs_info, "force skipping balance"); return 0; } @@ -4140,7 +4220,7 @@ out: if (ret) btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret); else - fs_info->update_uuid_tree_gen = 1; + set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); up(&fs_info->uuid_tree_rescan_sem); return 0; } @@ -4229,7 +4309,8 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) BTRFS_UUID_TREE_OBJECTID); if (IS_ERR(uuid_root)) { ret = PTR_ERR(uuid_root); - btrfs_abort_transaction(trans, tree_root, ret); + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans, tree_root); return ret; } @@ -4502,8 +4583,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) btrfs_set_fs_incompat(info, RAID56); } -#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ - - sizeof(struct btrfs_item) \ +#define BTRFS_MAX_DEVS(r) ((BTRFS_MAX_ITEM_SIZE(r) \ - sizeof(struct btrfs_chunk)) \ / sizeof(struct btrfs_stripe) + 1) @@ -4682,12 +4762,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_RAID5) { raid_stripe_len = find_raid56_stripe_len(ndevs - 1, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 1; } if (type & BTRFS_BLOCK_GROUP_RAID6) { raid_stripe_len = find_raid56_stripe_len(ndevs - 2, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 2; } @@ -4832,15 +4912,16 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, read_unlock(&em_tree->lock); if (!em) { - btrfs_crit(extent_root->fs_info, "unable to find logical " - "%Lu len %Lu", chunk_offset, chunk_size); + btrfs_crit(extent_root->fs_info, + "unable to find logical %Lu len %Lu", + chunk_offset, chunk_size); return -EINVAL; } if (em->start != chunk_offset || em->len != chunk_size) { - btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" - " %Lu-%Lu, found %Lu-%Lu", chunk_offset, - chunk_size, em->start, em->len); + btrfs_crit(extent_root->fs_info, + "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu", + chunk_offset, chunk_size, em->start, em->len); free_extent_map(em); return -EINVAL; } @@ -5073,9 +5154,9 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) } if (em->start > logical || em->start + em->len < logical) { - btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " - "%Lu-%Lu", logical, logical+len, em->start, - em->start + em->len); + btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu", + logical, logical+len, em->start, + em->start + em->len); free_extent_map(em); return 1; } @@ -5248,7 +5329,7 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) kfree(bbio); } -static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, +static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map) @@ -5289,9 +5370,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } if (em->start > logical || em->start + em->len < logical) { - btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " - "found %Lu-%Lu", 
logical, em->start, - em->start + em->len); + btrfs_crit(fs_info, + "found a bad mapping, wanted %Lu, found %Lu-%Lu", + logical, em->start, em->start + em->len); free_extent_map(em); return -EINVAL; } @@ -5309,9 +5390,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, stripe_offset = stripe_nr * stripe_len; if (offset < stripe_offset) { - btrfs_crit(fs_info, "stripe math has gone wrong, " - "stripe_offset=%llu, offset=%llu, start=%llu, " - "logical=%llu, stripe_len=%llu", + btrfs_crit(fs_info, + "stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu", stripe_offset, offset, em->start, logical, stripe_len); free_extent_map(em); @@ -5334,7 +5414,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, raid56_full_stripe_start *= full_stripe_len; } - if (rw & REQ_DISCARD) { + if (op == REQ_OP_DISCARD) { /* we don't discard raid56 yet */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { ret = -EOPNOTSUPP; @@ -5347,7 +5427,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, For other RAID types and for RAID[56] reads, just allow a single stripe (on a single disk). */ if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && - (rw & REQ_WRITE)) { + (op == REQ_OP_WRITE)) { max_len = stripe_len * nr_data_stripes(map) - (offset - raid56_full_stripe_start); } else { @@ -5372,8 +5452,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, btrfs_dev_replace_set_lock_blocking(dev_replace); if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && - !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && - dev_replace->tgtdev != NULL) { + op != REQ_OP_WRITE && op != REQ_OP_DISCARD && + op != REQ_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { /* * in dev-replace case, for repair case (that's the only * case where the mirror is selected explicitly when @@ -5460,15 +5540,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, (offset + *length); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - if (rw & REQ_DISCARD) + if (op == REQ_OP_DISCARD) num_stripes = min_t(u64, map->num_stripes, stripe_nr_end - stripe_nr_orig); stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) + if (op != REQ_OP_WRITE && op != REQ_OP_DISCARD && + op != REQ_GET_READ_MIRRORS) mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) + if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD || + op == REQ_GET_READ_MIRRORS) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -5481,7 +5563,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) { + if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD || + op == REQ_GET_READ_MIRRORS) { num_stripes = map->num_stripes; } else if (mirror_num) { stripe_index = mirror_num - 1; @@ -5495,9 +5578,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); stripe_index *= map->sub_stripes; - if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) + if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) num_stripes = map->sub_stripes; - else if (rw & REQ_DISCARD) + else if (op == REQ_OP_DISCARD) num_stripes = min_t(u64, map->sub_stripes * (stripe_nr_end - stripe_nr_orig), map->num_stripes); @@ 
-5515,7 +5598,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { if (need_raid_map && - ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || + (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS || mirror_num > 1)) { /* push stripe_nr back to the start of the full stripe */ stripe_nr = div_u64(raid56_full_stripe_start, @@ -5543,8 +5626,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, /* We distribute the parity blocks across stripes */ div_u64_rem(stripe_nr + stripe_index, map->num_stripes, &stripe_index); - if (!(rw & (REQ_WRITE | REQ_DISCARD | - REQ_GET_READ_MIRRORS)) && mirror_num <= 1) + if ((op != REQ_OP_WRITE && op != REQ_OP_DISCARD && + op != REQ_GET_READ_MIRRORS) && mirror_num <= 1) mirror_num = 1; } } else { @@ -5558,8 +5641,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, mirror_num = stripe_index + 1; } if (stripe_index >= map->num_stripes) { - btrfs_crit(fs_info, "stripe index math went horribly wrong, " - "got stripe_index=%u, num_stripes=%u", + btrfs_crit(fs_info, + "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u", stripe_index, map->num_stripes); ret = -EINVAL; goto out; @@ -5567,9 +5650,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_alloc_stripes = num_stripes; if (dev_replace_is_ongoing) { - if (rw & (REQ_WRITE | REQ_DISCARD)) + if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) num_alloc_stripes <<= 1; - if (rw & REQ_GET_READ_MIRRORS) + if (op == REQ_GET_READ_MIRRORS) num_alloc_stripes++; tgtdev_indexes = num_stripes; } @@ -5584,7 +5667,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, /* build raid_map */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && - need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || + need_raid_map && + ((op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) || mirror_num > 1)) { u64 tmp; unsigned rot; @@ -5609,7 +5693,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, RAID6_Q_STRIPE; } - if (rw & REQ_DISCARD) { + if (op == REQ_OP_DISCARD) { u32 factor = 0; u32 sub_stripes = 0; u64 stripes_per_dev = 0; @@ -5689,14 +5773,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } - if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) + if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) max_errors = btrfs_chunk_max_errors(map); if (bbio->raid_map) sort_parity_stripes(bbio, num_stripes); tgtdev_indexes = 0; - if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && + if (dev_replace_is_ongoing && + (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) && dev_replace->tgtdev != NULL) { int index_where_to_add; u64 srcdev_devid = dev_replace->srcdev->devid; @@ -5731,7 +5816,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } num_stripes = index_where_to_add; - } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) && + } else if (dev_replace_is_ongoing && (op == REQ_GET_READ_MIRRORS) && dev_replace->tgtdev != NULL) { u64 srcdev_devid = dev_replace->srcdev->devid; int index_srcdev = 0; @@ -5762,20 +5847,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } if (found) { - if (physical_of_found + map->stripe_len <= - dev_replace->cursor_left) { - struct btrfs_bio_stripe *tgtdev_stripe = - bbio->stripes + num_stripes; + struct btrfs_bio_stripe *tgtdev_stripe = + bbio->stripes + num_stripes; - tgtdev_stripe->physical = physical_of_found; - tgtdev_stripe->length = - 
bbio->stripes[index_srcdev].length; - tgtdev_stripe->dev = dev_replace->tgtdev; - bbio->tgtdev_map[index_srcdev] = num_stripes; + tgtdev_stripe->physical = physical_of_found; + tgtdev_stripe->length = + bbio->stripes[index_srcdev].length; + tgtdev_stripe->dev = dev_replace->tgtdev; + bbio->tgtdev_map[index_srcdev] = num_stripes; - tgtdev_indexes++; - num_stripes++; - } + tgtdev_indexes++; + num_stripes++; } } @@ -5806,28 +5888,29 @@ out: return ret; } -int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_block(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num) { - return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, + return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, mirror_num, 0); } /* For Scrub/replace */ -int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map) { - return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, + return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, mirror_num, need_raid_map); } -int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, +int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len) { + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct extent_map_tree *em_tree = &map_tree->map_tree; struct extent_map *em; struct map_lookup *map; @@ -5843,13 +5926,13 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, read_unlock(&em_tree->lock); if (!em) { - printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n", - chunk_start); + btrfs_err(fs_info, "couldn't find em for chunk %Lu", + chunk_start); return -EIO; } if (em->start != chunk_start) { - printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n", + btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu", em->start, chunk_start); free_extent_map(em); return -EIO; @@ -5934,13 +6017,13 @@ static void btrfs_end_bio(struct bio *bio) BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; if (dev->bdev) { - if (bio->bi_rw & WRITE) + if (bio_op(bio) == REQ_OP_WRITE) btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_WRITE_ERRS); else btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_READ_ERRS); - if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) + if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH) btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_FLUSH_ERRS); btrfs_dev_stat_print_on_error(dev); @@ -5988,7 +6071,7 @@ static void btrfs_end_bio(struct bio *bio) */ static noinline void btrfs_schedule_bio(struct btrfs_root *root, struct btrfs_device *device, - int rw, struct bio *bio) + struct bio *bio) { int should_queue = 1; struct btrfs_pending_bios *pending_bios; @@ -5999,9 +6082,9 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, } /* don't bother with additional async steps for reads, right now */ - if (!(rw & REQ_WRITE)) { + if (bio_op(bio) == REQ_OP_READ) { bio_get(bio); - btrfsic_submit_bio(rw, bio); + btrfsic_submit_bio(bio); bio_put(bio); return; } @@ -6015,10 +6098,9 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, atomic_inc(&root->fs_info->nr_async_bios); WARN_ON(bio->bi_next); bio->bi_next = NULL; - bio->bi_rw |= rw; spin_lock(&device->io_lock); - if (bio->bi_rw & REQ_SYNC) + if (bio->bi_opf & REQ_SYNC) pending_bios = &device->pending_sync_bios; else pending_bios = &device->pending_bios; @@ 
-6041,7 +6123,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, struct bio *bio, u64 physical, int dev_nr, - int rw, int async) + int async) { struct btrfs_device *dev = bbio->stripes[dev_nr].dev; @@ -6055,10 +6137,12 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, rcu_read_lock(); name = rcu_dereference(dev->name); - pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " - "(%s id %llu), size=%u\n", rw, - (u64)bio->bi_iter.bi_sector, (u_long)dev->bdev->bd_dev, - name->str, dev->devid, bio->bi_iter.bi_size); + btrfs_debug(fs_info, + "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", + bio_op(bio), bio->bi_opf, + (u64)bio->bi_iter.bi_sector, + (u_long)dev->bdev->bd_dev, name->str, dev->devid, + bio->bi_iter.bi_size); rcu_read_unlock(); } #endif @@ -6067,9 +6151,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, btrfs_bio_counter_inc_noblocked(root->fs_info); if (async) - btrfs_schedule_bio(root, dev, rw, bio); + btrfs_schedule_bio(root, dev, bio); else - btrfsic_submit_bio(rw, bio); + btrfsic_submit_bio(bio); } static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) @@ -6086,7 +6170,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) } } -int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, +int btrfs_map_bio(struct btrfs_root *root, struct bio *bio, int mirror_num, int async_submit) { struct btrfs_device *dev; @@ -6103,8 +6187,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, map_length = length; btrfs_bio_counter_inc_blocked(root->fs_info); - ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, - mirror_num, 1); + ret = __btrfs_map_block(root->fs_info, bio_op(bio), logical, + &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(root->fs_info); return ret; @@ -6118,10 +6202,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, atomic_set(&bbio->stripes_pending, bbio->num_stripes); if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && - ((rw & WRITE) || (mirror_num > 1))) { + ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ - if (rw & WRITE) { + if (bio_op(bio) == REQ_OP_WRITE) { ret = raid56_parity_write(root, bio, bbio, map_length); } else { ret = raid56_parity_recover(root, bio, bbio, map_length, @@ -6133,14 +6217,16 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } if (map_length < length) { - btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", - logical, length, map_length); + btrfs_crit(root->fs_info, + "mapping failed logical %llu bio len %llu len %llu", + logical, length, map_length); BUG(); } for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { dev = bbio->stripes[dev_nr].dev; - if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { + if (!dev || !dev->bdev || + (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) { bbio_error(bbio, first_bio, logical); continue; } @@ -6152,7 +6238,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, bio = first_bio; submit_stripe_bio(root, bbio, bio, - bbio->stripes[dev_nr].physical, dev_nr, rw, + bbio->stripes[dev_nr].physical, dev_nr, async_submit); } btrfs_bio_counter_dec(root->fs_info); @@ -6250,27 +6336,23 @@ struct btrfs_device 
*btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } -static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, - struct extent_buffer *leaf, - struct btrfs_chunk *chunk) +/* Return -EIO if any error, otherwise return 0. */ +static int btrfs_check_chunk_valid(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) { - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - struct map_lookup *map; - struct extent_map *em; - u64 logical; u64 length; u64 stripe_len; - u64 devid; - u8 uuid[BTRFS_UUID_SIZE]; - int num_stripes; - int ret; - int i; + u16 num_stripes; + u16 sub_stripes; + u64 type; - logical = key->offset; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - /* Validation check */ + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + if (!num_stripes) { btrfs_err(root->fs_info, "invalid chunk num_stripes: %u", num_stripes); @@ -6281,6 +6363,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, "invalid chunk logical %llu", logical); return -EIO; } + if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) { + btrfs_err(root->fs_info, "invalid chunk sectorsize %u", + btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } if (!length || !IS_ALIGNED(length, root->sectorsize)) { btrfs_err(root->fs_info, "invalid chunk length %llu", length); @@ -6292,13 +6379,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, return -EIO; } if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)) { + type) { btrfs_err(root->fs_info, "unrecognized chunk type: %llu", ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & btrfs_chunk_type(leaf, chunk)); return -EIO; } + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && + num_stripes != 1)) { + btrfs_err(root->fs_info, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EIO; + } + + return 0; +} + +static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct map_lookup *map; + struct extent_map *em; + u64 logical; + u64 length; + u64 stripe_len; + u64 devid; + u8 uuid[BTRFS_UUID_SIZE]; + int num_stripes; + int ret; + int i; + + logical = key->offset; + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + + ret = btrfs_check_chunk_valid(root, leaf, chunk, logical); + if (ret) + return ret; read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); @@ -6345,7 +6473,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, BTRFS_UUID_SIZE); map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, uuid, NULL); - if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { + if (!map->stripes[i].dev && + !btrfs_test_opt(root->fs_info, DEGRADED)) { 
free_extent_map(em); return -EIO; } @@ -6357,8 +6486,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, free_extent_map(em); return -EIO; } - btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing", - devid, uuid); + btrfs_warn(root->fs_info, + "devid %llu uuid %pU is missing", + devid, uuid); } map->stripes[i].dev->in_fs_metadata = 1; } @@ -6413,7 +6543,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_root *root, fs_devices = find_fsid(fsid); if (!fs_devices) { - if (!btrfs_test_opt(root, DEGRADED)) + if (!btrfs_test_opt(root->fs_info, DEGRADED)) return ERR_PTR(-ENOENT); fs_devices = alloc_fs_devices(fsid); @@ -6475,7 +6605,7 @@ static int read_one_dev(struct btrfs_root *root, device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); if (!device) { - if (!btrfs_test_opt(root, DEGRADED)) + if (!btrfs_test_opt(root->fs_info, DEGRADED)) return -EIO; device = add_missing_dev(root, fs_devices, devid, dev_uuid); @@ -6484,7 +6614,7 @@ static int read_one_dev(struct btrfs_root *root, btrfs_warn(root->fs_info, "devid %llu uuid %pU missing", devid, dev_uuid); } else { - if (!device->bdev && !btrfs_test_opt(root, DEGRADED)) + if (!device->bdev && !btrfs_test_opt(root->fs_info, DEGRADED)) return -EIO; if(!device->bdev && !device->missing) { @@ -6535,7 +6665,8 @@ static int read_one_dev(struct btrfs_root *root, int btrfs_read_sys_array(struct btrfs_root *root) { - struct btrfs_super_block *super_copy = root->fs_info->super_copy; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_super_block *super_copy = fs_info->super_copy; struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; @@ -6546,6 +6677,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 array_size; u32 len = 0; u32 cur_offset; + u64 type; struct btrfs_key key; ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize); @@ -6555,8 +6687,8 @@ int btrfs_read_sys_array(struct btrfs_root *root) * overallocate but we can keep it as-is, only the first page is used. 
*/ sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); - if (!sb) - return -ENOMEM; + if (IS_ERR(sb)) + return PTR_ERR(sb); set_extent_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); /* @@ -6605,13 +6737,22 @@ int btrfs_read_sys_array(struct btrfs_root *root) num_stripes = btrfs_chunk_num_stripes(sb, chunk); if (!num_stripes) { - printk(KERN_ERR - "BTRFS: invalid number of stripes %u in sys_array at offset %u\n", + btrfs_err(fs_info, + "invalid number of stripes %u in sys_array at offset %u", num_stripes, cur_offset); ret = -EIO; break; } + type = btrfs_chunk_type(sb, chunk); + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { + btrfs_err(fs_info, + "invalid chunk type %llu in sys_array at offset %u", + type, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; @@ -6620,9 +6761,9 @@ int btrfs_read_sys_array(struct btrfs_root *root) if (ret) break; } else { - printk(KERN_ERR - "BTRFS: unexpected item type %u in sys_array at offset %u\n", - (u32)key.type, cur_offset); + btrfs_err(fs_info, + "unexpected item type %u in sys_array at offset %u", + (u32)key.type, cur_offset); ret = -EIO; break; } @@ -6630,12 +6771,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb_array_offset += len; cur_offset += len; } + clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return ret; out_short_read: - printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n", + btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u", len, cur_offset); + clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return -EIO; } @@ -6648,6 +6791,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) struct btrfs_key found_key; int ret; int slot; + u64 total_dev = 0; root = root->fs_info->chunk_root; @@ -6689,6 +6833,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) ret = read_one_dev(root, leaf, dev_item); if (ret) goto error; + total_dev++; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -6698,6 +6843,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) } path->slots[0]++; } + + /* + * After loading chunk tree, we've got all device information, + * do another round of validation checks. 
+ */ + if (total_dev != root->fs_info->fs_devices->total_devices) { + btrfs_err(root->fs_info, + "super_num_devices %llu mismatch with num_devices %llu found here", + btrfs_super_num_devices(root->fs_info->super_copy), + total_dev); + ret = -EINVAL; + goto error; + } + if (btrfs_super_total_bytes(root->fs_info->super_copy) < + root->fs_info->fs_devices->total_rw_bytes) { + btrfs_err(root->fs_info, + "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu", + btrfs_super_total_bytes(root->fs_info->super_copy), + root->fs_info->fs_devices->total_rw_bytes); + ret = -EINVAL; + goto error; + } ret = 0; error: unlock_chunks(root); @@ -6933,10 +7100,12 @@ int btrfs_get_dev_stats(struct btrfs_root *root, mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { - btrfs_warn(root->fs_info, "get dev_stats failed, device not found"); + btrfs_warn(root->fs_info, + "get dev_stats failed, device not found"); return -ENODEV; } else if (!dev->dev_stats_valid) { - btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid"); + btrfs_warn(root->fs_info, + "get dev_stats failed, not yet valid"); return -ENODEV; } else if (stats->flags & BTRFS_DEV_STATS_RESET) { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { @@ -7051,38 +7220,3 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) fs_devices = fs_devices->seed; } } - -static void btrfs_close_one_device(struct btrfs_device *device) -{ - struct btrfs_fs_devices *fs_devices = device->fs_devices; - struct btrfs_device *new_device; - struct rcu_string *name; - - if (device->bdev) - fs_devices->open_devices--; - - if (device->writeable && - device->devid != BTRFS_DEV_REPLACE_DEVID) { - list_del_init(&device->dev_alloc_list); - fs_devices->rw_devices--; - } - - if (device->missing) - fs_devices->missing_devices--; - - new_device = btrfs_alloc_device(NULL, &device->devid, - device->uuid); - BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ - - /* Safe because we are under uuid_mutex */ - if (device->name) { - name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(!name); /* -ENOMEM */ - rcu_assign_pointer(new_device->name, name); - } - - list_replace_rcu(&device->dev_list, &new_device->dev_list); - new_device->fs_devices = device->fs_devices; - - call_rcu(&device->rcu, free_device); -} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 0ac90f8d85bd..09ed29c67848 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -375,14 +375,14 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); void btrfs_get_bbio(struct btrfs_bio *bbio); void btrfs_put_bbio(struct btrfs_bio *bbio); -int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_block(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num); -int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map); -int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, +int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len); int btrfs_read_sys_array(struct btrfs_root *root); @@ -391,7 +391,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); -int btrfs_map_bio(struct 
btrfs_root *root, int rw, struct bio *bio, +int btrfs_map_bio(struct btrfs_root *root, struct bio *bio, int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index d1a177a3dbe8..fccbf5567e78 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -252,7 +252,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, goto out; inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 88d274e8ecf2..441b81a3e545 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -95,7 +95,7 @@ static int zlib_compress_pages(struct list_head *ws, *total_in = 0; if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) { - printk(KERN_WARNING "BTRFS: deflateInit failed\n"); + pr_warn("BTRFS: deflateInit failed\n"); ret = -EIO; goto out; } @@ -123,7 +123,7 @@ static int zlib_compress_pages(struct list_head *ws, while (workspace->strm.total_in < len) { ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); if (ret != Z_OK) { - printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", + pr_debug("BTRFS: deflate in loop returned %d\n", ret); zlib_deflateEnd(&workspace->strm); ret = -EIO; @@ -249,7 +249,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, } if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { - printk(KERN_WARNING "BTRFS: inflateInit failed\n"); + pr_warn("BTRFS: inflateInit failed\n"); return -EIO; } while (workspace->strm.total_in < srclen) { @@ -339,7 +339,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, } if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { - printk(KERN_WARNING "BTRFS: inflateInit failed\n"); + pr_warn("BTRFS: inflateInit failed\n"); return -EIO; } |
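
A recurring change in the __btrfs_map_block() hunks above converts bitmask tests on rw (rw & REQ_WRITE, rw & REQ_DISCARD) into equality tests on a single op. Once a request carries one opcode instead of a set of flag bits, AND-masking two opcodes can produce false positives. The following is a minimal userspace sketch of that hazard; the opcode values are illustrative stand-ins, not the kernel's actual REQ_OP_* numbering:

#include <stdio.h>

/* Stand-in opcode values; sequential like an opcode space, not flag bits. */
enum req_op { OP_READ = 0, OP_WRITE = 1, OP_DISCARD = 3 };

int main(void)
{
	enum req_op op = OP_DISCARD;

	/* Old style: ANDing opcodes misfires because 3 & 1 is nonzero,
	 * so a discard would be mistaken for a write. */
	printf("discard & write = %d (bogus match)\n", op & OP_WRITE);

	/* New style, as in this diff: compare the opcode directly. */
	printf("is write: %d, is discard: %d\n",
	       op == OP_WRITE, op == OP_DISCARD);
	return 0;
}

The diff's op == REQ_GET_READ_MIRRORS comparisons follow the same pattern for btrfs's private read-mirrors request.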
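
The BTRFS_MAX_DEVS() rewrite replaces the open-coded BTRFS_LEAF_DATA_SIZE(r) - sizeof(struct btrfs_item) with BTRFS_MAX_ITEM_SIZE(r); the arithmetic is unchanged: take the largest payload one leaf item can hold, subtract the fixed chunk record, and divide by the per-stripe record. Below is a hedged worked example in plain C; the 16 KiB nodesize and the byte sizes (101, 25, 80, 32) are assumptions for illustration, not authoritative on-disk constants:

#include <stdio.h>

int main(void)
{
	/* All sizes below are illustrative assumptions. */
	unsigned long nodesize  = 16384; /* leaf size */
	unsigned long leaf_hdr  = 101;   /* sizeof(struct btrfs_header) */
	unsigned long item_hdr  = 25;    /* sizeof(struct btrfs_item) */
	unsigned long chunk_sz  = 80;    /* sizeof(struct btrfs_chunk),
					    including its embedded first stripe */
	unsigned long stripe_sz = 32;    /* sizeof(struct btrfs_stripe) */

	/* BTRFS_MAX_ITEM_SIZE(r): largest payload one leaf item can hold. */
	unsigned long max_item = nodesize - leaf_hdr - item_hdr;

	/* BTRFS_MAX_DEVS(r): stripes that fit; the + 1 compensates for the
	 * stripe already embedded in struct btrfs_chunk (assumed here). */
	unsigned long max_devs = (max_item - chunk_sz) / stripe_sz + 1;

	printf("max item payload %lu bytes -> up to %lu stripes per chunk\n",
	       max_item, max_devs);
	return 0;
}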
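
btrfs_check_chunk_valid(), split out of read_one_chunk() above, centralizes the per-profile stripe-count rules so both the chunk tree and the sys_array paths reject malformed chunks before they reach the mapping tree. Here is a self-contained sketch of just that predicate; the BG_* flag values are invented stand-ins (the real bits live in the btrfs headers), but the num_stripes/sub_stripes rules mirror the diff:

#include <stdint.h>
#include <stdio.h>

#define BG_RAID1  (1ULL << 0) /* stand-in profile bits, not on-disk values */
#define BG_DUP    (1ULL << 1)
#define BG_RAID10 (1ULL << 2)
#define BG_RAID5  (1ULL << 3)
#define BG_RAID6  (1ULL << 4)
#define BG_PROFILE_MASK (BG_RAID1 | BG_DUP | BG_RAID10 | BG_RAID5 | BG_RAID6)

/* Returns -1 on an invalid combination, 0 otherwise; the kernel uses -EIO. */
static int check_chunk_stripes(uint64_t type, uint16_t num_stripes,
			       uint16_t sub_stripes)
{
	if ((type & BG_RAID10 && sub_stripes != 2) ||
	    (type & BG_RAID1 && num_stripes < 1) ||
	    (type & BG_RAID5 && num_stripes < 2) ||
	    (type & BG_RAID6 && num_stripes < 3) ||
	    (type & BG_DUP && num_stripes > 2) ||
	    ((type & BG_PROFILE_MASK) == 0 && num_stripes != 1))
		return -1;
	return 0;
}

int main(void)
{
	/* A RAID6 chunk needs at least three stripes. */
	printf("raid6 with 2 stripes: %d\n", check_chunk_stripes(BG_RAID6, 2, 0));
	/* SINGLE (no profile bit set) must have exactly one stripe. */
	printf("single with 1 stripe: %d\n", check_chunk_stripes(0, 1, 0));
	return 0;
}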
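
Similarly, the tail of btrfs_read_chunk_tree() gains two whole-filesystem cross-checks that can only run once every device item has been counted: the number of devices found on disk must agree with what the superblock advertises, and the superblock's total_bytes may not be smaller than the accumulated writable bytes of the devices. A rough userspace model of that validation order, with invented struct and field names:

#include <stdint.h>
#include <stdio.h>

struct super_claims { uint64_t num_devices, total_bytes; }; /* hypothetical */
struct scan_result  { uint64_t total_dev, total_rw_bytes; }; /* hypothetical */

static int validate_against_super(const struct super_claims *sb,
				  const struct scan_result *found)
{
	if (found->total_dev != sb->num_devices) {
		fprintf(stderr,
			"super_num_devices %llu mismatch with num_devices %llu\n",
			(unsigned long long)sb->num_devices,
			(unsigned long long)found->total_dev);
		return -1; /* the kernel returns -EINVAL */
	}
	if (sb->total_bytes < found->total_rw_bytes) {
		fprintf(stderr,
			"super_total_bytes smaller than devices' total_rw_bytes\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	struct super_claims sb = { .num_devices = 2, .total_bytes = 1UL << 30 };
	struct scan_result found = { .total_dev = 3, .total_rw_bytes = 1UL << 30 };

	/* One device too many: mount-time validation would fail here. */
	return validate_against_super(&sb, &found) ? 1 : 0;
}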