diff options
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 70 |
1 file changed, 55 insertions, 15 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0adf38b00fa0..74aa552f4793 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -10,6 +10,7 @@ #include <linux/math64.h> #include <linux/ratelimit.h> #include <linux/error-injection.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -47,6 +48,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header *header; struct extent_buffer *leaf; struct inode *inode = NULL; + unsigned nofs_flag; int ret; key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -68,7 +70,14 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, btrfs_disk_key_to_cpu(&location, &disk_key); btrfs_release_path(path); - inode = btrfs_iget(fs_info->sb, &location, root, NULL); + /* + * We are often under a trans handle at this point, so we need to make + * sure NOFS is set to keep us from deadlocking. + */ + nofs_flag = memalloc_nofs_save(); + inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path); + btrfs_release_path(path); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(inode)) return inode; @@ -830,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, path->search_commit_root = 1; path->skip_locking = 1; + /* + * We must pass a path with search_commit_root set to btrfs_iget in + * order to avoid a deadlock when allocating extents for the tree root. + * + * When we are COWing an extent buffer from the tree root, when looking + * for a free extent, at extent-tree.c:find_free_extent(), we can find + * block group without its free space cache loaded. When we find one + * we must load its space cache which requires reading its free space + * cache's inode item from the root tree. 
If this inode item is located + * in the same leaf that we started COWing before, then we end up in + * deadlock on the extent buffer (trying to read lock it when we + * previously write locked it). + * + * It's safe to read the inode item using the commit root because + * block groups, once loaded, stay in memory forever (until they are + * removed) as well as their space caches once loaded. New block groups + * once created get their ->cached field set to BTRFS_CACHE_FINISHED so + * we will never try to read their inode item while the fs is mounted. + */ inode = lookup_free_space_inode(fs_info, block_group, path); if (IS_ERR(inode)) { btrfs_free_path(path); @@ -1679,6 +1707,8 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; + if (info->max_extent_size > ctl->unit) + info->max_extent_size = 0; } static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, @@ -1762,6 +1792,13 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl, return -1; } +static inline u64 get_max_extent_size(struct btrfs_free_space *entry) +{ + if (entry->bitmap) + return entry->max_extent_size; + return entry->bytes; +} + /* Cache the size of the max extent in bytes */ static struct btrfs_free_space * find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, @@ -1783,8 +1820,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, for (node = &entry->offset_index; node; node = rb_next(node)) { entry = rb_entry(node, struct btrfs_free_space, offset_index); if (entry->bytes < *bytes) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1802,8 +1839,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, } if (entry->bytes < *bytes + align_off) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + 
*max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1815,8 +1852,10 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, *offset = tmp; *bytes = size; return entry; - } else if (size > *max_extent_size) { - *max_extent_size = size; + } else { + *max_extent_size = + max(get_max_extent_size(entry), + *max_extent_size); } continue; } @@ -2110,8 +2149,7 @@ new_bitmap: out: if (info) { - if (info->bitmap) - kfree(info->bitmap); + kfree(info->bitmap); kmem_cache_free(btrfs_free_space_cachep, info); } @@ -2440,6 +2478,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, struct rb_node *n; int count = 0; + spin_lock(&ctl->tree_lock); for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { info = rb_entry(n, struct btrfs_free_space, offset_index); if (info->bytes >= bytes && !block_group->ro) @@ -2448,6 +2487,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, info->offset, info->bytes, (info->bitmap) ? "yes" : "no"); } + spin_unlock(&ctl->tree_lock); btrfs_info(fs_info, "block group has cluster?: %s", list_empty(&block_group->cluster_list) ? 
"no" : "yes"); btrfs_info(fs_info, @@ -2676,8 +2716,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); if (err) { - if (search_bytes > *max_extent_size) - *max_extent_size = search_bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); return 0; } @@ -2714,8 +2754,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, entry = rb_entry(node, struct btrfs_free_space, offset_index); while (1) { - if (entry->bytes < bytes && entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + if (entry->bytes < bytes) + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); if (entry->bytes < bytes || (!entry->bitmap && entry->offset < min_start)) { @@ -3601,8 +3642,7 @@ again: if (info) kmem_cache_free(btrfs_free_space_cachep, info); - if (map) - kfree(map); + kfree(map); return 0; } |