diff options
author | Chris Mason <chris.mason@oracle.com> | 2011-10-31 20:52:39 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2011-11-06 03:03:48 -0500 |
commit | e688b7252f784c2479d559f9f70ca8354752c5e7 (patch) | |
tree | 3934b0a9c348b2900e08e8fc9c0e6819e80d0fff /fs/btrfs/extent-tree.c | |
parent | 1eae31e918972bbeefc119d23c1d67674f49a301 (diff) | |
download | talos-op-linux-e688b7252f784c2479d559f9f70ca8354752c5e7.tar.gz talos-op-linux-e688b7252f784c2479d559f9f70ca8354752c5e7.zip |
Btrfs: fix extent pinning bugs in the tree log
The tree log had two important bugs that could cause corruptions after a
crash. Sometimes we were allowing tree log blocks to be reused after
the tree log was committed but before the transaction commit was done.
This allowed a future metadata write to overwrite the tree log data. It
is fixed by adding a new variant of freeing reserved extents that always
pins them. Credit goes to Stefan Behrens and Arne Jansen for many, many
hours spent tracking this bug down.
During tree log replay, we do a pass through the tree log and pin all
the extents we find. This makes sure the replay code won't go in and
use any of those blocks for new allocations during replay. The problem
is that the free space cache isn't honoring these pinned extents. So the
allocator can end up handing them out, leading to all kinds of problems
during replay.
The fix here is to force any free space cache to load while we pin the
extents, and then to make sure we remove the pinned extents from the
free space rbtree.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Reported-by: Stefan Behrens <sbehrens@giantdisaster.de>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 51 |
1 files changed, 48 insertions, 3 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 28c4809851a5..cb7626646bba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4344,6 +4344,34 @@ int btrfs_pin_extent(struct btrfs_root *root, return 0; } +/* + * this function must be called within transaction + */ +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes) +{ + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + + /* + * pull in the free space cache (if any) so that our pin + * removes the free space from the cache. We have load_only set + * to one because the slow code to read in the free extents does check + * the pinned extents. + */ + cache_block_group(cache, trans, root, 1); + + pin_down_extent(root, cache, bytenr, num_bytes, 0); + + /* remove us from the free space cache (if we're there at all) */ + btrfs_remove_free_space(cache, bytenr, num_bytes); + btrfs_put_block_group(cache); + return 0; +} + /** * btrfs_update_reserved_bytes - update the block_group and space info counters * @cache: The cache we are manipulating @@ -5487,7 +5515,8 @@ again: return ret; } -int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) +static int __btrfs_free_reserved_extent(struct btrfs_root *root, + u64 start, u64 len, int pin) { struct btrfs_block_group_cache *cache; int ret = 0; @@ -5502,8 +5531,12 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) if (btrfs_test_opt(root, DISCARD)) ret = btrfs_discard_extent(root, start, len, NULL); - btrfs_add_free_space(cache, start, len); - btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); + if (pin) + pin_down_extent(root, cache, start, len, 1); + else { + btrfs_add_free_space(cache, start, len); + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); + } btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, start, len); @@ -5511,6 
+5544,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return ret; } +int btrfs_free_reserved_extent(struct btrfs_root *root, + u64 start, u64 len) +{ + return __btrfs_free_reserved_extent(root, start, len, 0); +} + +int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, + u64 start, u64 len) +{ + return __btrfs_free_reserved_extent(root, start, len, 1); +} + static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, |