summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/btrfs/free-space-cache.c 73
-rw-r--r-- fs/btrfs/free-space-cache.h 2
-rw-r--r-- fs/btrfs/inode-map.c 2
3 files changed, 71 insertions, 6 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0ddc114e2aed..2ee73c276acb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -32,6 +32,12 @@
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
+struct btrfs_trim_range {
+ u64 start;
+ u64 bytes;
+ struct list_head list;
+};
+
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -882,6 +888,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
int ret;
struct btrfs_free_cluster *cluster = NULL;
struct rb_node *node = rb_first(&ctl->free_space_offset);
+ struct btrfs_trim_range *trim_entry;
/* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list)) {
@@ -917,6 +924,21 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
cluster = NULL;
}
}
+
+ /*
+ * Make sure we don't miss any range that was removed from our rbtree
+ * because trimming is running. Otherwise after a umount+mount (or crash
+ * after committing the transaction) we would leak free space and get
+ * an inconsistent free space cache report from fsck.
+ */
+ list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
+ ret = io_ctl_add_entry(io_ctl, trim_entry->start,
+ trim_entry->bytes, NULL);
+ if (ret)
+ goto fail;
+ *entries += 1;
+ }
+
return 0;
fail:
return -ENOSPC;
@@ -1136,12 +1158,15 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
io_ctl_set_generation(&io_ctl, trans->transid);
+ mutex_lock(&ctl->cache_writeout_mutex);
/* Write out the extent entries in the free space cache */
ret = write_cache_extent_entries(&io_ctl, ctl,
block_group, &entries, &bitmaps,
&bitmap_list);
- if (ret)
+ if (ret) {
+ mutex_unlock(&ctl->cache_writeout_mutex);
goto out_nospc;
+ }
/*
* Some spaces that are freed in the current transaction are pinned,
@@ -1149,11 +1174,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* committed, we shouldn't lose them.
*/
ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries);
- if (ret)
+ if (ret) {
+ mutex_unlock(&ctl->cache_writeout_mutex);
goto out_nospc;
+ }
- /* At last, we write out all the bitmaps. */
+ /*
+ * At last, we write out all the bitmaps and keep cache_writeout_mutex
+ * locked while doing it because a concurrent trim can be manipulating
+ * or freeing the bitmap.
+ */
ret = write_bitmap_entries(&io_ctl, &bitmap_list);
+ mutex_unlock(&ctl->cache_writeout_mutex);
if (ret)
goto out_nospc;
@@ -2296,6 +2328,8 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
ctl->start = block_group->key.objectid;
ctl->private = block_group;
ctl->op = &free_space_op;
+ INIT_LIST_HEAD(&ctl->trimming_ranges);
+ mutex_init(&ctl->cache_writeout_mutex);
/*
* we only want to have 32k of ram per block group for keeping
@@ -2912,10 +2946,12 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
static int do_trimming(struct btrfs_block_group_cache *block_group,
u64 *total_trimmed, u64 start, u64 bytes,
- u64 reserved_start, u64 reserved_bytes)
+ u64 reserved_start, u64 reserved_bytes,
+ struct btrfs_trim_range *trim_entry)
{
struct btrfs_space_info *space_info = block_group->space_info;
struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
int ret;
int update = 0;
u64 trimmed = 0;
@@ -2935,7 +2971,10 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
if (!ret)
*total_trimmed += trimmed;
+ mutex_lock(&ctl->cache_writeout_mutex);
btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
+ list_del(&trim_entry->list);
+ mutex_unlock(&ctl->cache_writeout_mutex);
if (update) {
spin_lock(&space_info->lock);
@@ -2963,16 +3002,21 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
u64 bytes;
while (start < end) {
+ struct btrfs_trim_range trim_entry;
+
+ mutex_lock(&ctl->cache_writeout_mutex);
spin_lock(&ctl->tree_lock);
if (ctl->free_space < minlen) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
break;
}
entry = tree_search_offset(ctl, start, 0, 1);
if (!entry) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
break;
}
@@ -2981,6 +3025,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
node = rb_next(&entry->offset_index);
if (!node) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
goto out;
}
entry = rb_entry(node, struct btrfs_free_space,
@@ -2989,6 +3034,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
if (entry->offset >= end) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
break;
}
@@ -2998,6 +3044,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
bytes = min(extent_start + extent_bytes, end) - start;
if (bytes < minlen) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
goto next;
}
@@ -3005,9 +3052,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
kmem_cache_free(btrfs_free_space_cachep, entry);
spin_unlock(&ctl->tree_lock);
+ trim_entry.start = extent_start;
+ trim_entry.bytes = extent_bytes;
+ list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
+ mutex_unlock(&ctl->cache_writeout_mutex);
ret = do_trimming(block_group, total_trimmed, start, bytes,
- extent_start, extent_bytes);
+ extent_start, extent_bytes, &trim_entry);
if (ret)
break;
next:
@@ -3036,17 +3087,21 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
while (offset < end) {
bool next_bitmap = false;
+ struct btrfs_trim_range trim_entry;
+ mutex_lock(&ctl->cache_writeout_mutex);
spin_lock(&ctl->tree_lock);
if (ctl->free_space < minlen) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
break;
}
entry = tree_search_offset(ctl, offset, 1, 0);
if (!entry) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
next_bitmap = true;
goto next;
}
@@ -3055,6 +3110,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
ret2 = search_bitmap(ctl, entry, &start, &bytes);
if (ret2 || start >= end) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
next_bitmap = true;
goto next;
}
@@ -3062,6 +3118,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
bytes = min(bytes, end - start);
if (bytes < minlen) {
spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
goto next;
}
@@ -3070,9 +3127,13 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
free_bitmap(ctl, entry);
spin_unlock(&ctl->tree_lock);
+ trim_entry.start = start;
+ trim_entry.bytes = bytes;
+ list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
+ mutex_unlock(&ctl->cache_writeout_mutex);
ret = do_trimming(block_group, total_trimmed, start, bytes,
- start, bytes);
+ start, bytes, &trim_entry);
if (ret)
break;
next:
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 0cf4977ef70d..88b2238a0aed 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -38,6 +38,8 @@ struct btrfs_free_space_ctl {
u64 start;
struct btrfs_free_space_op *op;
void *private;
+ struct mutex cache_writeout_mutex;
+ struct list_head trimming_ranges;
};
struct btrfs_free_space_op {
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 4ebd5ebb1ea1..74faea3a516e 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -364,6 +364,8 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root)
ctl->start = 0;
ctl->private = NULL;
ctl->op = &free_ino_op;
+ INIT_LIST_HEAD(&ctl->trimming_ranges);
+ mutex_init(&ctl->cache_writeout_mutex);
/*
* Initially we allow to use 16K of ram to cache chunks of
OpenPOWER on IntegriCloud