summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2011-04-15 16:05:44 -0400
committerChris Mason <chris.mason@oracle.com>2011-04-15 16:05:44 -0400
commit0e4f8f888845f9dca540ad175884244e5db5eea2 (patch)
treecc43a4a6ff229a84f26376f27d9d938ac4dc1b26
parent329c5056be8774255db04b01242a9ff4f02eb8ea (diff)
downloadblackbird-op-linux-0e4f8f888845f9dca540ad175884244e5db5eea2.tar.gz
blackbird-op-linux-0e4f8f888845f9dca540ad175884244e5db5eea2.zip
Btrfs: don't force chunk allocation in find_free_extent
find_free_extent likes to allocate in contiguous clusters, which makes writeback faster, especially on SSD storage. As the FS fragments, these clusters become harder to find and we have to decide between allocating a new chunk to make more clusters or giving up on the cluster to allocate from the free space we have. Right now it creates too many chunks, and you can end up with a whole FS that is mostly empty metadata chunks. This commit changes the allocation code to be more strict and only allocate new chunks when we've made good use of the chunks we already have. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/extent-tree.c95
1 files changed, 73 insertions, 22 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..26479484180d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
#include "locking.h"
#include "free-space-cache.h"
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated. This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+ CHUNK_ALLOC_NO_FORCE = 0,
+ CHUNK_ALLOC_FORCE = 1,
+ CHUNK_ALLOC_LIMITED = 2,
+};
+
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_readonly = 0;
found->bytes_may_use = 0;
found->full = 0;
- found->force_alloc = 0;
+ found->force_alloc = CHUNK_ALLOC_NO_FORCE;
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3169,7 @@ again:
if (!data_sinfo->full && alloc_chunk) {
u64 alloc_target;
- data_sinfo->force_alloc = 1;
+ data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3179,8 @@ alloc:
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
bytes + 2 * 1024 * 1024,
- alloc_target, 0);
+ alloc_target,
+ CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans, root);
if (ret < 0) {
if (ret != -ENOSPC)
@@ -3239,31 +3259,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
- found->force_alloc = 1;
+ found->force_alloc = CHUNK_ALLOC_FORCE;
}
rcu_read_unlock();
}
static int should_alloc_chunk(struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 alloc_bytes)
+ struct btrfs_space_info *sinfo, u64 alloc_bytes,
+ int force)
{
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+ u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
u64 thresh;
- if (sinfo->bytes_used + sinfo->bytes_reserved +
- alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+ if (force == CHUNK_ALLOC_FORCE)
+ return 1;
+
+ /*
+ * in limited mode, we want to have some free space up to
+ * about 1% of the FS size.
+ */
+ if (force == CHUNK_ALLOC_LIMITED) {
+ thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = max_t(u64, 64 * 1024 * 1024,
+ div_factor_fine(thresh, 1));
+
+ if (num_bytes - num_allocated < thresh)
+ return 1;
+ }
+
+ /*
+ * we have two similar checks here, one based on percentage
+ * and once based on a hard number of 256MB. The idea
+ * is that if we have a good amount of free
+ * room, don't allocate a chunk. A good mount is
+ * less than 80% utilized of the chunks we have allocated,
+ * or more than 256MB free
+ */
+ if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
return 0;
- if (sinfo->bytes_used + sinfo->bytes_reserved +
- alloc_bytes < div_factor(num_bytes, 8))
+ if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
return 0;
thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+ /* 256MB or 5% of the FS */
thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
return 0;
-
return 1;
}
@@ -3289,17 +3334,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
spin_lock(&space_info->lock);
if (space_info->force_alloc)
- force = 1;
+ force = space_info->force_alloc;
if (space_info->full) {
spin_unlock(&space_info->lock);
goto out;
}
- if (!force && !should_alloc_chunk(extent_root, space_info,
- alloc_bytes)) {
+ if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
spin_unlock(&space_info->lock);
goto out;
}
+
spin_unlock(&space_info->lock);
/*
@@ -3327,7 +3372,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info->full = 1;
else
ret = 1;
- space_info->force_alloc = 0;
+ space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
spin_unlock(&space_info->lock);
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
@@ -5303,11 +5348,13 @@ loop:
if (allowed_chunk_alloc) {
ret = do_chunk_alloc(trans, root, num_bytes +
- 2 * 1024 * 1024, data, 1);
+ 2 * 1024 * 1024, data,
+ CHUNK_ALLOC_LIMITED);
allowed_chunk_alloc = 0;
done_chunk_alloc = 1;
- } else if (!done_chunk_alloc) {
- space_info->force_alloc = 1;
+ } else if (!done_chunk_alloc &&
+ space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+ space_info->force_alloc = CHUNK_ALLOC_LIMITED;
}
if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5440,8 @@ again:
*/
if (empty_size || root->ref_cows)
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes + 2 * 1024 * 1024, data, 0);
+ num_bytes + 2 * 1024 * 1024, data,
+ CHUNK_ALLOC_NO_FORCE);
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5453,7 @@ again:
num_bytes = num_bytes & ~(root->sectorsize - 1);
num_bytes = max(num_bytes, min_alloc_size);
do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes, data, 1);
+ num_bytes, data, CHUNK_ALLOC_FORCE);
goto again;
}
if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8157,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
alloc_flags = update_block_group_flags(root, cache->flags);
if (alloc_flags != cache->flags)
- do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+ do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+ CHUNK_ALLOC_FORCE);
ret = set_block_group_ro(cache);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
- ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+ ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+ CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
ret = set_block_group_ro(cache);
@@ -8128,7 +8178,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 type)
{
u64 alloc_flags = get_alloc_profile(root, type);
- return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+ return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+ CHUNK_ALLOC_FORCE);
}
/*
OpenPOWER on IntegriCloud