summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-09-05 16:13:11 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:07 -0400
commite02119d5a7b4396c5a872582fddc8bd6d305a70a (patch)
tree825efe2a79dbca8d61256183f3526a5b5dc40dc6 /fs/btrfs/transaction.c
parenta1b32a5932cfac7c38b442582285f3da2a09dfd8 (diff)
downloadblackbird-op-linux-e02119d5a7b4396c5a872582fddc8bd6d305a70a.tar.gz
blackbird-op-linux-e02119d5a7b4396c5a872582fddc8bd6d305a70a.zip
Btrfs: Add a write ahead tree log to optimize synchronous operations
File syncs and directory syncs are optimized by copying their items into a special (copy-on-write) log tree. There is one log tree per subvolume and the btrfs super block points to a tree of log tree roots. After a crash, items are copied out of the log tree and back into the subvolume. See tree-log.c for all the details. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c45
1 files changed, 40 insertions, 5 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index eff3ad72991b..49c4f5b40ed6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,6 +25,7 @@
#include "transaction.h"
#include "locking.h"
#include "ref-cache.h"
+#include "tree-log.h"
static int total_trans = 0;
extern struct kmem_cache *btrfs_trans_handle_cachep;
@@ -57,6 +58,7 @@ static noinline int join_transaction(struct btrfs_root *root)
root->fs_info->generation++;
root->fs_info->last_alloc = 0;
root->fs_info->last_data_alloc = 0;
+ root->fs_info->last_log_alloc = 0;
cur_trans->num_writers = 1;
cur_trans->num_joined = 0;
cur_trans->transid = root->fs_info->generation;
@@ -83,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root)
return 0;
}
-static noinline int record_root_in_trans(struct btrfs_root *root)
+noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
{
struct btrfs_dirty_root *dirty;
u64 running_trans_id = root->fs_info->running_transaction->transid;
@@ -151,7 +153,7 @@ static void wait_current_trans(struct btrfs_root *root)
}
}
-struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
+static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
int num_blocks, int wait)
{
struct btrfs_trans_handle *h =
@@ -164,7 +166,7 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
ret = join_transaction(root);
BUG_ON(ret);
- record_root_in_trans(root);
+ btrfs_record_root_in_trans(root);
h->transid = root->fs_info->running_transaction->transid;
h->transaction = root->fs_info->running_transaction;
h->blocks_reserved = num_blocks;
@@ -456,6 +458,8 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
BUG_ON(!root->ref_tree);
dirty = root->dirty_root;
+ btrfs_free_log(trans, root);
+
if (root->commit_root == root->node) {
WARN_ON(root->node->start !=
btrfs_root_bytenr(&root->root_item));
@@ -600,7 +604,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
num_bytes -= btrfs_root_used(&dirty->root->root_item);
bytes_used = btrfs_root_used(&root->root_item);
if (num_bytes) {
- record_root_in_trans(root);
+ btrfs_record_root_in_trans(root);
btrfs_set_root_used(&root->root_item,
bytes_used - num_bytes);
}
@@ -745,7 +749,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
int ret;
INIT_LIST_HEAD(&dirty_fs_roots);
-
mutex_lock(&root->fs_info->trans_mutex);
if (trans->transaction->in_commit) {
cur_trans = trans->transaction;
@@ -821,10 +824,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
WARN_ON(cur_trans != trans->transaction);
+ /* btrfs_commit_tree_roots is responsible for getting the
+ * various roots consistent with each other. Every pointer
+ * in the tree of tree roots has to point to the most up to date
+ * root for every subvolume and other tree. So, we have to keep
+ * the tree logging code from jumping in and changing any
+ * of the trees.
+ *
+ * At this point in the commit, there can't be any tree-log
+ * writers, but a little lower down we drop the trans mutex
+ * and let new people in. By holding the tree_log_mutex
+ * from now until after the super is written, we avoid races
+ * with the tree-log code.
+ */
+ mutex_lock(&root->fs_info->tree_log_mutex);
+
ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
&dirty_fs_roots);
BUG_ON(ret);
+ /* add_dirty_roots gets rid of all the tree log roots, it is now
+ * safe to free the root of tree log roots
+ */
+ btrfs_free_log_root_tree(trans, root->fs_info);
+
ret = btrfs_commit_tree_roots(trans, root);
BUG_ON(ret);
@@ -843,6 +866,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
chunk_root->node->start);
btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
btrfs_header_level(chunk_root->node));
+
+ if (!root->fs_info->log_root_recovering) {
+ btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
+ btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
+ }
+
memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
sizeof(root->fs_info->super_copy));
@@ -857,6 +886,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
BUG_ON(ret);
write_ctree_super(trans, root);
+ /*
+ * the super is written, we can safely allow the tree-loggers
+ * to go about their business
+ */
+ mutex_unlock(&root->fs_info->tree_log_mutex);
+
btrfs_finish_extent_commit(trans, root, pinned_copy);
mutex_lock(&root->fs_info->trans_mutex);
OpenPOWER on IntegriCloud