summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/delayed-inode.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2013-04-01 17:08:13 -0700
committerTejun Heo <tj@kernel.org>2013-04-01 18:45:36 -0700
commit229641a6f1f09e27a1f12fba38980f33f4c92975 (patch)
tree234a6f8aea0910de3242af0bbe6d7494fcf81847 /fs/btrfs/delayed-inode.c
parentd55262c4d164759a8debe772da6c9b16059dec47 (diff)
parent07961ac7c0ee8b546658717034fe692fd12eefa9 (diff)
downloadtalos-op-linux-229641a6f1f09e27a1f12fba38980f33f4c92975.tar.gz
talos-op-linux-229641a6f1f09e27a1f12fba38980f33f4c92975.zip
Merge tag 'v3.9-rc5' into wq/for-3.10
Writeback conversion to workqueue will be based on top of wq/for-3.10 branch to take advantage of custom attrs and NUMA support for unbound workqueues. Mainline currently contains two commits which result in non-trivial merge conflicts with wq/for-3.10 and because block/for-3.10/core is based on v3.9-rc3 which contains one of the conflicting commits, we need a pre-merge-window merge anyway. Let's pull v3.9-rc5 into wq/for-3.10 so that the block tree doesn't suffer from workqueue merge conflicts. The two conflicts and their resolutions: * e68035fb65 ("workqueue: convert to idr_alloc()") in mainline changes worker_pool_assign_id() to use idr_alloc() instead of the old idr interface. worker_pool_assign_id() goes through multiple locking changes in wq/for-3.10 causing the following conflict. static int worker_pool_assign_id(struct worker_pool *pool) { int ret; <<<<<<< HEAD lockdep_assert_held(&wq_pool_mutex); do { if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL)) return -ENOMEM; ret = idr_get_new(&worker_pool_idr, pool, &pool->id); } while (ret == -EAGAIN); ======= mutex_lock(&worker_pool_idr_mutex); ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); if (ret >= 0) pool->id = ret; mutex_unlock(&worker_pool_idr_mutex); >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89 return ret < 0 ? ret : 0; } We want locking from the former and idr_alloc() usage from the latter, which can be combined to the following. static int worker_pool_assign_id(struct worker_pool *pool) { int ret; lockdep_assert_held(&wq_pool_mutex); ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); if (ret >= 0) { pool->id = ret; return 0; } return ret; } * eb2834285c ("workqueue: fix possible pool stall bug in wq_unbind_fn()") updated wq_unbind_fn() such that it has single larger for_each_std_worker_pool() loop instead of two separate loops with a schedule() call inbetween. wq/for-3.10 renamed pool->assoc_mutex to pool->manager_mutex causing the following conflict (earlier function body and comments omitted for brevity). static void wq_unbind_fn(struct work_struct *work) { ... spin_unlock_irq(&pool->lock); <<<<<<< HEAD mutex_unlock(&pool->manager_mutex); } ======= mutex_unlock(&pool->assoc_mutex); >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89 schedule(); <<<<<<< HEAD for_each_cpu_worker_pool(pool, cpu) ======= >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89 atomic_set(&pool->nr_running, 0); spin_lock_irq(&pool->lock); wake_up_worker(pool); spin_unlock_irq(&pool->lock); } } The resolution is mostly trivial. We want the control flow of the latter with the rename of the former. static void wq_unbind_fn(struct work_struct *work) { ... spin_unlock_irq(&pool->lock); mutex_unlock(&pool->manager_mutex); schedule(); atomic_set(&pool->nr_running, 0); spin_lock_irq(&pool->lock); wake_up_worker(pool); spin_unlock_irq(&pool->lock); } } Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'fs/btrfs/delayed-inode.c')
-rw-r--r--fs/btrfs/delayed-inode.c151
1 files changed, 90 insertions, 61 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0b278b117cbe..14fce27b4780 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -22,8 +22,9 @@
#include "disk-io.h"
#include "transaction.h"
-#define BTRFS_DELAYED_WRITEBACK 400
-#define BTRFS_DELAYED_BACKGROUND 100
+#define BTRFS_DELAYED_WRITEBACK 512
+#define BTRFS_DELAYED_BACKGROUND 128
+#define BTRFS_DELAYED_BATCH 16
static struct kmem_cache *delayed_node_cache;
@@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
BTRFS_DELAYED_DELETION_ITEM);
}
+static void finish_one_item(struct btrfs_delayed_root *delayed_root)
+{
+ int seq = atomic_inc_return(&delayed_root->items_seq);
+ if ((atomic_dec_return(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
+ waitqueue_active(&delayed_root->wait))
+ wake_up(&delayed_root->wait);
+}
+
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
struct rb_root *root;
@@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
rb_erase(&delayed_item->rb_node, root);
delayed_item->delayed_node->count--;
- if (atomic_dec_return(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
+
+ finish_one_item(delayed_root);
}
static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
@@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
delayed_node->count--;
delayed_root = delayed_node->root->fs_info->delayed_root;
- if (atomic_dec_return(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
+ finish_one_item(delayed_root);
}
}
@@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode)
btrfs_release_delayed_node(delayed_node);
}
-struct btrfs_async_delayed_node {
- struct btrfs_root *root;
- struct btrfs_delayed_node *delayed_node;
+struct btrfs_async_delayed_work {
+ struct btrfs_delayed_root *delayed_root;
+ int nr;
struct btrfs_work work;
};
-static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
+static void btrfs_async_run_delayed_root(struct btrfs_work *work)
{
- struct btrfs_async_delayed_node *async_node;
+ struct btrfs_async_delayed_work *async_work;
+ struct btrfs_delayed_root *delayed_root;
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
- int need_requeue = 0;
+ int total_done = 0;
- async_node = container_of(work, struct btrfs_async_delayed_node, work);
+ async_work = container_of(work, struct btrfs_async_delayed_work, work);
+ delayed_root = async_work->delayed_root;
path = btrfs_alloc_path();
if (!path)
goto out;
- path->leave_spinning = 1;
- delayed_node = async_node->delayed_node;
+again:
+ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
+ goto free_path;
+
+ delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
+ if (!delayed_node)
+ goto free_path;
+
+ path->leave_spinning = 1;
root = delayed_node->root;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
- goto free_path;
+ goto release_path;
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv;
@@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
* Task1 will sleep until the transaction is commited.
*/
mutex_lock(&delayed_node->mutex);
- if (delayed_node->count)
- need_requeue = 1;
- else
- btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
- delayed_node);
+ btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
mutex_unlock(&delayed_node->mutex);
trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
btrfs_btree_balance_dirty_nodelay(root);
+
+release_path:
+ btrfs_release_path(path);
+ total_done++;
+
+ btrfs_release_prepared_delayed_node(delayed_node);
+ if (async_work->nr == 0 || total_done < async_work->nr)
+ goto again;
+
free_path:
btrfs_free_path(path);
out:
- if (need_requeue)
- btrfs_requeue_work(&async_node->work);
- else {
- btrfs_release_prepared_delayed_node(delayed_node);
- kfree(async_node);
- }
+ wake_up(&delayed_root->wait);
+ kfree(async_work);
}
+
static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
- struct btrfs_root *root, int all)
+ struct btrfs_root *root, int nr)
{
- struct btrfs_async_delayed_node *async_node;
- struct btrfs_delayed_node *curr;
- int count = 0;
+ struct btrfs_async_delayed_work *async_work;
-again:
- curr = btrfs_first_prepared_delayed_node(delayed_root);
- if (!curr)
+ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return 0;
- async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
- if (!async_node) {
- btrfs_release_prepared_delayed_node(curr);
+ async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
+ if (!async_work)
return -ENOMEM;
- }
-
- async_node->root = root;
- async_node->delayed_node = curr;
-
- async_node->work.func = btrfs_async_run_delayed_node_done;
- async_node->work.flags = 0;
- btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
- count++;
-
- if (all || count < 4)
- goto again;
+ async_work->delayed_root = delayed_root;
+ async_work->work.func = btrfs_async_run_delayed_root;
+ async_work->work.flags = 0;
+ async_work->nr = nr;
+ btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
return 0;
}
@@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
WARN_ON(btrfs_first_delayed_node(delayed_root));
}
+static int refs_newer(struct btrfs_delayed_root *delayed_root,
+ int seq, int count)
+{
+ int val = atomic_read(&delayed_root->items_seq);
+
+ if (val < seq || val >= seq + count)
+ return 1;
+ return 0;
+}
+
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
+ int seq;
delayed_root = btrfs_get_delayed_root(root);
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return;
+ seq = atomic_read(&delayed_root->items_seq);
+
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
int ret;
- ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
+ DEFINE_WAIT(__wait);
+
+ ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
if (ret)
return;
- wait_event_interruptible_timeout(
- delayed_root->wait,
- (atomic_read(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND),
- HZ);
- return;
+ while (1) {
+ prepare_to_wait(&delayed_root->wait, &__wait,
+ TASK_INTERRUPTIBLE);
+
+ if (refs_newer(delayed_root, seq,
+ BTRFS_DELAYED_BATCH) ||
+ atomic_read(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND) {
+ break;
+ }
+ if (!signal_pending(current))
+ schedule();
+ else
+ break;
+ }
+ finish_wait(&delayed_root->wait, &__wait);
}
- btrfs_wq_run_delayed_node(delayed_root, root, 0);
+ btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
}
/* Will return 0 or -ENOMEM */
OpenPOWER on IntegriCloud