summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2015-07-02 08:44:34 -0600
committer: Jens Axboe <axboe@fb.com> 2015-07-02 08:46:00 -0600
commit: a13f35e8714009145e32ebe2bf25b84e1376e314 (patch)
tree: 911f2bddedea196ec012c4aaa9c17d35b4e36acc /mm
parent: 4da3064d1775810f10f7ddc1c34c3f1ff502a654 (diff)
download: talos-op-linux-a13f35e8714009145e32ebe2bf25b84e1376e314.tar.gz
talos-op-linux-a13f35e8714009145e32ebe2bf25b84e1376e314.zip
writeback: don't embed root bdi_writeback_congested in bdi_writeback
52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") made bdi (backing_dev_info) host per-cgroup wb's (bdi_writeback's). As the congested state needs to be per-wb and referenced from blkcg side and multiple wbs, the patch made all non-root cong's (bdi_writeback_congested's) reference counted and indexed on bdi.

When a bdi is destroyed, cgwb_bdi_destroy() tries to drain all non-root cong's; however, this can hang indefinitely because wb's can also be referenced from blkcg_gq's which are destroyed after bdi destruction is complete.

To fix the bug, bdi destruction will be updated to not wait for cong's to drain, which naturally means that cong's may outlive the associated bdi. This is fine for non-root cong's but is problematic for the root cong's which are embedded in their bdi's as they may end up getting dereferenced after the containing bdi's are freed.

This patch makes root cong's behave the same as non-root cong's. They are no longer embedded in their bdi's but allocated separately during bdi initialization, indexed and reference counted the same way.

* As cong handling is the same for all wb's, wb->congested initialization is moved into wb_init().

* When !CONFIG_CGROUP_WRITEBACK, there was no indexing or refcnting. bdi->wb_congested is now a pointer pointing to the root cong allocated during bdi init and minimal refcnting operations are implemented.

* The above makes root wb init paths diverge depending on CONFIG_CGROUP_WRITEBACK. root wb init is moved to cgwb_bdi_init().

This patch in itself shouldn't cause any consequential behavior differences but prepares for the actual fix.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jon Christopherson <jon@jons.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=100681
Tested-by: Jon Christopherson <jon@jons.org>

Added <linux/slab.h> include to backing-dev.h for kfree() definition.

Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'mm')
-rw-r--r-- mm/backing-dev.c 87
1 files changed, 46 insertions, 41 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7756da31b02b..51cc461e7256 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -287,7 +287,7 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
#define INIT_BW (100 << (20 - PAGE_SHIFT))
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
- gfp_t gfp)
+ int blkcg_id, gfp_t gfp)
{
int i, err;
@@ -311,21 +311,29 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+ wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
+ if (!wb->congested)
+ return -ENOMEM;
+
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
- return err;
+ goto out_put_cong;
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
err = percpu_counter_init(&wb->stat[i], 0, gfp);
- if (err) {
- while (--i)
- percpu_counter_destroy(&wb->stat[i]);
- fprop_local_destroy_percpu(&wb->completions);
- return err;
- }
+ if (err)
+ goto out_destroy_stat;
}
return 0;
+
+out_destroy_stat:
+ while (--i)
+ percpu_counter_destroy(&wb->stat[i]);
+ fprop_local_destroy_percpu(&wb->completions);
+out_put_cong:
+ wb_congested_put(wb->congested);
+ return err;
}
/*
@@ -361,6 +369,7 @@ static void wb_exit(struct bdi_writeback *wb)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
+ wb_congested_put(wb->congested);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -392,9 +401,6 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
struct bdi_writeback_congested *new_congested = NULL, *congested;
struct rb_node **node, *parent;
unsigned long flags;
-
- if (blkcg_id == 1)
- return &bdi->wb_congested;
retry:
spin_lock_irqsave(&cgwb_lock, flags);
@@ -453,9 +459,6 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
struct backing_dev_info *bdi = congested->bdi;
unsigned long flags;
- if (congested->blkcg_id == 1)
- return;
-
local_irq_save(flags);
if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
local_irq_restore(flags);
@@ -480,7 +483,6 @@ static void cgwb_release_workfn(struct work_struct *work)
css_put(wb->memcg_css);
css_put(wb->blkcg_css);
- wb_congested_put(wb->congested);
fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt);
@@ -541,7 +543,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
if (!wb)
return -ENOMEM;
- ret = wb_init(wb, bdi, gfp);
+ ret = wb_init(wb, bdi, blkcg_css->id, gfp);
if (ret)
goto err_free;
@@ -553,12 +555,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
if (ret)
goto err_ref_exit;
- wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp);
- if (!wb->congested) {
- ret = -ENOMEM;
- goto err_fprop_exit;
- }
-
wb->memcg_css = memcg_css;
wb->blkcg_css = blkcg_css;
INIT_WORK(&wb->release_work, cgwb_release_workfn);
@@ -588,12 +584,10 @@ static int cgwb_create(struct backing_dev_info *bdi,
if (ret) {
if (ret == -EEXIST)
ret = 0;
- goto err_put_congested;
+ goto err_fprop_exit;
}
goto out_put;
-err_put_congested:
- wb_congested_put(wb->congested);
err_fprop_exit:
fprop_local_destroy_percpu(&wb->memcg_completions);
err_ref_exit:
@@ -662,14 +656,20 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
return wb;
}
-static void cgwb_bdi_init(struct backing_dev_info *bdi)
+static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
- bdi->wb.memcg_css = mem_cgroup_root_css;
- bdi->wb.blkcg_css = blkcg_root_css;
- bdi->wb_congested.blkcg_id = 1;
+ int ret;
+
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
bdi->cgwb_congested_tree = RB_ROOT;
atomic_set(&bdi->usage_cnt, 1);
+
+ ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
+ if (!ret) {
+ bdi->wb.memcg_css = mem_cgroup_root_css;
+ bdi->wb.blkcg_css = blkcg_root_css;
+ }
+ return ret;
}
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
@@ -732,15 +732,28 @@ void wb_blkcg_offline(struct blkcg *blkcg)
#else /* CONFIG_CGROUP_WRITEBACK */
-static void cgwb_bdi_init(struct backing_dev_info *bdi) { }
+static int cgwb_bdi_init(struct backing_dev_info *bdi)
+{
+ int err;
+
+ bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
+ if (!bdi->wb_congested)
+ return -ENOMEM;
+
+ err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
+ if (err) {
+ kfree(bdi->wb_congested);
+ return err;
+ }
+ return 0;
+}
+
static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
#endif /* CONFIG_CGROUP_WRITEBACK */
int bdi_init(struct backing_dev_info *bdi)
{
- int err;
-
bdi->dev = NULL;
bdi->min_ratio = 0;
@@ -749,15 +762,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->bdi_list);
init_waitqueue_head(&bdi->wb_waitq);
- err = wb_init(&bdi->wb, bdi, GFP_KERNEL);
- if (err)
- return err;
-
- bdi->wb_congested.state = 0;
- bdi->wb.congested = &bdi->wb_congested;
-
- cgwb_bdi_init(bdi);
- return 0;
+ return cgwb_bdi_init(bdi);
}
EXPORT_SYMBOL(bdi_init);
OpenPOWER on IntegriCloud