diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-cgroup.c | 4 | ||||
-rw-r--r-- | block/bio.c | 3 | ||||
-rw-r--r-- | block/blk-cgroup.c | 109 | ||||
-rw-r--r-- | block/blk-core.c | 5 | ||||
-rw-r--r-- | block/blk-throttle.c | 5 | ||||
-rw-r--r-- | block/blk-wbt.c | 89 | ||||
-rw-r--r-- | block/bsg.c | 8 | ||||
-rw-r--r-- | block/elevator.c | 3 |
8 files changed, 137 insertions, 89 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 58c6efa9f9a9..9fe5952d117d 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -275,9 +275,9 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg) void bfqg_and_blkg_put(struct bfq_group *bfqg) { - bfqg_put(bfqg); - blkg_put(bfqg_to_blkg(bfqg)); + + bfqg_put(bfqg); } /* @stats = 0 */ diff --git a/block/bio.c b/block/bio.c index b12966e415d3..8c680a776171 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2015,7 +2015,8 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { if (unlikely(bio->bi_blkg)) return -EBUSY; - blkg_get(blkg); + if (!blkg_try_get(blkg)) + return -ENODEV; bio->bi_blkg = blkg; return 0; } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 694595b29b8f..c630e02836a8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, } } -static void blkg_pd_offline(struct blkcg_gq *blkg) -{ - int i; - - lockdep_assert_held(blkg->q->queue_lock); - lockdep_assert_held(&blkg->blkcg->lock); - - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; - - if (blkg->pd[i] && !blkg->pd[i]->offline && - pol->pd_offline_fn) { - pol->pd_offline_fn(blkg->pd[i]); - blkg->pd[i]->offline = true; - } - } -} - static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; struct blkcg_gq *parent = blkg->parent; + int i; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg) WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_offline_fn) + pol->pd_offline_fn(blkg->pd[i]); + } + if (parent) { blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes); blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios); @@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q) struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); - blkg_pd_offline(blkg); blkg_destroy(blkg); spin_unlock(&blkcg->lock); } @@ -1053,59 +1042,64 @@ static struct cftype blkcg_legacy_files[] = { { } /* terminate */ }; +/* + * blkcg destruction is a three-stage process. + * + * 1. Destruction starts. The blkcg_css_offline() callback is invoked + * which offlines writeback. Here we tie the next stage of blkg destruction + * to the completion of writeback associated with the blkcg. This lets us + * avoid punting potentially large amounts of outstanding writeback to root + * while maintaining any ongoing policies. The next stage is triggered when + * the nr_cgwbs count goes to zero. + * + * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called + * and handles the destruction of blkgs. Here the css reference held by + * the blkg is put back eventually allowing blkcg_css_free() to be called. + * This work may occur in cgwb_release_workfn() on the cgwb_release + * workqueue. Any submitted ios that fail to get the blkg ref will be + * punted to the root_blkg. + * + * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called. + * This finally frees the blkcg. + */ + /** * blkcg_css_offline - cgroup css_offline callback * @css: css of interest * - * This function is called when @css is about to go away and responsible - * for offlining all blkgs pd and killing all wbs associated with @css. - * blkgs pd offline should be done while holding both q and blkcg locks. - * As blkcg lock is nested inside q lock, this function performs reverse - * double lock dancing. - * - * This is the blkcg counterpart of ioc_release_fn(). + * This function is called when @css is about to go away. Here the cgwbs are + * offlined first and only once writeback associated with the blkcg has + * finished do we start step 2 (see above). */ static void blkcg_css_offline(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); - struct blkcg_gq *blkg; - - spin_lock_irq(&blkcg->lock); - - hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { - struct request_queue *q = blkg->q; - - if (spin_trylock(q->queue_lock)) { - blkg_pd_offline(blkg); - spin_unlock(q->queue_lock); - } else { - spin_unlock_irq(&blkcg->lock); - cpu_relax(); - spin_lock_irq(&blkcg->lock); - } - } - - spin_unlock_irq(&blkcg->lock); + /* this prevents anyone from attaching or migrating to this blkcg */ wb_blkcg_offline(blkcg); + + /* put the base cgwb reference allowing step 2 to be triggered */ + blkcg_cgwb_put(blkcg); } /** - * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg + * blkcg_destroy_blkgs - responsible for shooting down blkgs * @blkcg: blkcg of interest * - * This function is called when blkcg css is about to free and responsible for - * destroying all blkgs associated with @blkcg. - * blkgs should be removed while holding both q and blkcg locks. As blkcg lock + * blkgs should be removed while holding both q and blkcg locks. As blkcg lock * is nested inside q lock, this function performs reverse double lock dancing. + * Destroying the blkgs releases the reference held on the blkcg's css allowing + * blkcg_css_free to eventually be called. + * + * This is the blkcg counterpart of ioc_release_fn(). */ -static void blkcg_destroy_all_blkgs(struct blkcg *blkcg) +void blkcg_destroy_blkgs(struct blkcg *blkcg) { spin_lock_irq(&blkcg->lock); + while (!hlist_empty(&blkcg->blkg_list)) { struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, - struct blkcg_gq, - blkcg_node); + struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; if (spin_trylock(q->queue_lock)) { @@ -1117,6 +1111,7 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg) spin_lock_irq(&blkcg->lock); } } + spin_unlock_irq(&blkcg->lock); } @@ -1125,8 +1120,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) struct blkcg *blkcg = css_to_blkcg(css); int i; - blkcg_destroy_all_blkgs(blkcg); - mutex_lock(&blkcg_pol_mutex); list_del(&blkcg->all_blkcgs_node); @@ -1189,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); + refcount_set(&blkcg->cgwb_refcnt, 1); #endif list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); @@ -1480,11 +1474,8 @@ void blkcg_deactivate_policy(struct request_queue *q, list_for_each_entry(blkg, &q->blkg_list, q_node) { if (blkg->pd[pol->plid]) { - if (!blkg->pd[pol->plid]->offline && - pol->pd_offline_fn) { + if (pol->pd_offline_fn) pol->pd_offline_fn(blkg->pd[pol->plid]); - blkg->pd[pol->plid]->offline = true; - } pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } @@ -1519,8 +1510,10 @@ int blkcg_policy_register(struct blkcg_policy *pol) for (i = 0; i < BLKCG_MAX_POLS; i++) if (!blkcg_policy[i]) break; - if (i >= BLKCG_MAX_POLS) + if (i >= BLKCG_MAX_POLS) { + pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n"); goto err_unlock; + } /* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs */ if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) || diff --git a/block/blk-core.c b/block/blk-core.c index dee56c282efb..4dbc93f43b38 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2163,9 +2163,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) { const int op = bio_op(bio); - if (part->policy && (op_is_write(op) && !op_is_flush(op))) { + if (part->policy && op_is_write(op)) { char b[BDEVNAME_SIZE]; + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return false; + WARN_ONCE(1, "generic_make_request: Trying to write " "to read-only block-device %s (partno %d)\n", diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a3eede00d302..01d0620a4e4a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2129,8 +2129,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) { #ifdef CONFIG_BLK_DEV_THROTTLING_LOW - if (bio->bi_css) - bio_associate_blkg(bio, tg_to_blkg(tg)); + /* fallback to root_blkg if we fail to get a blkg ref */ + if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) + bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); bio_issue_init(&bio->bi_issue, bio_sectors(bio)); #endif } diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 84507d3e9a98..8e20a0677dcf 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -123,16 +123,11 @@ static void rwb_wake_all(struct rq_wb *rwb) } } -static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) +static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, + enum wbt_flags wb_acct) { - struct rq_wb *rwb = RQWB(rqos); - struct rq_wait *rqw; int inflight, limit; - if (!(wb_acct & WBT_TRACKED)) - return; - - rqw = get_rq_wait(rwb, wb_acct); inflight = atomic_dec_return(&rqw->inflight); /* @@ -166,10 +161,22 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) int diff = limit - inflight; if (!inflight || diff >= rwb->wb_background / 2) - wake_up(&rqw->wait); + wake_up_all(&rqw->wait); } } +static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) +{ + struct rq_wb *rwb = RQWB(rqos); + struct rq_wait *rqw; + + if (!(wb_acct & WBT_TRACKED)) + return; + + rqw = get_rq_wait(rwb, wb_acct); + wbt_rqw_done(rwb, rqw, wb_acct); +} + /* * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. @@ -481,6 +488,34 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) return limit; } +struct wbt_wait_data { + struct wait_queue_entry wq; + struct task_struct *task; + struct rq_wb *rwb; + struct rq_wait *rqw; + unsigned long rw; + bool got_token; +}; + +static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode, + int wake_flags, void *key) +{ + struct wbt_wait_data *data = container_of(curr, struct wbt_wait_data, + wq); + + /* + * If we fail to get a budget, return -1 to interrupt the wake up + * loop in __wake_up_common. + */ + if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw))) + return -1; + + data->got_token = true; + list_del_init(&curr->entry); + wake_up_process(data->task); + return 1; +} + /* * Block if we will exceed our limit, or if we are currently waiting for * the timer to kick off queuing again. @@ -491,19 +526,40 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, __acquires(lock) { struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); - DECLARE_WAITQUEUE(wait, current); + struct wbt_wait_data data = { + .wq = { + .func = wbt_wake_function, + .entry = LIST_HEAD_INIT(data.wq.entry), + }, + .task = current, + .rwb = rwb, + .rqw = rqw, + .rw = rw, + }; bool has_sleeper; has_sleeper = wq_has_sleeper(&rqw->wait); if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) return; - add_wait_queue_exclusive(&rqw->wait, &wait); + prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { - set_current_state(TASK_UNINTERRUPTIBLE); + if (data.got_token) + break; - if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) + if (!has_sleeper && + rq_wait_inc_below(rqw, get_limit(rwb, rw))) { + finish_wait(&rqw->wait, &data.wq); + + /* + * We raced with wbt_wake_function() getting a token, + * which means we now have two. Put our local token + * and wake anyone else potentially waiting for one. + */ + if (data.got_token) + wbt_rqw_done(rwb, rqw, wb_acct); break; + } if (lock) { spin_unlock_irq(lock); @@ -511,11 +567,11 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, spin_lock_irq(lock); } else io_schedule(); + has_sleeper = false; } while (1); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&rqw->wait, &wait); + finish_wait(&rqw->wait, &data.wq); } static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) @@ -580,11 +636,6 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) return; } - if (current_is_kswapd()) - flags |= WBT_KSWAPD; - if (bio_op(bio) == REQ_OP_DISCARD) - flags |= WBT_DISCARD; - __wbt_wait(rwb, flags, bio->bi_opf, lock); if (!blk_stat_is_active(rwb->cb)) diff --git a/block/bsg.c b/block/bsg.c index db588add6ba6..9a442c23a715 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -37,7 +37,7 @@ struct bsg_device { struct request_queue *queue; spinlock_t lock; struct hlist_node dev_list; - atomic_t ref_count; + refcount_t ref_count; char name[20]; int max_queue; }; @@ -252,7 +252,7 @@ static int bsg_put_device(struct bsg_device *bd) mutex_lock(&bsg_mutex); - if (!atomic_dec_and_test(&bd->ref_count)) { + if (!refcount_dec_and_test(&bd->ref_count)) { mutex_unlock(&bsg_mutex); return 0; } @@ -290,7 +290,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode, bd->queue = rq; - atomic_set(&bd->ref_count, 1); + refcount_set(&bd->ref_count, 1); hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1); @@ -308,7 +308,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { if (bd->queue == q) { - atomic_inc(&bd->ref_count); + refcount_inc(&bd->ref_count); goto found; } } diff --git a/block/elevator.c b/block/elevator.c index 5ea6e7d600e4..6a06b5d040e5 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -895,8 +895,7 @@ int elv_register(struct elevator_type *e) spin_lock(&elv_list_lock); if (elevator_find(e->elevator_name, e->uses_mq)) { spin_unlock(&elv_list_lock); - if (e->icq_cache) - kmem_cache_destroy(e->icq_cache); + kmem_cache_destroy(e->icq_cache); return -EBUSY; } list_add_tail(&e->list, &elv_list); |