diff options
author | Jens Axboe <axboe@kernel.dk> | 2013-04-02 10:04:39 +0200 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2013-04-02 10:04:39 +0200 |
commit | 64f8de4da7d3962632f152d3d702d68bb8accc29 (patch) | |
tree | c90a872a6d91c824635d59572e1e578980f4bc98 /drivers/md/raid5.c | |
parent | f1fb3449efd5c49b48e35746bc7283eb9c73e3a0 (diff) | |
parent | b5c872ddb7083c7909fb76a170c3807e04564bb3 (diff) | |
download | talos-obmc-linux-64f8de4da7d3962632f152d3d702d68bb8accc29.tar.gz talos-obmc-linux-64f8de4da7d3962632f152d3d702d68bb8accc29.zip |
Merge branch 'writeback-workqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq into for-3.10/core
Tejun writes:
-----
This is the pull request for the earlier patchset[1] with the same
name. It's only three patches (the first one was committed to
workqueue tree) but the merge strategy is a bit involved due to the
dependencies.
* Because the conversion needs features from wq/for-3.10,
block/for-3.10/core is based on rc3, and wq/for-3.10 has conflicts
with rc3, I pulled mainline (rc5) into wq/for-3.10 to prevent those
workqueue conflicts from flaring up in block tree.
* Resolving the issue that Jan and Dave raised about debugging
requires arch-wide changes. The patchset is being worked on[2] but
it'll have to go through -mm after these changes show up in -next,
and not included in this pull request.
The three commits are located in the following git branch.
git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git writeback-workqueue
Pulling it into block/for-3.10/core produces a conflict in
drivers/md/raid5.c between the following two commits.
e3620a3ad5 ("MD RAID5: Avoid accessing gendisk or queue structs when not available")
2f6db2a707 ("raid5: use bio_reset()")
The conflict is trivial - one removes an "if ()" conditional while the
other removes "rbi->bi_next = NULL" right above it. We just need to
remove both. The merged branch is available at
git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git block-test-merge
so that you can use it for verification. The test merge commit has
proper merge description.
While these changes are a bit of pain to route, they make code simpler
and even have, while minute, measureable performance gain[3] even on a
workload which isn't particularly favorable to showing the benefits of
this conversion.
----
Fixed up the conflict.
Conflicts:
drivers/md/raid5.c
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 116 |
1 files changed, 81 insertions, 35 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7bbd28546214..2fefb9f2198e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -667,9 +667,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_size = STRIPE_SIZE; if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); - trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), - bi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + + if (conf->mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), + bi, disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); generic_make_request(bi); } if (rrdev) { @@ -700,9 +702,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_size = STRIPE_SIZE; - trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), - rbi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + if (conf->mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), + rbi, disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); generic_make_request(rbi); } if (!rdev && !rrdev) { @@ -2279,17 +2282,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, int level = conf->level; if (rcw) { - /* if we are not expanding this is a proper write request, and - * there will be bios with new data to be drained into the - * stripe cache - */ - if (!expand) { - sh->reconstruct_state = reconstruct_state_drain_run; - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - } else - sh->reconstruct_state = reconstruct_state_run; - - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -2302,6 +2294,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } + /* if we are not expanding this is a proper write request, and + * there will be bios with new data to be drained into the + * stripe cache + */ + if (!expand) { + if (!s->locked) + /* False alarm, nothing to do */ + return; + sh->reconstruct_state = reconstruct_state_drain_run; + set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); + } else + sh->reconstruct_state = reconstruct_state_run; + + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); + if (s->locked + conf->max_degraded == disks) if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) atomic_inc(&conf->pending_full_writes); @@ -2310,11 +2317,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); - sh->reconstruct_state = reconstruct_state_prexor_drain_run; - set_bit(STRIPE_OP_PREXOR, &s->ops_request); - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); - for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (i == pd_idx) @@ -2329,6 +2331,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } + if (!s->locked) + /* False alarm - nothing to do */ + return; + sh->reconstruct_state = reconstruct_state_prexor_drain_run; + set_bit(STRIPE_OP_PREXOR, &s->ops_request); + set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); } /* keep the parity disk(s) locked while asynchronous operations @@ -2563,6 +2572,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, int i; clear_bit(STRIPE_SYNCING, &sh->state); + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) + wake_up(&conf->wait_for_overlap); s->syncing = 0; s->replacing = 0; /* There is nothing more to do for sync/check/repair. @@ -2736,6 +2747,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, { int i; struct r5dev *dev; + int discard_pending = 0; for (i = disks; i--; ) if (sh->dev[i].written) { @@ -2764,9 +2776,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, STRIPE_SECTORS, !test_bit(STRIPE_DEGRADED, &sh->state), 0); - } - } else if (test_bit(R5_Discard, &sh->dev[i].flags)) - clear_bit(R5_Discard, &sh->dev[i].flags); + } else if (test_bit(R5_Discard, &dev->flags)) + discard_pending = 1; + } + if (!discard_pending && + test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { + clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); + clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); + if (sh->qd_idx >= 0) { + clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); + clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); + } + /* now that discard is done we can proceed with any sync */ + clear_bit(STRIPE_DISCARD, &sh->state); + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) + set_bit(STRIPE_HANDLE, &sh->state); + + } if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) if (atomic_dec_and_test(&conf->pending_full_writes)) @@ -2825,8 +2851,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, set_bit(STRIPE_HANDLE, &sh->state); if (rmw < rcw && rmw > 0) { /* prefer read-modify-write, but need to get some data */ - blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", - (unsigned long long)sh->sector, rmw); + if (conf->mddev->queue) + blk_add_trace_msg(conf->mddev->queue, + "raid5 rmw %llu %d", + (unsigned long long)sh->sector, rmw); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && @@ -2876,7 +2904,7 @@ static void handle_stripe_dirtying(struct r5conf *conf, } } } - if (rcw) + if (rcw && conf->mddev->queue) blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", (unsigned long long)sh->sector, rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); @@ -3416,9 +3444,15 @@ static void handle_stripe(struct stripe_head *sh) return; } - if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { - set_bit(STRIPE_SYNCING, &sh->state); - clear_bit(STRIPE_INSYNC, &sh->state); + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + spin_lock(&sh->stripe_lock); + /* Cannot process 'sync' concurrently with 'discard' */ + if (!test_bit(STRIPE_DISCARD, &sh->state) && + test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + set_bit(STRIPE_SYNCING, &sh->state); + clear_bit(STRIPE_INSYNC, &sh->state); + } + spin_unlock(&sh->stripe_lock); } clear_bit(STRIPE_DELAYED, &sh->state); @@ -3578,6 +3612,8 @@ static void handle_stripe(struct stripe_head *sh) test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) + wake_up(&conf->wait_for_overlap); } /* If the failed drives are just a ReadError, then we might need @@ -3981,9 +4017,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) atomic_inc(&conf->active_aligned_reads); spin_unlock_irq(&conf->device_lock); - trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), - align_bi, disk_devt(mddev->gendisk), - raid_bio->bi_sector); + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), + align_bi, disk_devt(mddev->gendisk), + raid_bio->bi_sector); generic_make_request(align_bi); return 1; } else { @@ -4077,7 +4114,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) } spin_unlock_irq(&conf->device_lock); } - trace_block_unplug(mddev->queue, cnt, !from_schedule); + if (mddev->queue) + trace_block_unplug(mddev->queue, cnt, !from_schedule); kfree(cb); } @@ -4140,6 +4178,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) sh = get_active_stripe(conf, logical_sector, 0, 0, 0); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); + set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); + if (test_bit(STRIPE_SYNCING, &sh->state)) { + release_stripe(sh); + schedule(); + goto again; + } + clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); spin_lock_irq(&sh->stripe_lock); for (d = 0; d < conf->raid_disks; d++) { if (d == sh->pd_idx || d == sh->qd_idx) @@ -4152,6 +4197,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) goto again; } } + set_bit(STRIPE_DISCARD, &sh->state); finish_wait(&conf->wait_for_overlap, &w); for (d = 0; d < conf->raid_disks; d++) { if (d == sh->pd_idx || d == sh->qd_idx) |