diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-09 10:45:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-09 10:45:06 -0700 |
commit | 3b99107f0e0298e6fe0787f75b8f3d8306dfb230 (patch) | |
tree | 30536dbc9ca176470a2ae2938f952381e33f5deb /drivers/md/md.c | |
parent | 0415052db4f92b7e272fc15802ad8b8be672deea (diff) | |
parent | c9b3007feca018d3f7061f5d5a14cb00766ffe9b (diff) | |
download | talos-op-linux-3b99107f0e0298e6fe0787f75b8f3d8306dfb230.tar.gz talos-op-linux-3b99107f0e0298e6fe0787f75b8f3d8306dfb230.zip |
Merge tag 'for-5.3/block-20190708' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main block updates for 5.3. Nothing earth shattering or
major in here, just fixes, additions, and improvements all over the
map. This contains:
- Series of documentation fixes (Bart)
- Optimization of the blk-mq ctx get/put (Bart)
- null_blk removal race condition fix (Bob)
- req/bio_op() cleanups (Chaitanya)
- Series cleaning up the segment accounting, and request/bio mapping
(Christoph)
- Series cleaning up the page getting/putting for bios (Christoph)
- block cgroup cleanups and moving it to where it is used (Christoph)
- block cgroup fixes (Tejun)
- Series of fixes and improvements to bcache, most notably a write
deadlock fix (Coly)
- blk-iolatency STS_AGAIN and accounting fixes (Dennis)
- Series of improvements and fixes to BFQ (Douglas, Paolo)
- debugfs_create() return value check removal for drbd (Greg)
- Use struct_size(), where appropriate (Gustavo)
- Two lighnvm fixes (Heiner, Geert)
- MD fixes, including a read balance and corruption fix (Guoqing,
Marcos, Xiao, Yufen)
- block opal shadow mbr additions (Jonas, Revanth)
- sbitmap compare-and-exhange improvemnts (Pavel)
- Fix for potential bio->bi_size overflow (Ming)
- NVMe pull requests:
- improved PCIe suspent support (Keith Busch)
- error injection support for the admin queue (Akinobu Mita)
- Fibre Channel discovery improvements (James Smart)
- tracing improvements including nvmetc tracing support (Minwoo Im)
- misc fixes and cleanups (Anton Eidelman, Minwoo Im, Chaitanya
Kulkarni)"
- Various little fixes and improvements to drivers and core"
* tag 'for-5.3/block-20190708' of git://git.kernel.dk/linux-block: (153 commits)
blk-iolatency: fix STS_AGAIN handling
block: nr_phys_segments needs to be zero for REQ_OP_WRITE_ZEROES
blk-mq: simplify blk_mq_make_request()
blk-mq: remove blk_mq_put_ctx()
sbitmap: Replace cmpxchg with xchg
block: fix .bi_size overflow
block: sed-opal: check size of shadow mbr
block: sed-opal: ioctl for writing to shadow mbr
block: sed-opal: add ioctl for done-mark of shadow mbr
block: never take page references for ITER_BVEC
direct-io: use bio_release_pages in dio_bio_complete
block_dev: use bio_release_pages in bio_unmap_user
block_dev: use bio_release_pages in blkdev_bio_end_io
iomap: use bio_release_pages in iomap_dio_bio_end_io
block: use bio_release_pages in bio_map_user_iov
block: use bio_release_pages in bio_unmap_user
block: optionally mark pages dirty in bio_release_pages
block: move the BIO_NO_PAGE_REF check into bio_release_pages
block: skd_main.c: Remove call to memset after dma_alloc_coherent
block: mtip32xx: Remove call to memset after dma_alloc_coherent
...
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 129 |
1 files changed, 113 insertions, 16 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 9801d540fea1..a114b05e3db4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -37,6 +37,7 @@ */ +#include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/kthread.h> #include <linux/blkdev.h> @@ -124,6 +125,77 @@ static inline int speed_max(struct mddev *mddev) mddev->sync_speed_max : sysctl_speed_limit_max; } +static int rdev_init_wb(struct md_rdev *rdev) +{ + if (rdev->bdev->bd_queue->nr_hw_queues == 1) + return 0; + + spin_lock_init(&rdev->wb_list_lock); + INIT_LIST_HEAD(&rdev->wb_list); + init_waitqueue_head(&rdev->wb_io_wait); + set_bit(WBCollisionCheck, &rdev->flags); + + return 1; +} + +/* + * Create wb_info_pool if rdev is the first multi-queue device flaged + * with writemostly, also write-behind mode is enabled. + */ +void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev, + bool is_suspend) +{ + if (mddev->bitmap_info.max_write_behind == 0) + return; + + if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev)) + return; + + if (mddev->wb_info_pool == NULL) { + unsigned int noio_flag; + + if (!is_suspend) + mddev_suspend(mddev); + noio_flag = memalloc_noio_save(); + mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS, + sizeof(struct wb_info)); + memalloc_noio_restore(noio_flag); + if (!mddev->wb_info_pool) + pr_err("can't alloc memory pool for writemostly\n"); + if (!is_suspend) + mddev_resume(mddev); + } +} +EXPORT_SYMBOL_GPL(mddev_create_wb_pool); + +/* + * destroy wb_info_pool if rdev is the last device flaged with WBCollisionCheck. + */ +static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev) +{ + if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags)) + return; + + if (mddev->wb_info_pool) { + struct md_rdev *temp; + int num = 0; + + /* + * Check if other rdevs need wb_info_pool. + */ + rdev_for_each(temp, mddev) + if (temp != rdev && + test_bit(WBCollisionCheck, &temp->flags)) + num++; + if (!num) { + mddev_suspend(rdev->mddev); + mempool_destroy(mddev->wb_info_pool); + mddev->wb_info_pool = NULL; + mddev_resume(rdev->mddev); + } + } +} + static struct ctl_table_header *raid_table_header; static struct ctl_table raid_table[] = { @@ -2210,6 +2282,9 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) rdev->mddev = mddev; pr_debug("md: bind<%s>\n", b); + if (mddev->raid_disks) + mddev_create_wb_pool(mddev, rdev, false); + if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; @@ -2246,6 +2321,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b)); + mddev_destroy_wb_pool(rdev->mddev, rdev); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); @@ -2758,8 +2834,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } } else if (cmd_match(buf, "writemostly")) { set_bit(WriteMostly, &rdev->flags); + mddev_create_wb_pool(rdev->mddev, rdev, false); err = 0; } else if (cmd_match(buf, "-writemostly")) { + mddev_destroy_wb_pool(rdev->mddev, rdev); clear_bit(WriteMostly, &rdev->flags); err = 0; } else if (cmd_match(buf, "blocked")) { @@ -3356,7 +3434,7 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; if (!rdev->mddev) - return -EBUSY; + return -ENODEV; return entry->show(rdev, page); } @@ -5588,15 +5666,28 @@ int md_run(struct mddev *mddev) mddev->bitmap = bitmap; } - if (err) { - mddev_detach(mddev); - if (mddev->private) - pers->free(mddev, mddev->private); - mddev->private = NULL; - module_put(pers->owner); - md_bitmap_destroy(mddev); - goto abort; + if (err) + goto bitmap_abort; + + if (mddev->bitmap_info.max_write_behind > 0) { + bool creat_pool = false; + + rdev_for_each(rdev, mddev) { + if (test_bit(WriteMostly, &rdev->flags) && + rdev_init_wb(rdev)) + creat_pool = true; + } + if (creat_pool && mddev->wb_info_pool == NULL) { + mddev->wb_info_pool = + mempool_create_kmalloc_pool(NR_WB_INFOS, + sizeof(struct wb_info)); + if (!mddev->wb_info_pool) { + err = -ENOMEM; + goto bitmap_abort; + } + } } + if (mddev->queue) { bool nonrot = true; @@ -5639,8 +5730,7 @@ int md_run(struct mddev *mddev) spin_unlock(&mddev->lock); rdev_for_each(rdev, mddev) if (rdev->raid_disk >= 0) - if (sysfs_link_rdev(mddev, rdev)) - /* failure here is OK */; + sysfs_link_rdev(mddev, rdev); /* failure here is OK */ if (mddev->degraded && !mddev->ro) /* This ensures that recovering status is reported immediately @@ -5658,6 +5748,13 @@ int md_run(struct mddev *mddev) sysfs_notify(&mddev->kobj, NULL, "degraded"); return 0; +bitmap_abort: + mddev_detach(mddev); + if (mddev->private) + pers->free(mddev, mddev->private); + mddev->private = NULL; + module_put(pers->owner); + md_bitmap_destroy(mddev); abort: bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); @@ -5826,6 +5923,8 @@ static void __md_stop_writes(struct mddev *mddev) mddev->in_sync = 1; md_update_sb(mddev, 1); } + mempool_destroy(mddev->wb_info_pool); + mddev->wb_info_pool = NULL; } void md_stop_writes(struct mddev *mddev) @@ -8198,8 +8297,7 @@ void md_do_sync(struct md_thread *thread) { struct mddev *mddev = thread->mddev; struct mddev *mddev2; - unsigned int currspeed = 0, - window; + unsigned int currspeed = 0, window; sector_t max_sectors,j, io_sectors, recovery_done; unsigned long mark[SYNC_MARKS]; unsigned long update_time; @@ -8256,7 +8354,7 @@ void md_do_sync(struct md_thread *thread) * 0 == not engaged in resync at all * 2 == checking that there is no conflict with another sync * 1 == like 2, but have yielded to allow conflicting resync to - * commense + * commence * other == active in resync - this many blocks * * Before starting a resync we must have set curr_resync to @@ -8387,7 +8485,7 @@ void md_do_sync(struct md_thread *thread) /* * Tune reconstruction: */ - window = 32*(PAGE_SIZE/512); + window = 32 * (PAGE_SIZE / 512); pr_debug("md: using %dk window, over a total of %lluk.\n", window/2, (unsigned long long)max_sectors/2); @@ -9200,7 +9298,6 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) * perform resync with the new activated disk */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - } /* device faulty * We just want to do the minimum to mark the disk |