From 24941b90e639df2bf467531601a15d792eaa6d6b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 28 Feb 2018 09:18:57 -0700 Subject: null_blk: add 'requeue' fault attribute Similarly to the support we have for testing/faking timeouts for null_blk, this adds support for triggering a requeue condition. Considering the issues around restart we've been seeing, this should be a useful addition to the testing arsenal to ensure that we are handling requeue conditions correctly. This works for queue mode 1 (legacy request_fn based path) and 2 (blk-mq path), as there's no good way to do requeue with a bio based driver. This is similar to the timeout path. For the blk-mq path, we alternate between passing back BLK_STS_RESOURCE and manually calling blk_mq_requeue_request() in the driver. The former will hit the core requeue path, while the latter exercises the IO scheduler requeue path. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 67 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 12 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 287a09611c0f..d12d7a8325ad 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -29,6 +29,7 @@ #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static DECLARE_FAULT_ATTR(null_timeout_attr); +static DECLARE_FAULT_ATTR(null_requeue_attr); #endif static inline u64 mb_per_tick(int mbps) @@ -53,6 +54,7 @@ struct nullb_queue { wait_queue_head_t wait; unsigned int queue_depth; struct nullb_device *dev; + unsigned int requeue_selection; struct nullb_cmd *cmds; }; @@ -170,6 +172,9 @@ MODULE_PARM_DESC(home_node, "Home node for the device"); #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static char g_timeout_str[80]; module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO); + +static char g_requeue_str[80]; +module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO); #endif static int g_queue_mode = NULL_Q_MQ; @@ -1380,7 +1385,15 @@ static bool should_timeout_request(struct request *rq) if (g_timeout_str[0]) return should_fail(&null_timeout_attr, 1); #endif + return false; +} +static bool should_requeue_request(struct request *rq) +{ +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (g_requeue_str[0]) + return should_fail(&null_requeue_attr, 1); +#endif return false; } @@ -1391,11 +1404,17 @@ static void null_request_fn(struct request_queue *q) while ((rq = blk_fetch_request(q)) != NULL) { struct nullb_cmd *cmd = rq->special; - if (!should_timeout_request(rq)) { - spin_unlock_irq(q->queue_lock); - null_handle_cmd(cmd); - spin_lock_irq(q->queue_lock); + /* just ignore the request */ + if (should_timeout_request(rq)) + continue; + if (should_requeue_request(rq)) { + blk_requeue_request(q, rq); + continue; } + + spin_unlock_irq(q->queue_lock); + null_handle_cmd(cmd); + spin_lock_irq(q->queue_lock); } } @@ -1422,10 +1441,23 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); - if (!should_timeout_request(bd->rq)) - return null_handle_cmd(cmd); + if (should_requeue_request(bd->rq)) { + /* + * Alternate between hitting the core BUSY path, and the + * driver driven requeue path + */ + nq->requeue_selection++; + if (nq->requeue_selection & 1) + return BLK_STS_RESOURCE; + else { + blk_mq_requeue_request(bd->rq, true); + return BLK_STS_OK; + } + } + if (should_timeout_request(bd->rq)) + return BLK_STS_OK; - return BLK_STS_OK; + 
return null_handle_cmd(cmd); } static const struct blk_mq_ops null_mq_ops = { @@ -1659,16 +1691,27 @@ static void null_validate_conf(struct nullb_device *dev) dev->mbps = 0; } -static bool null_setup_fault(void) -{ #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (!g_timeout_str[0]) +static bool __null_setup_fault(struct fault_attr *attr, char *str) +{ + if (!str[0]) return true; - if (!setup_fault_attr(&null_timeout_attr, g_timeout_str)) + if (!setup_fault_attr(attr, str)) return false; - null_timeout_attr.verbose = 0; + attr->verbose = 0; + return true; +} +#endif + +static bool null_setup_fault(void) +{ +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (!__null_setup_fault(&null_timeout_attr, g_timeout_str)) + return false; + if (!__null_setup_fault(&null_requeue_attr, g_requeue_str)) + return false; #endif return true; } -- cgit v1.2.1 From 0fa8ebdd4244b8e652cc5341c3d5b4b06f84a637 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:28 -0800 Subject: block/loop: Delete gendisk before cleaning up the request queue Remove the disk, partition and bdi sysfs attributes before cleaning up the request queue associated with the disk. Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Joseph Qi Reviewed-by: Ming Lei Cc: Josef Bacik Cc: Shaohua Li Cc: Omar Sandoval Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 87855b5123a6..9d29aa6413e5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1864,8 +1864,8 @@ out: static void loop_remove(struct loop_device *lo) { - blk_cleanup_queue(lo->lo_queue); del_gendisk(lo->lo_disk); + blk_cleanup_queue(lo->lo_queue); blk_mq_free_tag_set(&lo->tag_set); put_disk(lo->lo_disk); kfree(lo); -- cgit v1.2.1 From 392db38058eb47250a9d0cc737af37e78a7e443d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:30 -0800 Subject: zram: Delete gendisk before cleaning up the request queue Remove the disk, partition and bdi sysfs attributes before cleaning up the request queue associated with the disk. Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Joseph Qi Reviewed-by: Ming Lei Cc: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0afa6c8c3857..85110e7931e5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1620,8 +1620,8 @@ static int zram_remove(struct zram *zram) pr_info("Removed device: %s\n", zram->disk->disk_name); - blk_cleanup_queue(zram->disk->queue); del_gendisk(zram->disk); + blk_cleanup_queue(zram->disk->queue); put_disk(zram->disk); kfree(zram); return 0; -- cgit v1.2.1 From 5ee0524ba137fe928a88b440d014e3c8451fb32c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:31 -0800 Subject: block: Add 'lock' as third argument to blk_alloc_queue_node() This patch does not change any functionality. 
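For illustration, a call site simply gains a third argument for the queue lock; drivers with no private lock pass NULL, as null_blk does in the hunk below. A minimal sketch, not part of the patch itself:

    /*
     * Sketch of the updated allocator call. NUMA_NO_NODE and the NULL
     * lock are the "no node preference, no private lock" case; the
     * follow-up patch shows drivers passing their own spinlock instead.
     */
    q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
    if (!q)
        return -ENOMEM;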
Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Christoph Hellwig Cc: Philipp Reisner Cc: Ulf Hansson Cc: Kees Cook Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index d12d7a8325ad..6dc7e7cfca4a 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1760,7 +1760,8 @@ static int null_add_dev(struct nullb_device *dev) } null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, + NULL); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; -- cgit v1.2.1 From 498f6650aec864e331cae7575fec5f07781d0bf3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:32 -0800 Subject: block: Fix a race between the cgroup code and request queue initialization Initialize the request queue lock earlier such that the following race can no longer occur: blk_init_queue_node() blkcg_print_blkgs() blk_alloc_queue_node (1) q->queue_lock = &q->__queue_lock (2) blkcg_init_queue(q) (3) spin_lock_irq(blkg->q->queue_lock) (4) q->queue_lock = lock (5) spin_unlock_irq(blkg->q->queue_lock) (6) (1) allocate an uninitialized queue; (2) initialize queue_lock to its default internal lock; (3) initialize blkcg part of request queue, which will create blkg and then insert it to blkg_list; (4) traverse blkg_list and find the created blkg, and then take its queue lock, here it is the default *internal lock*; (5) *race window*, now queue_lock is overridden with *driver specified lock*; (6) now unlock *driver specified lock*, not the locked *internal lock*, unlock balance breaks. The changes in this patch are as follows: - Move the .queue_lock initialization from blk_init_queue_node() into blk_alloc_queue_node(). - Only override the .queue_lock pointer for legacy queues because it is not useful for blk-mq queues to override this pointer. - For all all block drivers that initialize .queue_lock explicitly, change the blk_alloc_queue() call in the driver into a blk_alloc_queue_node() call and remove the explicit .queue_lock initialization. Additionally, initialize the spin lock that will be used as queue lock earlier if necessary. 
Reported-by: Joseph Qi Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Christoph Hellwig Cc: Philipp Reisner Cc: Ulf Hansson Cc: Kees Cook Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 3 +-- drivers/block/umem.c | 7 +++---- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0a0394aa1b9c..185f1ef00a7c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue(GFP_KERNEL); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock); if (!q) goto out_no_q; device->rq_queue = q; @@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); - q->queue_lock = &resource->req_lock; device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 8077123678ad..5c7fb8cc4149 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->Active = -1; /* no page is active */ card->bio = NULL; card->biotail = &card->bio; + spin_lock_init(&card->lock); - card->queue = blk_alloc_queue(GFP_KERNEL); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, + &card->lock); if (!card->queue) goto failed_alloc; blk_queue_make_request(card->queue, mm_make_request); - card->queue->queue_lock = &card->lock; card->queue->queuedata = card; tasklet_init(&card->tasklet, process_page, (unsigned long)card); @@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_printk(KERN_INFO, &card->dev->dev, "Window size %d bytes, IRQ %d\n", data, dev->irq); - spin_lock_init(&card->lock); - pci_set_drvdata(dev, card); if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */ -- cgit v1.2.1 From 66231ad3e2886ba99fbf440cea44cab547e5163f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Mar 2018 12:07:13 +0800 Subject: block: null_blk: fix 'Invalid parameters' when loading module On ARM64, the default page size has been 64K on some distributions, and we should allow ARM64 people to play null_blk. This patch fixes the issue by extend page bitmap size for supporting other non-4KB PAGE_SIZE. Cc: Bart Van Assche Cc: Shaohua Li Cc: Kyungchan Koh , Cc: weiping zhang Cc: Yi Zhang Reported-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 6dc7e7cfca4a..d6be7a6d8ca6 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -74,6 +74,7 @@ enum nullb_device_flags { NULLB_DEV_FL_CACHE = 3, }; +#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2) /* * nullb_page is a page in memory for nullb devices. 
* @@ -88,10 +89,10 @@ enum nullb_device_flags { */ struct nullb_page { struct page *page; - unsigned long bitmap; + DECLARE_BITMAP(bitmap, MAP_SZ); }; -#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1) -#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2) +#define NULLB_PAGE_LOCK (MAP_SZ - 1) +#define NULLB_PAGE_FREE (MAP_SZ - 2) struct nullb_device { struct nullb *nullb; @@ -733,7 +734,7 @@ static struct nullb_page *null_alloc_page(gfp_t gfp_flags) if (!t_page->page) goto out_freepage; - t_page->bitmap = 0; + memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); return t_page; out_freepage: kfree(t_page); @@ -743,13 +744,20 @@ out: static void null_free_page(struct nullb_page *t_page) { - __set_bit(NULLB_PAGE_FREE, &t_page->bitmap); - if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap)) + __set_bit(NULLB_PAGE_FREE, t_page->bitmap); + if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap)) return; __free_page(t_page->page); kfree(t_page); } +static bool null_page_empty(struct nullb_page *page) +{ + int size = MAP_SZ - 2; + + return find_first_bit(page->bitmap, size) == size; +} + static void null_free_sector(struct nullb *nullb, sector_t sector, bool is_cache) { @@ -764,9 +772,9 @@ static void null_free_sector(struct nullb *nullb, sector_t sector, t_page = radix_tree_lookup(root, idx); if (t_page) { - __clear_bit(sector_bit, &t_page->bitmap); + __clear_bit(sector_bit, t_page->bitmap); - if (!t_page->bitmap) { + if (null_page_empty(t_page)) { ret = radix_tree_delete_item(root, idx, t_page); WARN_ON(ret != t_page); null_free_page(ret); @@ -837,7 +845,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb, t_page = radix_tree_lookup(root, idx); WARN_ON(t_page && t_page->page->index != idx); - if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap))) + if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap))) return t_page; return NULL; @@ -900,10 +908,10 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); - __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap); - if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) { + __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap); + if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) { null_free_page(c_page); - if (t_page && t_page->bitmap == 0) { + if (t_page && null_page_empty(t_page)) { ret = radix_tree_delete_item(&nullb->dev->data, idx, t_page); null_free_page(t_page); @@ -919,11 +927,11 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) for (i = 0; i < PAGE_SECTORS; i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { - if (test_bit(i, &c_page->bitmap)) { + if (test_bit(i, c_page->bitmap)) { offset = (i << SECTOR_SHIFT); memcpy(dst + offset, src + offset, nullb->dev->blocksize); - __set_bit(i, &t_page->bitmap); + __set_bit(i, t_page->bitmap); } } @@ -960,10 +968,10 @@ again: * We found the page which is being flushed to disk by other * threads */ - if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap)) + if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap)) c_pages[i] = NULL; else - __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap); + __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap); } one_round = 0; @@ -1016,7 +1024,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, kunmap_atomic(dst); kunmap_atomic(src); - __set_bit(sector & SECTOR_MASK, &t_page->bitmap); + __set_bit(sector & SECTOR_MASK, t_page->bitmap); if (is_fua) null_free_sector(nullb, sector, true); @@ -1846,10 +1854,6 @@ static int __init 
null_init(void) struct nullb *nullb; struct nullb_device *dev; - /* check for nullb_page.bitmap */ - if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT)) - return -EINVAL; - if (g_bs > PAGE_SIZE) { pr_warn("null_blk: invalid block size\n"); pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); -- cgit v1.2.1 From 4e699cb99d08563df4587a9a667bc5936dc75e51 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:06 -0800 Subject: mtip32xx: Use the blk_queue_flag_*() functions Use the blk_queue_flag_*() functions instead of open-coding these. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index b8af7352a18f..769c551e3d71 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -159,7 +159,7 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) if (vendor_id == 0xFFFF) { dd->sr = true; if (dd->queue) - set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue); else dev_warn(&dd->pdev->dev, "%s: dd->queue is NULL\n", __func__); @@ -3855,8 +3855,8 @@ skip_create_disk: goto start_service_thread; /* Set device limits. */ - set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); - clear_bit(QUEUE_FLAG_ADD_RANDOM, &dd->queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); blk_queue_max_segments(dd->queue, MTIP_MAX_SG); blk_queue_physical_block_size(dd->queue, 4096); blk_queue_max_hw_sectors(dd->queue, 0xffff); @@ -3866,7 +3866,7 @@ skip_create_disk: /* Signal trim support */ if (dd->trim_supp == true) { - set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, dd->queue); dd->queue->limits.discard_granularity = 4096; blk_queue_max_discard_sectors(dd->queue, MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); -- cgit v1.2.1 From 8b904b5b6b58b9a29dcf3f82d936d9e7fd69fda6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:10 -0800 Subject: block: Use blk_queue_flag_*() in drivers instead of queue_flag_*() This patch has been generated as follows: for verb in set_unlocked clear_unlocked set clear; do replace-in-files queue_flag_${verb} blk_queue_flag_${verb%_unlocked} \ $(git grep -lw queue_flag_${verb} drivers block/bsg*) done Except for protecting all queue flag changes with the queue lock this patch does not change any functionality. Cc: Mike Snitzer Cc: Shaohua Li Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Signed-off-by: Bart Van Assche Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Acked-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/loop.c | 10 +++++----- drivers/block/nbd.c | 8 ++++---- drivers/block/null_blk.c | 6 +++--- drivers/block/rbd.c | 4 ++-- drivers/block/rsxx/dev.c | 6 +++--- drivers/block/skd_main.c | 4 ++-- drivers/block/xen-blkfront.c | 10 +++++----- drivers/block/zram/zram_drv.c | 6 +++--- 9 files changed, 29 insertions(+), 29 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a12f77e6891e..b4f02768ba47 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1212,10 +1212,10 @@ static void decide_on_discard_support(struct drbd_device *device, * topology on all peers. */ blk_queue_discard_granularity(q, 512); q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); } else { - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); blk_queue_discard_granularity(q, 0); q->limits.max_discard_sectors = 0; q->limits.max_write_zeroes_sectors = 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 9d29aa6413e5..7952ed5c607b 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -214,10 +214,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; if (use_dio) { - queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags |= LO_FLAGS_DIRECT_IO; } else { - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } blk_mq_unfreeze_queue(lo->lo_queue); @@ -817,7 +817,7 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, 0); blk_queue_max_write_zeroes_sectors(q, 0); - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); return; } @@ -826,7 +826,7 @@ static void loop_config_discard(struct loop_device *lo) blk_queue_max_discard_sectors(q, UINT_MAX >> 9); blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } static void loop_unprepare_queue(struct loop_device *lo) @@ -1808,7 +1808,7 @@ static int loop_add(struct loop_device **l, int i) * page. For directio mode, merge does help to dispatch bigger request * to underlayer disk. We will enable merge once directio is enabled. 
*/ - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); err = -ENOMEM; disk = lo->lo_disk = alloc_disk(1 << part_shift); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 86258b00a1d4..afbc202ca6fd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -964,7 +964,7 @@ static void nbd_parse_flags(struct nbd_device *nbd) else set_disk_ro(nbd->disk, false); if (config->flags & NBD_FLAG_SEND_TRIM) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (config->flags & NBD_FLAG_SEND_FLUSH) { if (config->flags & NBD_FLAG_SEND_FUA) blk_queue_write_cache(nbd->disk->queue, true, true); @@ -1040,7 +1040,7 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); nbd_put(nbd); @@ -1488,8 +1488,8 @@ static int nbd_dev_add(int index) /* * Tell the block layer that we are not a rotational device */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 512; blk_queue_max_discard_sectors(disk->queue, UINT_MAX); blk_queue_max_segment_size(disk->queue, UINT_MAX); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index d6be7a6d8ca6..0517613afccb 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1525,7 +1525,7 @@ static void null_config_discard(struct nullb *nullb) nullb->q->limits.discard_granularity = nullb->dev->blocksize; nullb->q->limits.discard_alignment = nullb->dev->blocksize; blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); } static int null_open(struct block_device *bdev, fmode_t mode) @@ -1810,8 +1810,8 @@ static int null_add_dev(struct nullb_device *dev) } nullb->q->queuedata = nullb; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); mutex_lock(&lock); nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8e40da093766..0016170cde0a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4370,7 +4370,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) goto out_tag_set; } - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ /* set io sizes to object size */ @@ -4383,7 +4383,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_io_opt(q, segment_size); /* enable the discard support */ - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); q->limits.discard_granularity = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index e397d3ee7308..dddb3f2490b6 100644 --- a/drivers/block/rsxx/dev.c +++ 
b/drivers/block/rsxx/dev.c @@ -287,10 +287,10 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, card->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue); if (rsxx_discard_supported(card)) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue); blk_queue_max_discard_sectors(card->queue, RSXX_HW_BLK_SIZE >> 9); card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index e41935ab41ef..bc7aea6d7b7c 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2858,8 +2858,8 @@ static int skd_cons_disk(struct skd_device *skdev) /* set optimal I/O size to 8KB */ blk_queue_io_opt(q, 8192); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); blk_queue_rq_timeout(q, 8 * HZ); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index e126e4cac2ca..3fcdc0d8eed3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -931,15 +931,15 @@ static void blkif_set_queue_limits(struct blkfront_info *info) unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); + blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); + blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); } /* Hard sector size and max sectors impersonate the equiv. hardware. 
*/ @@ -1610,8 +1610,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; - queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - queue_flag_clear(QUEUE_FLAG_SECERASE, rq); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq); + blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } break; case BLKIF_OP_FLUSH_DISKCACHE: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 85110e7931e5..71b449613cfa 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1530,8 +1530,8 @@ static int zram_add(void) /* Actual capacity set using syfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned @@ -1544,7 +1544,7 @@ static int zram_add(void) blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); /* * zram_bio_discard() will clear all logical blocks if logical block -- cgit v1.2.1 From 2bbea6e117357d17842114c65e9a9cf2d13ae8a3 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 9 Mar 2018 13:59:06 +0100 Subject: cdrom: do not call check_disk_change() inside cdrom_open() when mounting an ISO filesystem sometimes (very rarely) the system hangs because of a race condition between two tasks. PID: 6766 TASK: ffff88007b2a6dd0 CPU: 0 COMMAND: "mount" #0 [ffff880078447ae0] __schedule at ffffffff8168d605 #1 [ffff880078447b48] schedule_preempt_disabled at ffffffff8168ed49 #2 [ffff880078447b58] __mutex_lock_slowpath at ffffffff8168c995 #3 [ffff880078447bb8] mutex_lock at ffffffff8168bdef #4 [ffff880078447bd0] sr_block_ioctl at ffffffffa00b6818 [sr_mod] #5 [ffff880078447c10] blkdev_ioctl at ffffffff812fea50 #6 [ffff880078447c70] ioctl_by_bdev at ffffffff8123a8b3 #7 [ffff880078447c90] isofs_fill_super at ffffffffa04fb1e1 [isofs] #8 [ffff880078447da8] mount_bdev at ffffffff81202570 #9 [ffff880078447e18] isofs_mount at ffffffffa04f9828 [isofs] #10 [ffff880078447e28] mount_fs at ffffffff81202d09 #11 [ffff880078447e70] vfs_kern_mount at ffffffff8121ea8f #12 [ffff880078447ea8] do_mount at ffffffff81220fee #13 [ffff880078447f28] sys_mount at ffffffff812218d6 #14 [ffff880078447f80] system_call_fastpath at ffffffff81698c49 RIP: 00007fd9ea914e9a RSP: 00007ffd5d9bf648 RFLAGS: 00010246 RAX: 00000000000000a5 RBX: ffffffff81698c49 RCX: 0000000000000010 RDX: 00007fd9ec2bc210 RSI: 00007fd9ec2bc290 RDI: 00007fd9ec2bcf30 RBP: 0000000000000000 R8: 0000000000000000 R9: 0000000000000010 R10: 00000000c0ed0001 R11: 0000000000000206 R12: 00007fd9ec2bc040 R13: 00007fd9eb6b2380 R14: 00007fd9ec2bc210 R15: 00007fd9ec2bcf30 ORIG_RAX: 00000000000000a5 CS: 0033 SS: 002b This task was trying to mount the cdrom. It allocated and configured a super_block struct and owned the write-lock for the super_block->s_umount rwsem. While exclusively owning the s_umount lock, it called sr_block_ioctl and waited to acquire the global sr_mutex lock. 
PID: 6785 TASK: ffff880078720fb0 CPU: 0 COMMAND: "systemd-udevd" #0 [ffff880078417898] __schedule at ffffffff8168d605 #1 [ffff880078417900] schedule at ffffffff8168dc59 #2 [ffff880078417910] rwsem_down_read_failed at ffffffff8168f605 #3 [ffff880078417980] call_rwsem_down_read_failed at ffffffff81328838 #4 [ffff8800784179d0] down_read at ffffffff8168cde0 #5 [ffff8800784179e8] get_super at ffffffff81201cc7 #6 [ffff880078417a10] __invalidate_device at ffffffff8123a8de #7 [ffff880078417a40] flush_disk at ffffffff8123a94b #8 [ffff880078417a88] check_disk_change at ffffffff8123ab50 #9 [ffff880078417ab0] cdrom_open at ffffffffa00a29e1 [cdrom] #10 [ffff880078417b68] sr_block_open at ffffffffa00b6f9b [sr_mod] #11 [ffff880078417b98] __blkdev_get at ffffffff8123ba86 #12 [ffff880078417bf0] blkdev_get at ffffffff8123bd65 #13 [ffff880078417c78] blkdev_open at ffffffff8123bf9b #14 [ffff880078417c90] do_dentry_open at ffffffff811fc7f7 #15 [ffff880078417cd8] vfs_open at ffffffff811fc9cf #16 [ffff880078417d00] do_last at ffffffff8120d53d #17 [ffff880078417db0] path_openat at ffffffff8120e6b2 #18 [ffff880078417e48] do_filp_open at ffffffff8121082b #19 [ffff880078417f18] do_sys_open at ffffffff811fdd33 #20 [ffff880078417f70] sys_open at ffffffff811fde4e #21 [ffff880078417f80] system_call_fastpath at ffffffff81698c49 RIP: 00007f29438b0c20 RSP: 00007ffc76624b78 RFLAGS: 00010246 RAX: 0000000000000002 RBX: ffffffff81698c49 RCX: 0000000000000000 RDX: 00007f2944a5fa70 RSI: 00000000000a0800 RDI: 00007f2944a5fa70 RBP: 00007f2944a5f540 R8: 0000000000000000 R9: 0000000000000020 R10: 00007f2943614c40 R11: 0000000000000246 R12: ffffffff811fde4e R13: ffff880078417f78 R14: 000000000000000c R15: 00007f2944a4b010 ORIG_RAX: 0000000000000002 CS: 0033 SS: 002b This task tried to open the cdrom device, the sr_block_open function acquired the global sr_mutex lock. The call to check_disk_change() then saw an event flag indicating a possible media change and tried to flush any cached data for the device. As part of the flush, it tried to acquire the super_block->s_umount lock associated with the cdrom device. This was the same super_block as created and locked by the previous task. The first task acquires the s_umount lock and then the sr_mutex_lock; the second task acquires the sr_mutex_lock and then the s_umount lock. This patch fixes the issue by moving check_disk_change() out of cdrom_open() and let the caller take care of it. Signed-off-by: Maurizio Lombardi Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 7b8c6368beb7..a026211afb51 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -230,6 +230,8 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode) struct pcd_unit *cd = bdev->bd_disk->private_data; int ret; + check_disk_change(bdev); + mutex_lock(&pcd_mutex); ret = cdrom_open(&cd->info, bdev, mode); mutex_unlock(&pcd_mutex); -- cgit v1.2.1 From 233bde21aa43516baa013ef7ac33f3427056db3e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Mar 2018 15:48:06 -0700 Subject: block: Move SECTOR_SIZE and SECTOR_SHIFT definitions into It happens often while I'm preparing a patch for a block driver that I'm wondering: is a definition of SECTOR_SIZE and/or SECTOR_SHIFT available for this driver? Do I have to introduce definitions of these constants before I can use these constants? 
To avoid this confusion, move the existing definitions of SECTOR_SIZE and SECTOR_SHIFT into the header file such that these become available for all block drivers. Make the SECTOR_SIZE definition in the uapi msdos_fs.h header file conditional to avoid that including that header file after causes the compiler to complain about a SECTOR_SIZE redefinition. Note: the SECTOR_SIZE / SECTOR_SHIFT / SECTOR_BITS definitions have not been removed from uapi header files nor from NAND drivers in which these constants are used for another purpose than converting block layer offsets and sizes into a number of sectors. Cc: David S. Miller Cc: Mike Snitzer Cc: Dan Williams Cc: Minchan Kim Cc: Nitin Gupta Reviewed-by: Sergey Senozhatsky Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/block/brd.c | 1 - drivers/block/null_blk.c | 2 -- drivers/block/rbd.c | 9 --------- drivers/block/zram/zram_drv.h | 1 - 4 files changed, 13 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index deea78e485da..66cb0f857f64 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -24,7 +24,6 @@ #include -#define SECTOR_SHIFT 9 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 0517613afccb..a76553293a31 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -16,10 +16,8 @@ #include #include -#define SECTOR_SHIFT 9 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) -#define SECTOR_SIZE (1 << SECTOR_SHIFT) #define SECTOR_MASK (PAGE_SECTORS - 1) #define FREE_BATCH 16 diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 0016170cde0a..1e03b04819c8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -50,15 +50,6 @@ #define RBD_DEBUG /* Activate rbd_assert() calls */ -/* - * The basic unit of block I/O is a sector. It is interpreted in a - * number of contexts in Linux (blk, bio, genhd), but the default is - * universally 512 bytes. These symbols are just slightly more - * meaningful than the bare numbers they represent. - */ -#define SECTOR_SHIFT 9 -#define SECTOR_SIZE (1ULL << SECTOR_SHIFT) - /* * Increment the given counter and return its updated value. * If the counter is already 0 it will not be incremented. diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 31762db861e3..1e9bf65c0bfb 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -37,7 +37,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /*-- End of configurable params */ -#define SECTOR_SHIFT 9 #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) #define ZRAM_LOGICAL_BLOCK_SHIFT 12 -- cgit v1.2.1 From 2d1d4c1e591fd40bd7dafd868a249d7d00e215d5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 26 Mar 2018 21:39:11 -0700 Subject: loop: don't call into filesystem while holding lo_ctl_mutex We hit an issue where a loop device on NFS was stuck in loop_get_status() doing vfs_getattr() after the NFS server died, which caused a pile-up of uninterruptible processes waiting on lo_ctl_mutex. There's no reason to hold this lock while we wait on the filesystem; let's drop it so that other processes can do their thing. 
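The shape of the fix is the familiar pin-the-object-then-drop-the-lock pattern; a minimal sketch (the diff below carries the real error handling):

    /* Pin the backing file so it cannot go away once the mutex is dropped. */
    file = get_file(lo->lo_backing_file);
    mutex_unlock(&lo->lo_ctl_mutex);

    /* The potentially slow filesystem call now runs without lo_ctl_mutex. */
    ret = vfs_getattr(&file->f_path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);

    fput(file);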
We need to grab a reference on lo_backing_file while we use it, and we can get rid of the check on lo_device, which has been unnecessary since commit a34c0ae9ebd6 ("[PATCH] loop: remove the bio remapping capability") in the linux-history tree. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/loop.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7952ed5c607b..c633b68b69ff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1167,21 +1167,17 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { - struct file *file = lo->lo_backing_file; + struct file *file; struct kstat stat; - int error; + int ret; - if (lo->lo_state != Lo_bound) + if (lo->lo_state != Lo_bound) { + mutex_unlock(&lo->lo_ctl_mutex); return -ENXIO; - error = vfs_getattr(&file->f_path, &stat, - STATX_INO, AT_STATX_SYNC_AS_STAT); - if (error) - return error; + } + memset(info, 0, sizeof(*info)); info->lo_number = lo->lo_number; - info->lo_device = huge_encode_dev(stat.dev); - info->lo_inode = stat.ino; - info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); info->lo_offset = lo->lo_offset; info->lo_sizelimit = lo->lo_sizelimit; info->lo_flags = lo->lo_flags; @@ -1194,7 +1190,19 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, lo->lo_encrypt_key_size); } - return 0; + + /* Drop lo_ctl_mutex while we call into the filesystem. */ + file = get_file(lo->lo_backing_file); + mutex_unlock(&lo->lo_ctl_mutex); + ret = vfs_getattr(&file->f_path, &stat, STATX_INO, + AT_STATX_SYNC_AS_STAT); + if (!ret) { + info->lo_device = huge_encode_dev(stat.dev); + info->lo_inode = stat.ino; + info->lo_rdevice = huge_encode_dev(stat.rdev); + } + fput(file); + return ret; } static void @@ -1374,7 +1382,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_GET_STATUS: err = loop_get_status_old(lo, (struct loop_info __user *) arg); - break; + /* loop_get_status() unlocks lo_ctl_mutex */ + goto out_unlocked; case LOOP_SET_STATUS64: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) @@ -1383,7 +1392,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_GET_STATUS64: err = loop_get_status64(lo, (struct loop_info64 __user *) arg); - break; + /* loop_get_status() unlocks lo_ctl_mutex */ + goto out_unlocked; case LOOP_SET_CAPACITY: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) @@ -1544,7 +1554,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, mutex_lock(&lo->lo_ctl_mutex); err = loop_get_status_compat( lo, (struct compat_loop_info __user *) arg); - mutex_unlock(&lo->lo_ctl_mutex); + /* loop_get_status() unlocks lo_ctl_mutex */ break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: -- cgit v1.2.1 From 3148ffbdb9162baa28545809d675d3bf9339d6a1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 26 Mar 2018 21:39:12 -0700 Subject: loop: use killable lock in ioctls Even after the previous patch to drop lo_ctl_mutex while calling vfs_getattr(), there are other cases where we can end up sleeping for a long time while holding lo_ctl_mutex. Let's avoid the uninterruptible sleep from the ioctls. 
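The conversion is mechanical: each plain mutex_lock() on lo_ctl_mutex becomes a killable variant whose return value is propagated, roughly:

    err = mutex_lock_killable(&lo->lo_ctl_mutex);
    if (err)
        return err;    /* interrupted by a fatal signal, lock not taken */
    /* ... critical section ... */
    mutex_unlock(&lo->lo_ctl_mutex);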
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/loop.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c633b68b69ff..f34863af332a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1360,7 +1360,10 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, struct loop_device *lo = bdev->bd_disk->private_data; int err; - mutex_lock_nested(&lo->lo_ctl_mutex, 1); + err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1); + if (err) + goto out_unlocked; + switch (cmd) { case LOOP_SET_FD: err = loop_set_fd(lo, mode, bdev, arg); @@ -1545,16 +1548,20 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, switch(cmd) { case LOOP_SET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); - err = loop_set_status_compat( - lo, (const struct compat_loop_info __user *) arg); - mutex_unlock(&lo->lo_ctl_mutex); + err = mutex_lock_killable(&lo->lo_ctl_mutex); + if (!err) { + err = loop_set_status_compat(lo, + (const struct compat_loop_info __user *)arg); + mutex_unlock(&lo->lo_ctl_mutex); + } break; case LOOP_GET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); - err = loop_get_status_compat( - lo, (struct compat_loop_info __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ + err = mutex_lock_killable(&lo->lo_ctl_mutex); + if (!err) { + err = loop_get_status_compat(lo, + (struct compat_loop_info __user *)arg); + /* loop_get_status() unlocks lo_ctl_mutex */ + } break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: @@ -1959,7 +1966,9 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; - mutex_lock(&lo->lo_ctl_mutex); + ret = mutex_lock_killable(&lo->lo_ctl_mutex); + if (ret) + break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; mutex_unlock(&lo->lo_ctl_mutex); -- cgit v1.2.1
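A closing usage note on the null_blk fault attributes introduced at the top of this series: both the 'timeout' and 'requeue' module parameters are handed to setup_fault_attr(), so they are assumed to take the generic fault-injection string <interval>,<probability>,<space>,<times>. A hypothetical invocation with made-up values (interval 1, 10% timeout probability, 50% requeue probability, unlimited times):

    modprobe null_blk timeout=1,10,0,-1 requeue=1,50,0,-1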