diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 17:21:53 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 17:21:53 -0800 |
commit | cf1b3341afab9d3ad02a76b3a619ea027dcf4e28 (patch) | |
tree | 99d5e2df998556c6ba1e9022eaa361aee74bf4a8 | |
parent | 80eabba70260dcb55b05098f6c1fecbe5c0e518b (diff) | |
parent | d1b1cea1e58477dad88ff769f54c0d2dfa56d923 (diff) | |
download | blackbird-op-linux-cf1b3341afab9d3ad02a76b3a619ea027dcf4e28.tar.gz blackbird-op-linux-cf1b3341afab9d3ad02a76b3a619ea027dcf4e28.zip |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block IO fixes from Jens Axboe:
"A few fixes that I collected as post-merge.
I was going to wait a bit with sending this out, but the O_DIRECT fix
should really go in sooner rather than later"
* 'for-linus' of git://git.kernel.dk/linux-block:
blk-mq: Fix failed allocation path when mapping queues
blk-mq: Avoid memory reclaim when remapping queues
block_dev: don't update file access position for sync direct IO
nvme/pci: Log PCI_STATUS when the controller dies
block_dev: don't test bdev->bd_contains when it is not stable
-rw-r--r-- | block/blk-mq.c | 32 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 22 | ||||
-rw-r--r-- | fs/block_dev.c | 7 |
3 files changed, 45 insertions, 16 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index d79fdc11b1ee..4bf850e8d6b5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1605,7 +1605,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, INIT_LIST_HEAD(&tags->page_list); tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), - GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, set->numa_node); if (!tags->rqs) { blk_mq_free_tags(tags); @@ -1631,7 +1631,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, do { page = alloc_pages_node(set->numa_node, - GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) break; @@ -1652,7 +1652,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, * Allow kmemleak to scan these pages as they contain pointers * to additional allocations like via ops->init_request(). */ - kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL); + kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); entries_per_page = order_to_size(this_order) / rq_size; to_do = min(entries_per_page, set->queue_depth - i); left -= to_do * rq_size; @@ -1870,7 +1870,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, static void blk_mq_map_swqueue(struct request_queue *q, const struct cpumask *online_mask) { - unsigned int i; + unsigned int i, hctx_idx; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; @@ -1893,6 +1893,21 @@ static void blk_mq_map_swqueue(struct request_queue *q, if (!cpumask_test_cpu(i, online_mask)) continue; + hctx_idx = q->mq_map[i]; + /* unmapped hw queue can be remapped after CPU topo changed */ + if (!set->tags[hctx_idx]) { + set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx); + + /* + * If tags initialization fail for some hctx, + * that hctx won't be brought online. In this + * case, remap the current ctx to hctx[0] which + * is guaranteed to always have tags allocated + */ + if (!set->tags[hctx_idx]) + q->mq_map[i] = 0; + } + ctx = per_cpu_ptr(q->queue_ctx, i); hctx = blk_mq_map_queue(q, i); @@ -1909,7 +1924,11 @@ static void blk_mq_map_swqueue(struct request_queue *q, * disable it and free the request entries. */ if (!hctx->nr_ctx) { - if (set->tags[i]) { + /* Never unmap queue 0. We need it as a + * fallback in case of a new remap fails + * allocation + */ + if (i && set->tags[i]) { blk_mq_free_rq_map(set, set->tags[i], i); set->tags[i] = NULL; } @@ -1917,9 +1936,6 @@ static void blk_mq_map_swqueue(struct request_queue *q, continue; } - /* unmapped hw queue can be remapped after CPU topo changed */ - if (!set->tags[i]) - set->tags[i] = blk_mq_init_rq_map(set, i); hctx->tags = set->tags[i]; WARN_ON(!hctx->tags); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d6e6bce93d0c..2fd7dc2e8fc4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1282,6 +1282,24 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) return true; } +static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) +{ + /* Read a config register to help see what died. */ + u16 pci_status; + int result; + + result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS, + &pci_status); + if (result == PCIBIOS_SUCCESSFUL) + dev_warn(dev->dev, + "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n", + csts, pci_status); + else + dev_warn(dev->dev, + "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n", + csts, result); +} + static void nvme_watchdog_timer(unsigned long data) { struct nvme_dev *dev = (struct nvme_dev *)data; @@ -1290,9 +1308,7 @@ static void nvme_watchdog_timer(unsigned long data) /* Skip controllers under certain specific conditions. */ if (nvme_should_reset(dev, csts)) { if (!nvme_reset(dev)) - dev_warn(dev->dev, - "Failed status: 0x%x, reset controller.\n", - csts); + nvme_warn_reset(dev, csts); return; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 95acbd2ebc5d..7c4507224ed6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -264,7 +264,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(bio.bi_error)) return bio.bi_error; - iocb->ki_pos += ret; return ret; } @@ -411,10 +410,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) __set_current_state(TASK_RUNNING); ret = dio->bio.bi_error; - if (likely(!ret)) { + if (likely(!ret)) ret = dio->size; - iocb->ki_pos += ret; - } bio_put(&dio->bio); return ret; @@ -1089,7 +1086,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, return true; /* already a holder */ else if (bdev->bd_holder != NULL) return false; /* held by someone else */ - else if (bdev->bd_contains == bdev) + else if (whole == bdev) return true; /* is a whole device which isn't held */ else if (whole->bd_holder == bd_may_claim) |