summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-02-16 12:35:52 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-02-16 12:35:52 -0800
commite29c6a13ddf56217563f03fbc6ba9bb72dcbf2e4 (patch)
tree343cce0590d2c5e879e5dbd7e34038e880a40a21
parent713db356041071d16360e82247de3107ec9ed57f (diff)
parentf25372ffc3f6c2684b57fb718219137e6ee2b64c (diff)
downloadtalos-op-linux-e29c6a13ddf56217563f03fbc6ba9bb72dcbf2e4.tar.gz
talos-op-linux-e29c6a13ddf56217563f03fbc6ba9bb72dcbf2e4.zip
Merge tag 'block-5.6-2020-02-16' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Not a lot here, which is great, basically just three small bcache fixes from Coly, and four NVMe fixes via Keith" * tag 'block-5.6-2020-02-16' of git://git.kernel.dk/linux-block: nvme: fix the parameter order for nvme_get_log in nvme_get_fw_slot_info nvme/pci: move cqe check after device shutdown nvme: prevent warning triggered by nvme_stop_keep_alive nvme/tcp: fix bug on double requeue when send fails bcache: remove macro nr_to_fifo_front() bcache: Revert "bcache: shrink btree node cache after bch_btree_check()" bcache: ignore pending signals when creating gc and allocator thread
-rw-r--r--drivers/md/bcache/alloc.c18
-rw-r--r--drivers/md/bcache/btree.c13
-rw-r--r--drivers/md/bcache/journal.c7
-rw-r--r--drivers/md/bcache/super.c17
-rw-r--r--drivers/nvme/host/core.c12
-rw-r--r--drivers/nvme/host/pci.c23
-rw-r--r--drivers/nvme/host/rdma.c2
-rw-r--r--drivers/nvme/host/tcp.c9
8 files changed, 63 insertions, 38 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index a1df0d95151c..8bc1faf71ff2 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -67,6 +67,7 @@
#include <linux/blkdev.h>
#include <linux/kthread.h>
#include <linux/random.h>
+#include <linux/sched/signal.h>
#include <trace/events/bcache.h>
#define MAX_OPEN_BUCKETS 128
@@ -733,8 +734,21 @@ int bch_open_buckets_alloc(struct cache_set *c)
int bch_cache_allocator_start(struct cache *ca)
{
- struct task_struct *k = kthread_run(bch_allocator_thread,
- ca, "bcache_allocator");
+ struct task_struct *k;
+
+ /*
+ * In case previous btree check operation occupies too many
+ * system memory for bcache btree node cache, and the
+ * registering process is selected by OOM killer. Here just
+ * ignore the SIGKILL sent by OOM killer if there is, to
+ * avoid kthread_run() being failed by pending signals. The
+ * bcache registering process will exit after the registration
+ * done.
+ */
+ if (signal_pending(current))
+ flush_signals(current);
+
+ k = kthread_run(bch_allocator_thread, ca, "bcache_allocator");
if (IS_ERR(k))
return PTR_ERR(k);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fa872df4e770..b12186c87f52 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -34,6 +34,7 @@
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
+#include <linux/sched/signal.h>
#include <linux/rculist.h>
#include <linux/delay.h>
#include <trace/events/bcache.h>
@@ -1913,6 +1914,18 @@ static int bch_gc_thread(void *arg)
int bch_gc_thread_start(struct cache_set *c)
{
+ /*
+ * In case previous btree check operation occupies too many
+ * system memory for bcache btree node cache, and the
+ * registering process is selected by OOM killer. Here just
+ * ignore the SIGKILL sent by OOM killer if there is, to
+ * avoid kthread_run() being failed by pending signals. The
+ * bcache registering process will exit after the registration
+ * done.
+ */
+ if (signal_pending(current))
+ flush_signals(current);
+
c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
return PTR_ERR_OR_ZERO(c->gc_thread);
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 6730820780b0..0e3ff9745ac7 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -417,8 +417,6 @@ err:
/* Journalling */
-#define nr_to_fifo_front(p, front_p, mask) (((p) - (front_p)) & (mask))
-
static void btree_flush_write(struct cache_set *c)
{
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
@@ -510,9 +508,8 @@ static void btree_flush_write(struct cache_set *c)
* journal entry can be reclaimed). These selected nodes
* will be ignored and skipped in the folowing for-loop.
*/
- if (nr_to_fifo_front(btree_current_write(b)->journal,
- fifo_front_p,
- mask) != 0) {
+ if (((btree_current_write(b)->journal - fifo_front_p) &
+ mask) != 0) {
mutex_unlock(&b->write_lock);
continue;
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 2749daf09724..0c3c5419c52b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1917,23 +1917,6 @@ static int run_cache_set(struct cache_set *c)
if (bch_btree_check(c))
goto err;
- /*
- * bch_btree_check() may occupy too much system memory which
- * has negative effects to user space application (e.g. data
- * base) performance. Shrink the mca cache memory proactively
- * here to avoid competing memory with user space workloads..
- */
- if (!c->shrinker_disabled) {
- struct shrink_control sc;
-
- sc.gfp_mask = GFP_KERNEL;
- sc.nr_to_scan = c->btree_cache_used * c->btree_pages;
- /* first run to clear b->accessed tag */
- c->shrink.scan_objects(&c->shrink, &sc);
- /* second run to reap non-accessed nodes */
- c->shrink.scan_objects(&c->shrink, &sc);
- }
-
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5dc32b72e7fa..ada59df642d2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -66,8 +66,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
* nvme_reset_wq - hosts nvme reset works
* nvme_delete_wq - hosts nvme delete works
*
- * nvme_wq will host works such are scan, aen handling, fw activation,
- * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq
+ * nvme_wq will host works such as scan, aen handling, fw activation,
+ * keep-alive, periodic reconnects etc. nvme_reset_wq
* runs reset works which also flush works hosted on nvme_wq for
* serialization purposes. nvme_delete_wq host controller deletion
* works which flush reset works for serialization.
@@ -976,7 +976,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
@@ -1006,7 +1006,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
ctrl->comp_seen = false;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
return;
}
@@ -1023,7 +1023,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0))
return;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
@@ -3867,7 +3867,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;
- if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
+ if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
sizeof(*log), 0))
dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index da392b50f73e..9c80f9f08149 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1401,6 +1401,23 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
nvme_poll_irqdisable(nvmeq, -1);
}
+/*
+ * Called only on a device that has been disabled and after all other threads
+ * that can check this device's completion queues have synced. This is the
+ * last chance for the driver to see a natural completion before
+ * nvme_cancel_request() terminates all incomplete requests.
+ */
+static void nvme_reap_pending_cqes(struct nvme_dev *dev)
+{
+ u16 start, end;
+ int i;
+
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
+ nvme_process_cq(&dev->queues[i], &start, &end, -1);
+ nvme_complete_cqes(&dev->queues[i], start, end);
+ }
+}
+
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
int entry_size)
{
@@ -2235,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
if (timeout == 0)
return false;
- /* handle any remaining CQEs */
- if (opcode == nvme_admin_delete_cq &&
- !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags))
- nvme_poll_irqdisable(nvmeq, -1);
-
sent--;
if (nr_queues)
goto retry;
@@ -2428,6 +2440,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_suspend_io_queues(dev);
nvme_suspend_queue(&dev->queues[0]);
nvme_pci_disable(dev);
+ nvme_reap_pending_cqes(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2a47c6c5007e..3e85c5cacefd 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1088,7 +1088,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
return;
- queue_work(nvme_wq, &ctrl->err_work);
+ queue_work(nvme_reset_wq, &ctrl->err_work);
}
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 6d43b23a0fc8..49d4373b84eb 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -422,7 +422,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return;
- queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work);
+ queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
@@ -1054,7 +1054,12 @@ static void nvme_tcp_io_work(struct work_struct *w)
} else if (unlikely(result < 0)) {
dev_err(queue->ctrl->ctrl.device,
"failed to send request %d\n", result);
- if (result != -EPIPE)
+
+ /*
+ * Fail the request unless peer closed the connection,
+ * in which case error recovery flow will complete all.
+ */
+ if ((result != -EPIPE) && (result != -ECONNRESET))
nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue);
return;
OpenPOWER on IntegriCloud