From 0c56010c83703e1f33325838eda9a2077827b6f1 Mon Sep 17 00:00:00 2001 From: Matias Bjorling Date: Tue, 10 Dec 2013 16:50:38 +0100 Subject: null_blk: mem garbage on NUMA systems during init For NUMA systems, initializing the blk-mq layer and using per node hctx. We initialize submit queues to 1, while blk-mq nr_hw_queues is initialized to the number of NUMA nodes. This makes the null_init_hctx function overwrite memory outside of what it allocated. In my case it lead to writing garbage into struct request_queue's mq_map. Signed-off-by: Matias Bjorling Cc: Jens Axboe Signed-off-by: Linus Torvalds --- drivers/block/null_blk.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ea192ec029c4..f370fc13aea5 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -495,23 +495,23 @@ static int null_add_dev(void) spin_lock_init(&nullb->lock); + if (queue_mode == NULL_Q_MQ && use_per_node_hctx) + submit_queues = nr_online_nodes; + if (setup_queues(nullb)) goto err; if (queue_mode == NULL_Q_MQ) { null_mq_reg.numa_node = home_node; null_mq_reg.queue_depth = hw_queue_depth; + null_mq_reg.nr_hw_queues = submit_queues; if (use_per_node_hctx) { null_mq_reg.ops->alloc_hctx = null_alloc_hctx; null_mq_reg.ops->free_hctx = null_free_hctx; - - null_mq_reg.nr_hw_queues = nr_online_nodes; } else { null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue; null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue; - - null_mq_reg.nr_hw_queues = submit_queues; } nullb->q = blk_mq_init_queue(&null_mq_reg, nullb); -- cgit v1.2.1 From 2d263a7856cbaf26dd89b671e2161c4a49f8461b Mon Sep 17 00:00:00 2001 From: Matias Bjorling Date: Wed, 18 Dec 2013 13:41:43 +0100 Subject: null_blk: refactor init and init errors code paths Simplify the initialization logic of the three block-layers. - The queue initialization is split into two parts. This allows reuse of code when initializing the sq-, bio- and mq-based layers. - Set submit_queues default value to 0 and always set it at init time. - Simplify the init error code paths. Signed-off-by: Matias Bjorling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 63 +++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index f370fc13aea5..f0aeb2a7a9ca 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -65,7 +65,7 @@ enum { NULL_Q_MQ = 2, }; -static int submit_queues = 1; +static int submit_queues; module_param(submit_queues, int, S_IRUGO); MODULE_PARM_DESC(submit_queues, "Number of submission queues"); @@ -355,16 +355,24 @@ static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) kfree(hctx); } +static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) +{ + BUG_ON(!nullb); + BUG_ON(!nq); + + init_waitqueue_head(&nq->wait); + nq->queue_depth = nullb->queue_depth; +} + static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int index) { struct nullb *nullb = data; struct nullb_queue *nq = &nullb->queues[index]; - init_waitqueue_head(&nq->wait); - nq->queue_depth = nullb->queue_depth; - nullb->nr_queues++; hctx->driver_data = nq; + null_init_queue(nullb, nq); + nullb->nr_queues++; return 0; } @@ -417,13 +425,13 @@ static int setup_commands(struct nullb_queue *nq) nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL); if (!nq->cmds) - return 1; + return -ENOMEM; tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL); if (!nq->tag_map) { kfree(nq->cmds); - return 1; + return -ENOMEM; } for (i = 0; i < nq->queue_depth; i++) { @@ -454,33 +462,37 @@ static void cleanup_queues(struct nullb *nullb) static int setup_queues(struct nullb *nullb) { - struct nullb_queue *nq; - int i; - - nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL); + nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue), + GFP_KERNEL); if (!nullb->queues) - return 1; + return -ENOMEM; nullb->nr_queues = 0; nullb->queue_depth = hw_queue_depth; - if (queue_mode == NULL_Q_MQ) - return 0; + return 0; +} + +static int init_driver_queues(struct nullb *nullb) +{ + struct nullb_queue *nq; + int i, ret = 0; for (i = 0; i < submit_queues; i++) { nq = &nullb->queues[i]; - init_waitqueue_head(&nq->wait); - nq->queue_depth = hw_queue_depth; - if (setup_commands(nq)) - break; + + null_init_queue(nullb, nq); + + ret = setup_commands(nq); + if (ret) + goto err_queue; nullb->nr_queues++; } - if (i == submit_queues) - return 0; - + return 0; +err_queue: cleanup_queues(nullb); - return 1; + return ret; } static int null_add_dev(void) @@ -495,9 +507,6 @@ static int null_add_dev(void) spin_lock_init(&nullb->lock); - if (queue_mode == NULL_Q_MQ && use_per_node_hctx) - submit_queues = nr_online_nodes; - if (setup_queues(nullb)) goto err; @@ -518,11 +527,13 @@ static int null_add_dev(void) } else if (queue_mode == NULL_Q_BIO) { nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); blk_queue_make_request(nullb->q, null_queue_bio); + init_driver_queues(nullb); } else { nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); blk_queue_prep_rq(nullb->q, null_rq_prep_fn); if (nullb->q) blk_queue_softirq_done(nullb->q, null_softirq_done_fn); + init_driver_queues(nullb); } if (!nullb->q) @@ -579,7 +590,9 @@ static int __init null_init(void) } #endif - if (submit_queues > nr_cpu_ids) + if (queue_mode == NULL_Q_MQ && use_per_node_hctx) + submit_queues = nr_online_nodes; + else if (submit_queues > nr_cpu_ids) submit_queues = nr_cpu_ids; else if (!submit_queues) submit_queues = 1; -- cgit v1.2.1 From d15ee6b1a43afbe1a6cece3bd8d336a9d5cb7060 Mon Sep 17 00:00:00 2001 From: Matias Bjorling Date: Wed, 18 Dec 2013 13:41:44 +0100 Subject: null_blk: warning on ignored submit_queues param Let the user know when the number of submission queues are being ignored. Signed-off-by: Matias Bjorling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index f0aeb2a7a9ca..8f2e7c330d6d 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -590,9 +590,12 @@ static int __init null_init(void) } #endif - if (queue_mode == NULL_Q_MQ && use_per_node_hctx) + if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { + if (submit_queues > 0) + pr_warn("null_blk: submit_queues param is set to %u.", + nr_online_nodes); submit_queues = nr_online_nodes; - else if (submit_queues > nr_cpu_ids) + } else if (submit_queues > nr_cpu_ids) submit_queues = nr_cpu_ids; else if (!submit_queues) submit_queues = 1; -- cgit v1.2.1 From 200052440d3b56f593038a35b7c14bdc780184a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sat, 21 Dec 2013 00:11:00 +0100 Subject: null_blk: set use_per_node_hctx param to false The defaults for the module is to instantiate itself with blk-mq and a submit queue for each CPU node in the system. To save resources, initialize instead with a single submit queue. Signed-off-by: Matias Bjorling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 8f2e7c330d6d..9b0311b61fe1 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -101,9 +101,9 @@ static int hw_queue_depth = 64; module_param(hw_queue_depth, int, S_IRUGO); MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); -static bool use_per_node_hctx = true; +static bool use_per_node_hctx = false; module_param(use_per_node_hctx, bool, S_IRUGO); -MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true"); +MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); static void put_tag(struct nullb_queue *nq, unsigned int tag) { -- cgit v1.2.1 From fc1bc35443741e132dd0118e8dbac53f69a6f76e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sat, 21 Dec 2013 00:11:01 +0100 Subject: null_blk: support submit_queues on use_per_node_hctx In the case of both the submit_queues param and use_per_node_hctx param are used. We limit the number af submit_queues to the number of online nodes. If the submit_queues is a multiple of nr_online_nodes, its trivial. Simply map them to the nodes. For example: 8 submit queues are mapped as node0[0,1], node1[2,3], ... If uneven, we are left with an uneven number of submit_queues that must be mapped. These are mapped toward the first node and onward. E.g. 5 submit queues mapped onto 4 nodes are mapped as node0[0,1], node1[2], ... Signed-off-by: Matias Bjorling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'drivers/block/null_blk.c') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 9b0311b61fe1..528f4e47f38e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1,4 +1,5 @@ #include + #include #include #include @@ -346,8 +347,37 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) { - return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, - hctx_index); + int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes); + int tip = (reg->nr_hw_queues % nr_online_nodes); + int node = 0, i, n; + + /* + * Split submit queues evenly wrt to the number of nodes. If uneven, + * fill the first buckets with one extra, until the rest is filled with + * no extra. + */ + for (i = 0, n = 1; i < hctx_index; i++, n++) { + if (n % b_size == 0) { + n = 0; + node++; + + tip--; + if (!tip) + b_size = reg->nr_hw_queues / nr_online_nodes; + } + } + + /* + * A node might not be online, therefore map the relative node id to the + * real node id. + */ + for_each_online_node(n) { + if (!node) + break; + node--; + } + + return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n); } static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) @@ -591,10 +621,11 @@ static int __init null_init(void) #endif if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { - if (submit_queues > 0) + if (submit_queues < nr_online_nodes) { pr_warn("null_blk: submit_queues param is set to %u.", nr_online_nodes); - submit_queues = nr_online_nodes; + submit_queues = nr_online_nodes; + } } else if (submit_queues > nr_cpu_ids) submit_queues = nr_cpu_ids; else if (!submit_queues) -- cgit v1.2.1