diff options
Diffstat (limited to 'drivers/block/nbd.c')
| -rw-r--r-- | drivers/block/nbd.c | 226 |
1 files changed, 172 insertions, 54 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index afbc202ca6fd..3fb95c8d9fd8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -76,6 +76,7 @@ struct link_dead_args { #define NBD_HAS_CONFIG_REF 4 #define NBD_BOUND 5 #define NBD_DESTROY_ON_DISCONNECT 6 +#define NBD_DISCONNECT_ON_CLOSE 7 struct nbd_config { u32 flags; @@ -111,12 +112,16 @@ struct nbd_device { struct task_struct *task_setup; }; +#define NBD_CMD_REQUEUED 1 + struct nbd_cmd { struct nbd_device *nbd; + struct mutex lock; int index; int cookie; - struct completion send_complete; blk_status_t status; + unsigned long flags; + u32 cmd_cookie; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -138,12 +143,42 @@ static void nbd_config_put(struct nbd_device *nbd); static void nbd_connect_reply(struct genl_info *info, int index); static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); static void nbd_dead_link_work(struct work_struct *work); +static void nbd_disconnect_and_put(struct nbd_device *nbd); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { return disk_to_dev(nbd->disk); } +static void nbd_requeue_cmd(struct nbd_cmd *cmd) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + + if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags)) + blk_mq_requeue_request(req, true); +} + +#define NBD_COOKIE_BITS 32 + +static u64 nbd_cmd_handle(struct nbd_cmd *cmd) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + u32 tag = blk_mq_unique_tag(req); + u64 cookie = cmd->cmd_cookie; + + return (cookie << NBD_COOKIE_BITS) | tag; +} + +static u32 nbd_handle_to_tag(u64 handle) +{ + return (u32)handle; +} + +static u32 nbd_handle_to_cookie(u64 handle) +{ + return (u32)(handle >> NBD_COOKIE_BITS); +} + static const char *nbdcmd_to_ascii(int cmd) { switch (cmd) { @@ -166,16 +201,19 @@ static ssize_t pid_show(struct device *dev, } static const struct device_attribute pid_attr = { - .attr = { .name = "pid", .mode = S_IRUGO}, + .attr = { .name = "pid", .mode = 0444}, .show = pid_show, }; static void nbd_dev_remove(struct nbd_device *nbd) { struct gendisk *disk = nbd->disk; + struct request_queue *q; + if (disk) { + q = disk->queue; del_gendisk(disk); - blk_cleanup_queue(disk->queue); + blk_cleanup_queue(q); blk_mq_free_tag_set(&nbd->tag_set); disk->private_data = NULL; put_disk(disk); @@ -213,7 +251,15 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, } if (!nsock->dead) { kernel_sock_shutdown(nsock->sock, SHUT_RDWR); - atomic_dec(&nbd->config->live_connections); + if (atomic_dec_return(&nbd->config->live_connections) == 0) { + if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED, + &nbd->config->runtime_flags)) { + set_bit(NBD_DISCONNECTED, + &nbd->config->runtime_flags); + dev_info(nbd_to_dev(nbd), + "Disconnected due to user request.\n"); + } + } } nsock->dead = true; nsock->pending = NULL; @@ -231,9 +277,23 @@ static void nbd_size_clear(struct nbd_device *nbd) static void nbd_size_update(struct nbd_device *nbd) { struct nbd_config *config = nbd->config; + struct block_device *bdev = bdget_disk(nbd->disk, 0); + + if (config->flags & NBD_FLAG_SEND_TRIM) { + nbd->disk->queue->limits.discard_granularity = config->blksize; + nbd->disk->queue->limits.discard_alignment = config->blksize; + blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); + } blk_queue_logical_block_size(nbd->disk->queue, config->blksize); blk_queue_physical_block_size(nbd->disk->queue, config->blksize); set_capacity(nbd->disk, config->bytesize >> 9); + if (bdev) { + if (bdev->bd_disk) + bd_set_size(bdev, config->bytesize); + else + bdev->bd_invalidated = 1; + bdput(bdev); + } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); } @@ -243,13 +303,15 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, struct nbd_config *config = nbd->config; config->blksize = blocksize; config->bytesize = blocksize * nr_blocks; + if (nbd->task_recv != NULL) + nbd_size_update(nbd); } static void nbd_complete_rq(struct request *req) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); - dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd, + dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req, cmd->status ? "failed" : "done"); blk_mq_end_request(req, cmd->status); @@ -286,13 +348,18 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; - return BLK_EH_HANDLED; + goto done; } config = nbd->config; + if (!mutex_trylock(&cmd->lock)) + return BLK_EH_RESET_TIMER; + if (config->num_connections > 1) { dev_err_ratelimited(nbd_to_dev(nbd), - "Connection timed out, retrying\n"); + "Connection timed out, retrying (%d/%d alive)\n", + atomic_read(&config->live_connections), + config->num_connections); /* * Hooray we have more connections, requeue this IO, the submit * path will put it on a real connection. @@ -312,9 +379,10 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); } - blk_mq_requeue_request(req, true); + mutex_unlock(&cmd->lock); + nbd_requeue_cmd(cmd); nbd_config_put(nbd); - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; } } else { dev_err_ratelimited(nbd_to_dev(nbd), @@ -322,10 +390,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, } set_bit(NBD_TIMEDOUT, &config->runtime_flags); cmd->status = BLK_STS_IOERR; + mutex_unlock(&cmd->lock); sock_shutdown(nbd); nbd_config_put(nbd); - - return BLK_EH_HANDLED; +done: + blk_mq_complete_request(req); + return BLK_EH_DONE; } /* @@ -398,9 +468,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) struct iov_iter from; unsigned long size = blk_rq_bytes(req); struct bio *bio; + u64 handle; u32 type; u32 nbd_cmd_flags = 0; - u32 tag = blk_mq_unique_tag(req); int sent = nsock->sent, skip = 0; iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); @@ -442,6 +512,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) goto send_pages; } iov_iter_advance(&from, sent); + } else { + cmd->cmd_cookie++; } cmd->index = index; cmd->cookie = nsock->cookie; @@ -450,10 +522,11 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.len = htonl(size); } - memcpy(request.handle, &tag, sizeof(tag)); + handle = nbd_cmd_handle(cmd); + memcpy(request.handle, &handle, sizeof(handle)); dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", - cmd, nbdcmd_to_ascii(type), + req, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); @@ -468,6 +541,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) nsock->pending = req; nsock->sent = sent; } + set_bit(NBD_CMD_REQUEUED, &cmd->flags); return BLK_STS_RESOURCE; } dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -489,7 +563,7 @@ send_pages: int flags = is_last ? 0 : MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", - cmd, bvec.bv_len); + req, bvec.bv_len); iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, bvec.bv_len); if (skip) { @@ -509,6 +583,7 @@ send_pages: */ nsock->pending = req; nsock->sent = sent; + set_bit(NBD_CMD_REQUEUED, &cmd->flags); return BLK_STS_RESOURCE; } dev_err(disk_to_dev(nbd->disk), @@ -541,10 +616,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct nbd_reply reply; struct nbd_cmd *cmd; struct request *req = NULL; + u64 handle; u16 hwq; u32 tag; struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; struct iov_iter to; + int ret = 0; reply.magic = 0; iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); @@ -562,8 +639,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) return ERR_PTR(-EPROTO); } - memcpy(&tag, reply.handle, sizeof(u32)); - + memcpy(&handle, reply.handle, sizeof(handle)); + tag = nbd_handle_to_tag(handle); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < nbd->tag_set.nr_hw_queues) req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], @@ -574,14 +651,28 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) return ERR_PTR(-ENOENT); } cmd = blk_mq_rq_to_pdu(req); + + mutex_lock(&cmd->lock); + if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { + dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", + req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); + ret = -ENOENT; + goto out; + } + if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { + dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", + req); + ret = -ENOENT; + goto out; + } if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); cmd->status = BLK_STS_IOERR; - return cmd; + goto out; } - dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd); + dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); if (rq_data_dir(req) != WRITE) { struct req_iterator iter; struct bio_vec bvec; @@ -603,18 +694,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (nbd_disconnected(config) || config->num_connections <= 1) { cmd->status = BLK_STS_IOERR; - return cmd; + goto out; } - return ERR_PTR(-EIO); + ret = -EIO; + goto out; } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", - cmd, bvec.bv_len); + req, bvec.bv_len); } - } else { - /* See the comment in nbd_queue_rq. */ - wait_for_completion(&cmd->send_complete); } - return cmd; +out: + mutex_unlock(&cmd->lock); + return ret ? ERR_PTR(ret) : cmd; } static void recv_work(struct work_struct *work) @@ -647,11 +738,8 @@ static void recv_work(struct work_struct *work) static void nbd_clear_req(struct request *req, void *data, bool reserved) { - struct nbd_cmd *cmd; + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); - if (!blk_mq_request_started(req)) - return; - cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(req); } @@ -714,10 +802,9 @@ static int wait_for_reconnect(struct nbd_device *nbd) return 0; if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) return 0; - wait_event_timeout(config->conn_wait, - atomic_read(&config->live_connections), - config->dead_conn_timeout); - return atomic_read(&config->live_connections); + return wait_event_timeout(config->conn_wait, + atomic_read(&config->live_connections) > 0, + config->dead_conn_timeout) > 0; } static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) @@ -777,7 +864,7 @@ again: */ blk_mq_start_request(req); if (unlikely(nsock->pending && nsock->pending != req)) { - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); ret = 0; goto out; } @@ -790,7 +877,7 @@ again: dev_err_ratelimited(disk_to_dev(nbd->disk), "Request send failed, requeueing\n"); nbd_mark_nsock_dead(nbd, nsock, 1); - blk_mq_requeue_request(req, true); + nbd_requeue_cmd(cmd); ret = 0; } out: @@ -814,7 +901,8 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, * that the server is misbehaving (or there was an error) before we're * done sending everything over the wire. */ - init_completion(&cmd->send_complete); + mutex_lock(&cmd->lock); + clear_bit(NBD_CMD_REQUEUED, &cmd->flags); /* We can be called directly from the user space process, which means we * could possibly have signals pending so our sendmsg will fail. In @@ -826,7 +914,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, ret = BLK_STS_IOERR; else if (!ret) ret = BLK_STS_OK; - complete(&cmd->send_complete); + mutex_unlock(&cmd->lock); return ret; } @@ -950,10 +1038,6 @@ static void nbd_bdev_reset(struct block_device *bdev) if (bdev->bd_openers > 1) return; bd_set_size(bdev, 0); - if (max_part > 0) { - blkdev_reread_part(bdev); - bdev->bd_invalidated = 1; - } } static void nbd_parse_flags(struct nbd_device *nbd) @@ -1040,6 +1124,9 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; + nbd->disk->queue->limits.discard_granularity = 0; + nbd->disk->queue->limits.discard_alignment = 0; + blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); @@ -1109,7 +1196,6 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b if (ret) return ret; - bd_set_size(bdev, config->bytesize); if (max_part) bdev->bd_invalidated = 1; mutex_unlock(&nbd->config_lock); @@ -1118,7 +1204,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b if (ret) sock_shutdown(nbd); mutex_lock(&nbd->config_lock); - bd_set_size(bdev, 0); + nbd_bdev_reset(bdev); /* user requested, ignore socket errors */ if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) ret = 0; @@ -1269,6 +1355,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); + bdev->bd_invalidated = 1; + } else if (nbd_disconnected(nbd->config)) { + bdev->bd_invalidated = 1; } out: mutex_unlock(&nbd_index_mutex); @@ -1278,6 +1367,12 @@ out: static void nbd_release(struct gendisk *disk, fmode_t mode) { struct nbd_device *nbd = disk->private_data; + struct block_device *bdev = bdget_disk(disk, 0); + + if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && + bdev->bd_openers == 0) + nbd_disconnect_and_put(nbd); + nbd_config_put(nbd); nbd_put(nbd); } @@ -1425,6 +1520,8 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->nbd = set->driver_data; + cmd->flags = 0; + mutex_init(&cmd->lock); return 0; } @@ -1490,8 +1587,9 @@ static int nbd_dev_add(int index) */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); - disk->queue->limits.discard_granularity = 512; - blk_queue_max_discard_sectors(disk->queue, UINT_MAX); + disk->queue->limits.discard_granularity = 0; + disk->queue->limits.discard_alignment = 0; + blk_queue_max_discard_sectors(disk->queue, 0); blk_queue_max_segment_size(disk->queue, UINT_MAX); blk_queue_max_segments(disk->queue, USHRT_MAX); blk_queue_max_hw_sectors(disk->queue, 65536); @@ -1677,6 +1775,10 @@ again: &config->runtime_flags); put_dev = true; } + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { @@ -1721,6 +1823,17 @@ out: return ret; } +static void nbd_disconnect_and_put(struct nbd_device *nbd) +{ + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + nbd_clear_sock(nbd); + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); +} + static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) { struct nbd_device *nbd; @@ -1753,12 +1866,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return 0; } - mutex_lock(&nbd->config_lock); - nbd_disconnect(nbd); - mutex_unlock(&nbd->config_lock); - if (test_and_clear_bit(NBD_HAS_CONFIG_REF, - &nbd->config->runtime_flags)) - nbd_config_put(nbd); + nbd_disconnect_and_put(nbd); nbd_config_put(nbd); nbd_put(nbd); return 0; @@ -1769,7 +1877,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) struct nbd_device *nbd = NULL; struct nbd_config *config; int index; - int ret = -EINVAL; + int ret = 0; bool put_dev = false; if (!netlink_capable(skb, CAP_SYS_ADMIN)) @@ -1809,6 +1917,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) !nbd->task_recv) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); + ret = -EINVAL; goto out; } @@ -1833,6 +1942,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) &config->runtime_flags)) refcount_inc(&nbd->refs); } + + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } else { + clear_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { @@ -2093,7 +2210,8 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; recv_workqueue = alloc_workqueue("knbd-recv", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + WQ_MEM_RECLAIM | WQ_HIGHPRI | + WQ_UNBOUND, 0); if (!recv_workqueue) return -ENOMEM; |

