summaryrefslogtreecommitdiffstats
path: root/drivers/block/nbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r--drivers/block/nbd.c226
1 files changed, 172 insertions, 54 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index afbc202ca6fd..3fb95c8d9fd8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -76,6 +76,7 @@ struct link_dead_args {
#define NBD_HAS_CONFIG_REF 4
#define NBD_BOUND 5
#define NBD_DESTROY_ON_DISCONNECT 6
+#define NBD_DISCONNECT_ON_CLOSE 7
struct nbd_config {
u32 flags;
@@ -111,12 +112,16 @@ struct nbd_device {
struct task_struct *task_setup;
};
+#define NBD_CMD_REQUEUED 1
+
struct nbd_cmd {
struct nbd_device *nbd;
+ struct mutex lock;
int index;
int cookie;
- struct completion send_complete;
blk_status_t status;
+ unsigned long flags;
+ u32 cmd_cookie;
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -138,12 +143,42 @@ static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
static void nbd_dead_link_work(struct work_struct *work);
+static void nbd_disconnect_and_put(struct nbd_device *nbd);
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
return disk_to_dev(nbd->disk);
}
+static void nbd_requeue_cmd(struct nbd_cmd *cmd)
+{
+ struct request *req = blk_mq_rq_from_pdu(cmd);
+
+ if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
+ blk_mq_requeue_request(req, true);
+}
+
+#define NBD_COOKIE_BITS 32
+
+static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
+{
+ struct request *req = blk_mq_rq_from_pdu(cmd);
+ u32 tag = blk_mq_unique_tag(req);
+ u64 cookie = cmd->cmd_cookie;
+
+ return (cookie << NBD_COOKIE_BITS) | tag;
+}
+
+static u32 nbd_handle_to_tag(u64 handle)
+{
+ return (u32)handle;
+}
+
+static u32 nbd_handle_to_cookie(u64 handle)
+{
+ return (u32)(handle >> NBD_COOKIE_BITS);
+}
+
static const char *nbdcmd_to_ascii(int cmd)
{
switch (cmd) {
@@ -166,16 +201,19 @@ static ssize_t pid_show(struct device *dev,
}
static const struct device_attribute pid_attr = {
- .attr = { .name = "pid", .mode = S_IRUGO},
+ .attr = { .name = "pid", .mode = 0444},
.show = pid_show,
};
static void nbd_dev_remove(struct nbd_device *nbd)
{
struct gendisk *disk = nbd->disk;
+ struct request_queue *q;
+
if (disk) {
+ q = disk->queue;
del_gendisk(disk);
- blk_cleanup_queue(disk->queue);
+ blk_cleanup_queue(q);
blk_mq_free_tag_set(&nbd->tag_set);
disk->private_data = NULL;
put_disk(disk);
@@ -213,7 +251,15 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
}
if (!nsock->dead) {
kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
- atomic_dec(&nbd->config->live_connections);
+ if (atomic_dec_return(&nbd->config->live_connections) == 0) {
+ if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+ &nbd->config->runtime_flags)) {
+ set_bit(NBD_DISCONNECTED,
+ &nbd->config->runtime_flags);
+ dev_info(nbd_to_dev(nbd),
+ "Disconnected due to user request.\n");
+ }
+ }
}
nsock->dead = true;
nsock->pending = NULL;
@@ -231,9 +277,23 @@ static void nbd_size_clear(struct nbd_device *nbd)
static void nbd_size_update(struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
+ struct block_device *bdev = bdget_disk(nbd->disk, 0);
+
+ if (config->flags & NBD_FLAG_SEND_TRIM) {
+ nbd->disk->queue->limits.discard_granularity = config->blksize;
+ nbd->disk->queue->limits.discard_alignment = config->blksize;
+ blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
+ }
blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
set_capacity(nbd->disk, config->bytesize >> 9);
+ if (bdev) {
+ if (bdev->bd_disk)
+ bd_set_size(bdev, config->bytesize);
+ else
+ bdev->bd_invalidated = 1;
+ bdput(bdev);
+ }
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}
@@ -243,13 +303,15 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
struct nbd_config *config = nbd->config;
config->blksize = blocksize;
config->bytesize = blocksize * nr_blocks;
+ if (nbd->task_recv != NULL)
+ nbd_size_update(nbd);
}
static void nbd_complete_rq(struct request *req)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
- dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd,
+ dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req,
cmd->status ? "failed" : "done");
blk_mq_end_request(req, cmd->status);
@@ -286,13 +348,18 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
- return BLK_EH_HANDLED;
+ goto done;
}
config = nbd->config;
+ if (!mutex_trylock(&cmd->lock))
+ return BLK_EH_RESET_TIMER;
+
if (config->num_connections > 1) {
dev_err_ratelimited(nbd_to_dev(nbd),
- "Connection timed out, retrying\n");
+ "Connection timed out, retrying (%d/%d alive)\n",
+ atomic_read(&config->live_connections),
+ config->num_connections);
/*
* Hooray we have more connections, requeue this IO, the submit
* path will put it on a real connection.
@@ -312,9 +379,10 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
nbd_mark_nsock_dead(nbd, nsock, 1);
mutex_unlock(&nsock->tx_lock);
}
- blk_mq_requeue_request(req, true);
+ mutex_unlock(&cmd->lock);
+ nbd_requeue_cmd(cmd);
nbd_config_put(nbd);
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
}
} else {
dev_err_ratelimited(nbd_to_dev(nbd),
@@ -322,10 +390,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
}
set_bit(NBD_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
+ mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
nbd_config_put(nbd);
-
- return BLK_EH_HANDLED;
+done:
+ blk_mq_complete_request(req);
+ return BLK_EH_DONE;
}
/*
@@ -398,9 +468,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
struct iov_iter from;
unsigned long size = blk_rq_bytes(req);
struct bio *bio;
+ u64 handle;
u32 type;
u32 nbd_cmd_flags = 0;
- u32 tag = blk_mq_unique_tag(req);
int sent = nsock->sent, skip = 0;
iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
@@ -442,6 +512,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
goto send_pages;
}
iov_iter_advance(&from, sent);
+ } else {
+ cmd->cmd_cookie++;
}
cmd->index = index;
cmd->cookie = nsock->cookie;
@@ -450,10 +522,11 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
}
- memcpy(request.handle, &tag, sizeof(tag));
+ handle = nbd_cmd_handle(cmd);
+ memcpy(request.handle, &handle, sizeof(handle));
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
- cmd, nbdcmd_to_ascii(type),
+ req, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
result = sock_xmit(nbd, index, 1, &from,
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
@@ -468,6 +541,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
nsock->pending = req;
nsock->sent = sent;
}
+ set_bit(NBD_CMD_REQUEUED, &cmd->flags);
return BLK_STS_RESOURCE;
}
dev_err_ratelimited(disk_to_dev(nbd->disk),
@@ -489,7 +563,7 @@ send_pages:
int flags = is_last ? 0 : MSG_MORE;
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
- cmd, bvec.bv_len);
+ req, bvec.bv_len);
iov_iter_bvec(&from, ITER_BVEC | WRITE,
&bvec, 1, bvec.bv_len);
if (skip) {
@@ -509,6 +583,7 @@ send_pages:
*/
nsock->pending = req;
nsock->sent = sent;
+ set_bit(NBD_CMD_REQUEUED, &cmd->flags);
return BLK_STS_RESOURCE;
}
dev_err(disk_to_dev(nbd->disk),
@@ -541,10 +616,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
struct nbd_reply reply;
struct nbd_cmd *cmd;
struct request *req = NULL;
+ u64 handle;
u16 hwq;
u32 tag;
struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
struct iov_iter to;
+ int ret = 0;
reply.magic = 0;
iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
@@ -562,8 +639,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
return ERR_PTR(-EPROTO);
}
- memcpy(&tag, reply.handle, sizeof(u32));
-
+ memcpy(&handle, reply.handle, sizeof(handle));
+ tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues)
req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
@@ -574,14 +651,28 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
return ERR_PTR(-ENOENT);
}
cmd = blk_mq_rq_to_pdu(req);
+
+ mutex_lock(&cmd->lock);
+ if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
+ dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
+ req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
+ ret = -ENOENT;
+ goto out;
+ }
+ if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
+ req);
+ ret = -ENOENT;
+ goto out;
+ }
if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
cmd->status = BLK_STS_IOERR;
- return cmd;
+ goto out;
}
- dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
+ dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
if (rq_data_dir(req) != WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
@@ -603,18 +694,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (nbd_disconnected(config) ||
config->num_connections <= 1) {
cmd->status = BLK_STS_IOERR;
- return cmd;
+ goto out;
}
- return ERR_PTR(-EIO);
+ ret = -EIO;
+ goto out;
}
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
- cmd, bvec.bv_len);
+ req, bvec.bv_len);
}
- } else {
- /* See the comment in nbd_queue_rq. */
- wait_for_completion(&cmd->send_complete);
}
- return cmd;
+out:
+ mutex_unlock(&cmd->lock);
+ return ret ? ERR_PTR(ret) : cmd;
}
static void recv_work(struct work_struct *work)
@@ -647,11 +738,8 @@ static void recv_work(struct work_struct *work)
static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
- struct nbd_cmd *cmd;
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
- if (!blk_mq_request_started(req))
- return;
- cmd = blk_mq_rq_to_pdu(req);
cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
}
@@ -714,10 +802,9 @@ static int wait_for_reconnect(struct nbd_device *nbd)
return 0;
if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
return 0;
- wait_event_timeout(config->conn_wait,
- atomic_read(&config->live_connections),
- config->dead_conn_timeout);
- return atomic_read(&config->live_connections);
+ return wait_event_timeout(config->conn_wait,
+ atomic_read(&config->live_connections) > 0,
+ config->dead_conn_timeout) > 0;
}
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -777,7 +864,7 @@ again:
*/
blk_mq_start_request(req);
if (unlikely(nsock->pending && nsock->pending != req)) {
- blk_mq_requeue_request(req, true);
+ nbd_requeue_cmd(cmd);
ret = 0;
goto out;
}
@@ -790,7 +877,7 @@ again:
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
nbd_mark_nsock_dead(nbd, nsock, 1);
- blk_mq_requeue_request(req, true);
+ nbd_requeue_cmd(cmd);
ret = 0;
}
out:
@@ -814,7 +901,8 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
* that the server is misbehaving (or there was an error) before we're
* done sending everything over the wire.
*/
- init_completion(&cmd->send_complete);
+ mutex_lock(&cmd->lock);
+ clear_bit(NBD_CMD_REQUEUED, &cmd->flags);
/* We can be called directly from the user space process, which means we
* could possibly have signals pending so our sendmsg will fail. In
@@ -826,7 +914,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
ret = BLK_STS_IOERR;
else if (!ret)
ret = BLK_STS_OK;
- complete(&cmd->send_complete);
+ mutex_unlock(&cmd->lock);
return ret;
}
@@ -950,10 +1038,6 @@ static void nbd_bdev_reset(struct block_device *bdev)
if (bdev->bd_openers > 1)
return;
bd_set_size(bdev, 0);
- if (max_part > 0) {
- blkdev_reread_part(bdev);
- bdev->bd_invalidated = 1;
- }
}
static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1040,6 +1124,9 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->config = NULL;
nbd->tag_set.timeout = 0;
+ nbd->disk->queue->limits.discard_granularity = 0;
+ nbd->disk->queue->limits.discard_alignment = 0;
+ blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
mutex_unlock(&nbd->config_lock);
@@ -1109,7 +1196,6 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
if (ret)
return ret;
- bd_set_size(bdev, config->bytesize);
if (max_part)
bdev->bd_invalidated = 1;
mutex_unlock(&nbd->config_lock);
@@ -1118,7 +1204,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
if (ret)
sock_shutdown(nbd);
mutex_lock(&nbd->config_lock);
- bd_set_size(bdev, 0);
+ nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
@@ -1269,6 +1355,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
refcount_set(&nbd->config_refs, 1);
refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock);
+ bdev->bd_invalidated = 1;
+ } else if (nbd_disconnected(nbd->config)) {
+ bdev->bd_invalidated = 1;
}
out:
mutex_unlock(&nbd_index_mutex);
@@ -1278,6 +1367,12 @@ out:
static void nbd_release(struct gendisk *disk, fmode_t mode)
{
struct nbd_device *nbd = disk->private_data;
+ struct block_device *bdev = bdget_disk(disk, 0);
+
+ if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+ bdev->bd_openers == 0)
+ nbd_disconnect_and_put(nbd);
+
nbd_config_put(nbd);
nbd_put(nbd);
}
@@ -1425,6 +1520,8 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->nbd = set->driver_data;
+ cmd->flags = 0;
+ mutex_init(&cmd->lock);
return 0;
}
@@ -1490,8 +1587,9 @@ static int nbd_dev_add(int index)
*/
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
- disk->queue->limits.discard_granularity = 512;
- blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
+ disk->queue->limits.discard_granularity = 0;
+ disk->queue->limits.discard_alignment = 0;
+ blk_queue_max_discard_sectors(disk->queue, 0);
blk_queue_max_segment_size(disk->queue, UINT_MAX);
blk_queue_max_segments(disk->queue, USHRT_MAX);
blk_queue_max_hw_sectors(disk->queue, 65536);
@@ -1677,6 +1775,10 @@ again:
&config->runtime_flags);
put_dev = true;
}
+ if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+ set_bit(NBD_DISCONNECT_ON_CLOSE,
+ &config->runtime_flags);
+ }
}
if (info->attrs[NBD_ATTR_SOCKETS]) {
@@ -1721,6 +1823,17 @@ out:
return ret;
}
+static void nbd_disconnect_and_put(struct nbd_device *nbd)
+{
+ mutex_lock(&nbd->config_lock);
+ nbd_disconnect(nbd);
+ nbd_clear_sock(nbd);
+ mutex_unlock(&nbd->config_lock);
+ if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+ &nbd->config->runtime_flags))
+ nbd_config_put(nbd);
+}
+
static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
{
struct nbd_device *nbd;
@@ -1753,12 +1866,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
nbd_put(nbd);
return 0;
}
- mutex_lock(&nbd->config_lock);
- nbd_disconnect(nbd);
- mutex_unlock(&nbd->config_lock);
- if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
- &nbd->config->runtime_flags))
- nbd_config_put(nbd);
+ nbd_disconnect_and_put(nbd);
nbd_config_put(nbd);
nbd_put(nbd);
return 0;
@@ -1769,7 +1877,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
struct nbd_device *nbd = NULL;
struct nbd_config *config;
int index;
- int ret = -EINVAL;
+ int ret = 0;
bool put_dev = false;
if (!netlink_capable(skb, CAP_SYS_ADMIN))
@@ -1809,6 +1917,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
!nbd->task_recv) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
+ ret = -EINVAL;
goto out;
}
@@ -1833,6 +1942,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
&config->runtime_flags))
refcount_inc(&nbd->refs);
}
+
+ if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+ set_bit(NBD_DISCONNECT_ON_CLOSE,
+ &config->runtime_flags);
+ } else {
+ clear_bit(NBD_DISCONNECT_ON_CLOSE,
+ &config->runtime_flags);
+ }
}
if (info->attrs[NBD_ATTR_SOCKETS]) {
@@ -2093,7 +2210,8 @@ static int __init nbd_init(void)
if (nbds_max > 1UL << (MINORBITS - part_shift))
return -EINVAL;
recv_workqueue = alloc_workqueue("knbd-recv",
- WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI |
+ WQ_UNBOUND, 0);
if (!recv_workqueue)
return -ENOMEM;
OpenPOWER on IntegriCloud