summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Kconfig10
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/ataflop.c2
-rw-r--r--drivers/block/brd.c27
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_interval.c29
-rw-r--r--drivers/block/drbd/drbd_main.c1
-rw-r--r--drivers/block/drbd/drbd_nl.c16
-rw-r--r--drivers/block/drbd/drbd_receiver.c2
-rw-r--r--drivers/block/drbd/drbd_req.c2
-rw-r--r--drivers/block/drbd/drbd_worker.c4
-rw-r--r--drivers/block/floppy.c7
-rw-r--r--drivers/block/loop.c50
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/nbd.c286
-rw-r--r--drivers/block/null_blk.h33
-rw-r--r--drivers/block/null_blk_main.c368
-rw-r--r--drivers/block/null_blk_zoned.c146
-rw-r--r--drivers/block/paride/pcd.c15
-rw-r--r--drivers/block/paride/pd.c1
-rw-r--r--drivers/block/paride/pf.c3
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/rbd.c502
-rw-r--r--drivers/block/rsxx/core.c2
-rw-r--r--drivers/block/sunvdc.c3
-rw-r--r--drivers/block/umem.c2
-rw-r--r--drivers/block/virtio_blk.c115
-rw-r--r--drivers/block/xen-blkback/blkback.c52
-rw-r--r--drivers/block/xen-blkback/common.h4
-rw-r--r--drivers/block/xen-blkback/xenbus.c103
-rw-r--r--drivers/block/xen-blkfront.c13
-rw-r--r--drivers/block/zram/zram_drv.c15
32 files changed, 1053 insertions, 767 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1bb8ec575352..025b1b77b11a 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -432,16 +432,6 @@ config VIRTIO_BLK
This is the virtual block driver for virtio. It can be used with
QEMU based VMMs (like KVM or Xen). Say Y or M.
-config VIRTIO_BLK_SCSI
- bool "SCSI passthrough request for the Virtio block driver"
- depends on VIRTIO_BLK
- select BLK_SCSI_REQUEST
- ---help---
- Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on
- virtio-blk devices. This is only supported for the legacy
- virtio protocol and not enabled by default by any hypervisor.
- You probably want to use virtio-scsi instead.
-
config BLK_DEV_RBD
tristate "Rados block device (RBD)"
depends on INET && BLOCK
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index bd19f8af950b..7b32fb673375 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -329,6 +329,7 @@ static const struct block_device_operations aoe_bdops = {
.open = aoeblk_open,
.release = aoeblk_release,
.ioctl = aoeblk_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
.getgeo = aoeblk_getgeo,
.owner = THIS_MODULE,
};
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index bd7d3bb8b890..1553d41f0b91 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -857,7 +857,7 @@ static void fd_calibrate( void )
}
if (ATARIHW_PRESENT(FDCSPEED))
- dma_wd.fdc_speed = 0; /* always seek with 8 Mhz */;
+ dma_wd.fdc_speed = 0; /* always seek with 8 Mhz */
DPRINT(("fd_calibrate\n"));
SET_IRQ_HANDLER( fd_calibrate_done );
/* we can't verify, since the speed may be incorrect */
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index c548a5a6c1a0..220c5e18aba0 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -297,6 +297,10 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
unsigned int len = bvec.bv_len;
int err;
+ /* Don't support un-aligned buffer */
+ WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
+ (len & (SECTOR_SIZE - 1)));
+
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
bio_op(bio), sector);
if (err)
@@ -382,7 +386,6 @@ static struct brd_device *brd_alloc(int i)
goto out_free_dev;
blk_queue_make_request(brd->brd_queue, brd_make_request);
- blk_queue_max_hw_sectors(brd->brd_queue, 1024);
/* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN
@@ -470,6 +473,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
return kobj;
}
+static inline void brd_check_and_reset_par(void)
+{
+ if (unlikely(!max_part))
+ max_part = 1;
+
+ /*
+ * make sure 'max_part' can be divided exactly by (1U << MINORBITS),
+ * otherwise, it is possiable to get same dev_t when adding partitions.
+ */
+ if ((1U << MINORBITS) % max_part != 0)
+ max_part = 1UL << fls(max_part);
+
+ if (max_part > DISK_MAX_PARTS) {
+ pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
+ DISK_MAX_PARTS, DISK_MAX_PARTS);
+ max_part = DISK_MAX_PARTS;
+ }
+}
+
static int __init brd_init(void)
{
struct brd_device *brd, *next;
@@ -493,8 +515,7 @@ static int __init brd_init(void)
if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
return -EIO;
- if (unlikely(!max_part))
- max_part = 1;
+ brd_check_and_reset_par();
for (i = 0; i < rd_nr; i++) {
brd = brd_alloc(i);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index ddbf56014c51..aae99a2d7bd4 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -622,7 +622,7 @@ struct fifo_buffer {
int total; /* sum of all values */
int values[0];
};
-extern struct fifo_buffer *fifo_alloc(int fifo_size);
+extern struct fifo_buffer *fifo_alloc(unsigned int fifo_size);
/* flag bits per connection */
enum {
diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c
index c58986556161..651bd0236a99 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -13,33 +13,10 @@ sector_t interval_end(struct rb_node *node)
return this->end;
}
-/**
- * compute_subtree_last - compute end of @node
- *
- * The end of an interval is the highest (start + (size >> 9)) value of this
- * node and of its children. Called for @node and its parents whenever the end
- * may have changed.
- */
-static inline sector_t
-compute_subtree_last(struct drbd_interval *node)
-{
- sector_t max = node->sector + (node->size >> 9);
-
- if (node->rb.rb_left) {
- sector_t left = interval_end(node->rb.rb_left);
- if (left > max)
- max = left;
- }
- if (node->rb.rb_right) {
- sector_t right = interval_end(node->rb.rb_right);
- if (right > max)
- max = right;
- }
- return max;
-}
+#define NODE_END(node) ((node)->sector + ((node)->size >> 9))
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb,
- sector_t, end, compute_subtree_last);
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+ struct drbd_interval, rb, sector_t, end, NODE_END);
/**
* drbd_insert_interval - insert a new interval into a tree
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 5b248763a672..a18155cdce41 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -786,7 +786,6 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm
if (nc->tentative && connection->agreed_pro_version < 92) {
rcu_read_unlock();
- mutex_unlock(&sock->mutex);
drbd_err(connection, "--dry-run is not supported by peer");
return -EOPNOTSUPP;
}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 5d52a2d32155..da4a3ebe04ef 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -268,19 +268,18 @@ static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
/* some more paranoia, if the request was over-determined */
if (adm_ctx->device && adm_ctx->resource &&
adm_ctx->device->resource != adm_ctx->resource) {
- pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
- adm_ctx->minor, adm_ctx->resource->name,
- adm_ctx->device->resource->name);
+ pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
+ adm_ctx->minor, adm_ctx->resource->name,
+ adm_ctx->device->resource->name);
drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
return ERR_INVALID_REQUEST;
}
if (adm_ctx->device &&
adm_ctx->volume != VOLUME_UNSPECIFIED &&
adm_ctx->volume != adm_ctx->device->vnr) {
- pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
- adm_ctx->minor, adm_ctx->volume,
- adm_ctx->device->vnr,
- adm_ctx->device->resource->name);
+ pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
+ adm_ctx->minor, adm_ctx->volume,
+ adm_ctx->device->vnr, adm_ctx->device->resource->name);
drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
return ERR_INVALID_REQUEST;
}
@@ -1576,7 +1575,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
struct drbd_device *device;
struct disk_conf *new_disk_conf, *old_disk_conf;
struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
- int err, fifo_size;
+ int err;
+ unsigned int fifo_size;
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 2b3103c30857..79e216446030 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3887,7 +3887,7 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
const int apv = connection->agreed_pro_version;
struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
- int fifo_size = 0;
+ unsigned int fifo_size = 0;
int err;
peer_device = conn_peer_device(connection, pi->vnr);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f86cea4c0f8d..840c3aef3c5c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -884,7 +884,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
start_new_tl_epoch(connection);
mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
break;
- };
+ }
return rv;
}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 5bdcc70ad589..b7f605c6e231 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -482,11 +482,11 @@ static void fifo_add_val(struct fifo_buffer *fb, int value)
fb->values[i] += value;
}
-struct fifo_buffer *fifo_alloc(int fifo_size)
+struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
{
struct fifo_buffer *fb;
- fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
+ fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO);
if (!fb)
return NULL;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 0469aceaa230..cd3612e4e2e1 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3780,7 +3780,7 @@ static int compat_getdrvprm(int drive,
v.native_format = UDP->native_format;
mutex_unlock(&floppy_mutex);
- if (copy_from_user(arg, &v, sizeof(struct compat_floppy_drive_params)))
+ if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_params)))
return -EFAULT;
return 0;
}
@@ -3816,7 +3816,7 @@ static int compat_getdrvstat(int drive, bool poll,
v.bufblocks = UDRS->bufblocks;
mutex_unlock(&floppy_mutex);
- if (copy_from_user(arg, &v, sizeof(struct compat_floppy_drive_struct)))
+ if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_struct)))
return -EFAULT;
return 0;
Eintr:
@@ -3879,6 +3879,9 @@ static int fd_compat_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
{
int drive = (long)bdev->bd_disk->private_data;
switch (cmd) {
+ case CDROMEJECT: /* CD-ROM eject */
+ case 0x6470: /* SunOS floppy eject */
+
case FDMSGON:
case FDMSGOFF:
case FDSETEMSGTRESH:
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ab7ca5989097..739b372a5112 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -417,18 +417,20 @@ out_free_page:
return ret;
}
-static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
+static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
+ int mode)
{
/*
- * We use punch hole to reclaim the free space used by the
- * image a.k.a. discard. However we do not support discard if
- * encryption is enabled, because it may give an attacker
- * useful information.
+ * We use fallocate to manipulate the space mappings used by the image
+ * a.k.a. discard/zerorange. However we do not support this if
+ * encryption is enabled, because it may give an attacker useful
+ * information.
*/
struct file *file = lo->lo_backing_file;
- int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
int ret;
+ mode |= FALLOC_FL_KEEP_SIZE;
+
if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
ret = -EOPNOTSUPP;
goto out;
@@ -596,9 +598,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
switch (req_op(rq)) {
case REQ_OP_FLUSH:
return lo_req_flush(lo, rq);
- case REQ_OP_DISCARD:
case REQ_OP_WRITE_ZEROES:
- return lo_discard(lo, rq, pos);
+ /*
+ * If the caller doesn't want deallocation, call zeroout to
+ * write zeroes the range. Otherwise, punch them out.
+ */
+ return lo_fallocate(lo, rq, pos,
+ (rq->cmd_flags & REQ_NOUNMAP) ?
+ FALLOC_FL_ZERO_RANGE :
+ FALLOC_FL_PUNCH_HOLE);
+ case REQ_OP_DISCARD:
+ return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
case REQ_OP_WRITE:
if (lo->transfer)
return lo_write_transfer(lo, rq, pos);
@@ -630,7 +640,9 @@ static void loop_reread_partitions(struct loop_device *lo,
{
int rc;
- rc = blkdev_reread_part(bdev);
+ mutex_lock(&bdev->bd_mutex);
+ rc = bdev_disk_changed(bdev, false);
+ mutex_unlock(&bdev->bd_mutex);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -994,6 +1006,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_write_cache(lo->lo_queue, true, false);
+ if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
+ /* In case of direct I/O, match underlying block size */
+ unsigned short bsize = bdev_logical_block_size(
+ inode->i_sb->s_bdev);
+
+ blk_queue_logical_block_size(lo->lo_queue, bsize);
+ blk_queue_physical_block_size(lo->lo_queue, bsize);
+ blk_queue_io_min(lo->lo_queue, bsize);
+ }
+
loop_update_rotational(lo);
loop_update_dio(lo);
set_capacity(lo->lo_disk, size);
@@ -1144,10 +1166,11 @@ out_unlock:
* must be at least one and it can only become zero when the
* current holder is released.
*/
- if (release)
- err = __blkdev_reread_part(bdev);
- else
- err = blkdev_reread_part(bdev);
+ if (!release)
+ mutex_lock(&bdev->bd_mutex);
+ err = bdev_disk_changed(bdev, false);
+ if (!release)
+ mutex_unlock(&bdev->bd_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo_number, err);
@@ -1755,6 +1778,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_FD:
case LOOP_CHANGE_FD:
case LOOP_SET_BLOCK_SIZE:
+ case LOOP_SET_DIRECT_IO:
err = lo_ioctl(bdev, mode, cmd, arg);
break;
default:
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 964f78cfffa0..f6bafa9a68b9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -129,7 +129,7 @@ struct mtip_compat_ide_task_request_s {
/*
* This function check_for_surprise_removal is called
* while card is removed from the system and it will
- * read the vendor id from the configration space
+ * read the vendor id from the configuration space
*
* @pdev Pointer to the pci_dev structure.
*
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index e21d2ded732b..78181908f0df 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -26,6 +26,7 @@
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
+#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -71,14 +72,17 @@ struct link_dead_args {
int index;
};
-#define NBD_TIMEDOUT 0
+#define NBD_RT_TIMEDOUT 0
+#define NBD_RT_DISCONNECT_REQUESTED 1
+#define NBD_RT_DISCONNECTED 2
+#define NBD_RT_HAS_PID_FILE 3
+#define NBD_RT_HAS_CONFIG_REF 4
+#define NBD_RT_BOUND 5
+#define NBD_RT_DESTROY_ON_DISCONNECT 6
+#define NBD_RT_DISCONNECT_ON_CLOSE 7
+
+#define NBD_DESTROY_ON_DISCONNECT 0
#define NBD_DISCONNECT_REQUESTED 1
-#define NBD_DISCONNECTED 2
-#define NBD_HAS_PID_FILE 3
-#define NBD_HAS_CONFIG_REF 4
-#define NBD_BOUND 5
-#define NBD_DESTROY_ON_DISCONNECT 6
-#define NBD_DISCONNECT_ON_CLOSE 7
struct nbd_config {
u32 flags;
@@ -108,10 +112,14 @@ struct nbd_device {
struct nbd_config *config;
struct mutex config_lock;
struct gendisk *disk;
+ struct workqueue_struct *recv_workq;
struct list_head list;
struct task_struct *task_recv;
struct task_struct *task_setup;
+
+ struct completion *destroy_complete;
+ unsigned long flags;
};
#define NBD_CMD_REQUEUED 1
@@ -121,6 +129,7 @@ struct nbd_cmd {
struct mutex lock;
int index;
int cookie;
+ int retries;
blk_status_t status;
unsigned long flags;
u32 cmd_cookie;
@@ -138,7 +147,6 @@ static struct dentry *nbd_dbg_dir;
static unsigned int nbds_max = 16;
static int max_part = 16;
-static struct workqueue_struct *recv_workqueue;
static int part_shift;
static int nbd_dev_dbg_init(struct nbd_device *nbd);
@@ -222,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
disk->private_data = NULL;
put_disk(disk);
}
+
+ /*
+ * Place this in the last just before the nbd is freed to
+ * make sure that the disk and the related kobject are also
+ * totally removed to avoid duplicate creation of the same
+ * one.
+ */
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
+ complete(nbd->destroy_complete);
+
kfree(nbd);
}
@@ -230,15 +248,15 @@ static void nbd_put(struct nbd_device *nbd)
if (refcount_dec_and_mutex_lock(&nbd->refs,
&nbd_index_mutex)) {
idr_remove(&nbd_index_idr, nbd->index);
- mutex_unlock(&nbd_index_mutex);
nbd_dev_remove(nbd);
+ mutex_unlock(&nbd_index_mutex);
}
}
static int nbd_disconnected(struct nbd_config *config)
{
- return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
- test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+ return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
+ test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
}
static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
@@ -256,9 +274,9 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
if (!nsock->dead) {
kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
if (atomic_dec_return(&nbd->config->live_connections) == 0) {
- if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+ if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
&nbd->config->runtime_flags)) {
- set_bit(NBD_DISCONNECTED,
+ set_bit(NBD_RT_DISCONNECTED,
&nbd->config->runtime_flags);
dev_info(nbd_to_dev(nbd),
"Disconnected due to user request.\n");
@@ -332,7 +350,7 @@ static void sock_shutdown(struct nbd_device *nbd)
if (config->num_connections == 0)
return;
- if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return;
for (i = 0; i < config->num_connections; i++) {
@@ -344,6 +362,22 @@ static void sock_shutdown(struct nbd_device *nbd)
dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
}
+static u32 req_to_nbd_cmd_type(struct request *req)
+{
+ switch (req_op(req)) {
+ case REQ_OP_DISCARD:
+ return NBD_CMD_TRIM;
+ case REQ_OP_FLUSH:
+ return NBD_CMD_FLUSH;
+ case REQ_OP_WRITE:
+ return NBD_CMD_WRITE;
+ case REQ_OP_READ:
+ return NBD_CMD_READ;
+ default:
+ return U32_MAX;
+ }
+}
+
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
bool reserved)
{
@@ -351,15 +385,16 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
struct nbd_device *nbd = cmd->nbd;
struct nbd_config *config;
+ if (!mutex_trylock(&cmd->lock))
+ return BLK_EH_RESET_TIMER;
+
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
+ mutex_unlock(&cmd->lock);
goto done;
}
config = nbd->config;
- if (!mutex_trylock(&cmd->lock))
- return BLK_EH_RESET_TIMER;
-
if (config->num_connections > 1) {
dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n",
@@ -389,11 +424,26 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
nbd_config_put(nbd);
return BLK_EH_DONE;
}
- } else {
- dev_err_ratelimited(nbd_to_dev(nbd),
- "Connection timed out\n");
}
- set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+
+ if (!nbd->tag_set.timeout) {
+ /*
+ * Userspace sets timeout=0 to disable socket disconnection,
+ * so just warn and reset the timer.
+ */
+ cmd->retries++;
+ dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
+ req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
+ (unsigned long long)blk_rq_pos(req) << 9,
+ blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
+
+ mutex_unlock(&cmd->lock);
+ nbd_config_put(nbd);
+ return BLK_EH_RESET_TIMER;
+ }
+
+ dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
+ set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
@@ -480,22 +530,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
- switch (req_op(req)) {
- case REQ_OP_DISCARD:
- type = NBD_CMD_TRIM;
- break;
- case REQ_OP_FLUSH:
- type = NBD_CMD_FLUSH;
- break;
- case REQ_OP_WRITE:
- type = NBD_CMD_WRITE;
- break;
- case REQ_OP_READ:
- type = NBD_CMD_READ;
- break;
- default:
+ type = req_to_nbd_cmd_type(req);
+ if (type == U32_MAX)
return -EIO;
- }
if (rq_data_dir(req) == WRITE &&
(config->flags & NBD_FLAG_READ_ONLY)) {
@@ -526,6 +563,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
}
cmd->index = index;
cmd->cookie = nsock->cookie;
+ cmd->retries = 0;
request.type = htonl(type | nbd_cmd_flags);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -672,6 +710,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
ret = -ENOENT;
goto out;
}
+ if (cmd->status != BLK_STS_OK) {
+ dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
+ req);
+ ret = -ENOENT;
+ goto out;
+ }
if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
req);
@@ -753,7 +797,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+ mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
+ mutex_unlock(&cmd->lock);
+
blk_mq_complete_request(req);
return true;
}
@@ -773,7 +820,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
struct nbd_sock *nsock = config->socks[index];
int fallback = nsock->fallback_index;
- if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return new_index;
if (config->num_connections <= 1) {
@@ -814,7 +861,7 @@ static int wait_for_reconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout)
return 0;
- if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return 0;
return wait_event_timeout(config->conn_wait,
atomic_read(&config->live_connections) > 0,
@@ -933,6 +980,26 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret;
}
+static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
+ int *err)
+{
+ struct socket *sock;
+
+ *err = 0;
+ sock = sockfd_lookup(fd, err);
+ if (!sock)
+ return NULL;
+
+ if (sock->ops->shutdown == sock_no_shutdown) {
+ dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
+ *err = -EINVAL;
+ sockfd_put(sock);
+ return NULL;
+ }
+
+ return sock;
+}
+
static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
bool netlink)
{
@@ -942,17 +1009,17 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
struct nbd_sock *nsock;
int err;
- sock = sockfd_lookup(arg, &err);
+ sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;
if (!netlink && !nbd->task_setup &&
- !test_bit(NBD_BOUND, &config->runtime_flags))
+ !test_bit(NBD_RT_BOUND, &config->runtime_flags))
nbd->task_setup = current;
if (!netlink &&
(nbd->task_setup != current ||
- test_bit(NBD_BOUND, &config->runtime_flags))) {
+ test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
dev_err(disk_to_dev(nbd->disk),
"Device being setup by another task");
sockfd_put(sock);
@@ -965,14 +1032,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
sockfd_put(sock);
return -ENOMEM;
}
+
+ config->socks = socks;
+
nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
if (!nsock) {
sockfd_put(sock);
return -ENOMEM;
}
- config->socks = socks;
-
nsock->fallback_index = -1;
nsock->dead = false;
mutex_init(&nsock->tx_lock);
@@ -994,7 +1062,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
int i;
int err;
- sock = sockfd_lookup(arg, &err);
+ sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;
@@ -1031,12 +1099,12 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
mutex_unlock(&nsock->tx_lock);
sockfd_put(old);
- clear_bit(NBD_DISCONNECTED, &config->runtime_flags);
+ clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
/* We take the tx_mutex in an error path in the recv_work, so we
* need to queue_work outside of the tx_mutex.
*/
- queue_work(recv_workqueue, &args->work);
+ queue_work(nbd->recv_workq, &args->work);
atomic_inc(&config->live_connections);
wake_up(&config->conn_wait);
@@ -1102,7 +1170,8 @@ static int nbd_disconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
- set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+ set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
+ set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
send_disconnects(nbd);
return 0;
}
@@ -1121,7 +1190,7 @@ static void nbd_config_put(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
nbd_dev_dbg_close(nbd);
nbd_size_clear(nbd);
- if (test_and_clear_bit(NBD_HAS_PID_FILE,
+ if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
&config->runtime_flags))
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
nbd->task_recv = NULL;
@@ -1137,6 +1206,10 @@ static void nbd_config_put(struct nbd_device *nbd)
kfree(nbd->config);
nbd->config = NULL;
+ if (nbd->recv_workq)
+ destroy_workqueue(nbd->recv_workq);
+ nbd->recv_workq = NULL;
+
nbd->tag_set.timeout = 0;
nbd->disk->queue->limits.discard_granularity = 0;
nbd->disk->queue->limits.discard_alignment = 0;
@@ -1165,6 +1238,14 @@ static int nbd_start_device(struct nbd_device *nbd)
return -EINVAL;
}
+ nbd->recv_workq = alloc_workqueue("knbd%d-recv",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI |
+ WQ_UNBOUND, 0, nbd->index);
+ if (!nbd->recv_workq) {
+ dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
+ return -ENOMEM;
+ }
+
blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
nbd->task_recv = current;
@@ -1175,7 +1256,7 @@ static int nbd_start_device(struct nbd_device *nbd)
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
return error;
}
- set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
+ set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
nbd_dev_dbg_init(nbd);
for (i = 0; i < num_connections; i++) {
@@ -1184,6 +1265,16 @@ static int nbd_start_device(struct nbd_device *nbd)
args = kzalloc(sizeof(*args), GFP_KERNEL);
if (!args) {
sock_shutdown(nbd);
+ /*
+ * If num_connections is m (2 < m),
+ * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful.
+ * But NO.(n + 1) failed. We still have n recv threads.
+ * So, add flush_workqueue here to prevent recv threads
+ * dropping the last config_refs and trying to destroy
+ * the workqueue from inside the workqueue.
+ */
+ if (i)
+ flush_workqueue(nbd->recv_workq);
return -ENOMEM;
}
sk_set_memalloc(config->socks[i]->sock->sk);
@@ -1195,7 +1286,7 @@ static int nbd_start_device(struct nbd_device *nbd)
INIT_WORK(&args->work, recv_work);
args->nbd = nbd;
args->index = i;
- queue_work(recv_workqueue, &args->work);
+ queue_work(nbd->recv_workq, &args->work);
}
nbd_size_update(nbd);
return error;
@@ -1217,12 +1308,14 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
atomic_read(&config->recv_threads) == 0);
if (ret)
sock_shutdown(nbd);
+ flush_workqueue(nbd->recv_workq);
+
mutex_lock(&nbd->config_lock);
nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
- if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
- if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+ if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
ret = -ETIMEDOUT;
return ret;
}
@@ -1233,7 +1326,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
sock_shutdown(nbd);
__invalidate_device(bdev, true);
nbd_bdev_reset(bdev);
- if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+ if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
}
@@ -1246,6 +1339,13 @@ static bool nbd_is_valid_blksize(unsigned long blksize)
return true;
}
+static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout)
+{
+ nbd->tag_set.timeout = timeout * HZ;
+ if (timeout)
+ blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
+}
+
/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
unsigned int cmd, unsigned long arg)
@@ -1276,10 +1376,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
nbd_size_set(nbd, config->blksize, arg);
return 0;
case NBD_SET_TIMEOUT:
- if (arg) {
- nbd->tag_set.timeout = arg * HZ;
- blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
- }
+ nbd_set_cmd_timeout(nbd, arg);
return 0;
case NBD_SET_FLAGS:
@@ -1324,7 +1421,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
/* Don't allow ioctl operations on a nbd device that was created with
* netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
*/
- if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+ if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
(cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
error = __nbd_ioctl(bdev, nbd, cmd, arg);
else
@@ -1395,7 +1492,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
struct nbd_device *nbd = disk->private_data;
struct block_device *bdev = bdget_disk(disk, 0);
- if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+ if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
bdev->bd_openers == 0)
nbd_disconnect_and_put(nbd);
@@ -1596,6 +1693,7 @@ static int nbd_dev_add(int index)
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
+ nbd->destroy_complete = NULL;
err = blk_mq_alloc_tag_set(&nbd->tag_set);
if (err)
@@ -1710,6 +1808,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
+ DECLARE_COMPLETION_ONSTACK(destroy_complete);
struct nbd_device *nbd = NULL;
struct nbd_config *config;
int index = -1;
@@ -1761,6 +1860,17 @@ again:
mutex_unlock(&nbd_index_mutex);
return -EINVAL;
}
+
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+ test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
+ nbd->destroy_complete = &destroy_complete;
+ mutex_unlock(&nbd_index_mutex);
+
+ /* Wait untill the the nbd stuff is totally destroyed */
+ wait_for_completion(&destroy_complete);
+ goto again;
+ }
+
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
if (index == -1)
@@ -1793,17 +1903,15 @@ again:
return -ENOMEM;
}
refcount_set(&nbd->config_refs, 1);
- set_bit(NBD_BOUND, &config->runtime_flags);
+ set_bit(NBD_RT_BOUND, &config->runtime_flags);
ret = nbd_genl_size_set(info, nbd);
if (ret)
goto out;
- if (info->attrs[NBD_ATTR_TIMEOUT]) {
- u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
- nbd->tag_set.timeout = timeout * HZ;
- blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
- }
+ if (info->attrs[NBD_ATTR_TIMEOUT])
+ nbd_set_cmd_timeout(nbd,
+ nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
config->dead_conn_timeout =
nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
@@ -1815,12 +1923,15 @@ again:
if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
- set_bit(NBD_DESTROY_ON_DISCONNECT,
+ set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
&config->runtime_flags);
+ set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
put_dev = true;
+ } else {
+ clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
- set_bit(NBD_DISCONNECT_ON_CLOSE,
+ set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}
@@ -1859,7 +1970,7 @@ again:
out:
mutex_unlock(&nbd->config_lock);
if (!ret) {
- set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+ set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
refcount_inc(&nbd->config_refs);
nbd_connect_reply(info, nbd->index);
}
@@ -1875,7 +1986,13 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
nbd_disconnect(nbd);
nbd_clear_sock(nbd);
mutex_unlock(&nbd->config_lock);
- if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+ /*
+ * Make sure recv thread has finished, so it does not drop the last
+ * config ref and try to destroy the workqueue from inside the work
+ * queue.
+ */
+ flush_workqueue(nbd->recv_workq);
+ if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
}
@@ -1959,7 +2076,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&nbd->config_lock);
config = nbd->config;
- if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+ if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
!nbd->task_recv) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
@@ -1971,11 +2088,9 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
if (ret)
goto out;
- if (info->attrs[NBD_ATTR_TIMEOUT]) {
- u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
- nbd->tag_set.timeout = timeout * HZ;
- blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
- }
+ if (info->attrs[NBD_ATTR_TIMEOUT])
+ nbd_set_cmd_timeout(nbd,
+ nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
config->dead_conn_timeout =
nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
@@ -1984,20 +2099,22 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
- if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+ if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
&config->runtime_flags))
put_dev = true;
+ set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
} else {
- if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+ if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
&config->runtime_flags))
refcount_inc(&nbd->refs);
+ clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
- set_bit(NBD_DISCONNECT_ON_CLOSE,
+ set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
} else {
- clear_bit(NBD_DISCONNECT_ON_CLOSE,
+ clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}
@@ -2261,20 +2378,12 @@ static int __init nbd_init(void)
if (nbds_max > 1UL << (MINORBITS - part_shift))
return -EINVAL;
- recv_workqueue = alloc_workqueue("knbd-recv",
- WQ_MEM_RECLAIM | WQ_HIGHPRI |
- WQ_UNBOUND, 0);
- if (!recv_workqueue)
- return -ENOMEM;
- if (register_blkdev(NBD_MAJOR, "nbd")) {
- destroy_workqueue(recv_workqueue);
+ if (register_blkdev(NBD_MAJOR, "nbd"))
return -EIO;
- }
if (genl_register_family(&nbd_genl_family)) {
unregister_blkdev(NBD_MAJOR, "nbd");
- destroy_workqueue(recv_workqueue);
return -EINVAL;
}
nbd_dbg_init();
@@ -2316,7 +2425,6 @@ static void __exit nbd_cleanup(void)
idr_destroy(&nbd_index_idr);
genl_unregister_family(&nbd_genl_family);
- destroy_workqueue(recv_workqueue);
unregister_blkdev(NBD_MAJOR, "nbd");
}
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index a1b9929bd911..bc837862b767 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -2,6 +2,9 @@
#ifndef __BLK_NULL_BLK_H
#define __BLK_NULL_BLK_H
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
@@ -88,28 +91,32 @@ struct nullb {
#ifdef CONFIG_BLK_DEV_ZONED
int null_zone_init(struct nullb_device *dev);
void null_zone_exit(struct nullb_device *dev);
-int null_zone_report(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones);
-void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
- unsigned int nr_sectors);
-void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
+int null_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
+blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
+ enum req_opf op, sector_t sector,
+ sector_t nr_sectors);
+size_t null_zone_valid_read_len(struct nullb *nullb,
+ sector_t sector, unsigned int len);
#else
static inline int null_zone_init(struct nullb_device *dev)
{
- pr_err("null_blk: CONFIG_BLK_DEV_ZONED not enabled\n");
+ pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
return -EINVAL;
}
static inline void null_zone_exit(struct nullb_device *dev) {}
-static inline int null_zone_report(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones,
- unsigned int *nr_zones)
+static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
+ enum req_opf op, sector_t sector,
+ sector_t nr_sectors)
{
- return -EOPNOTSUPP;
+ return BLK_STS_NOTSUPP;
}
-static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
- unsigned int nr_sectors)
+static inline size_t null_zone_valid_read_len(struct nullb *nullb,
+ sector_t sector,
+ unsigned int len)
{
+ return len;
}
-static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {}
+#define null_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */
#endif /* __NULL_BLK_H */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 99328ded60d1..16510795e377 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -141,8 +141,8 @@ static int g_bs = 512;
module_param_named(bs, g_bs, int, 0444);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
-static int nr_devices = 1;
-module_param(nr_devices, int, 0444);
+static unsigned int nr_devices = 1;
+module_param(nr_devices, uint, 0444);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
static bool g_blocking;
@@ -227,7 +227,7 @@ static ssize_t nullb_device_uint_attr_store(unsigned int *val,
int result;
result = kstrtouint(page, 0, &tmp);
- if (result)
+ if (result < 0)
return result;
*val = tmp;
@@ -241,7 +241,7 @@ static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
unsigned long tmp;
result = kstrtoul(page, 0, &tmp);
- if (result)
+ if (result < 0)
return result;
*val = tmp;
@@ -255,7 +255,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
int result;
result = kstrtobool(page, &tmp);
- if (result)
+ if (result < 0)
return result;
*val = tmp;
@@ -263,42 +263,68 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
}
/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
-#define NULLB_DEVICE_ATTR(NAME, TYPE) \
-static ssize_t \
-nullb_device_##NAME##_show(struct config_item *item, char *page) \
-{ \
- return nullb_device_##TYPE##_attr_show( \
- to_nullb_device(item)->NAME, page); \
-} \
-static ssize_t \
-nullb_device_##NAME##_store(struct config_item *item, const char *page, \
- size_t count) \
-{ \
- if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \
- return -EBUSY; \
- return nullb_device_##TYPE##_attr_store( \
- &to_nullb_device(item)->NAME, page, count); \
-} \
+#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
+static ssize_t \
+nullb_device_##NAME##_show(struct config_item *item, char *page) \
+{ \
+ return nullb_device_##TYPE##_attr_show( \
+ to_nullb_device(item)->NAME, page); \
+} \
+static ssize_t \
+nullb_device_##NAME##_store(struct config_item *item, const char *page, \
+ size_t count) \
+{ \
+ int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
+ struct nullb_device *dev = to_nullb_device(item); \
+ TYPE uninitialized_var(new_value); \
+ int ret; \
+ \
+ ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
+ if (ret < 0) \
+ return ret; \
+ if (apply_fn) \
+ ret = apply_fn(dev, new_value); \
+ else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
+ ret = -EBUSY; \
+ if (ret < 0) \
+ return ret; \
+ dev->NAME = new_value; \
+ return count; \
+} \
CONFIGFS_ATTR(nullb_device_, NAME);
-NULLB_DEVICE_ATTR(size, ulong);
-NULLB_DEVICE_ATTR(completion_nsec, ulong);
-NULLB_DEVICE_ATTR(submit_queues, uint);
-NULLB_DEVICE_ATTR(home_node, uint);
-NULLB_DEVICE_ATTR(queue_mode, uint);
-NULLB_DEVICE_ATTR(blocksize, uint);
-NULLB_DEVICE_ATTR(irqmode, uint);
-NULLB_DEVICE_ATTR(hw_queue_depth, uint);
-NULLB_DEVICE_ATTR(index, uint);
-NULLB_DEVICE_ATTR(blocking, bool);
-NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
-NULLB_DEVICE_ATTR(memory_backed, bool);
-NULLB_DEVICE_ATTR(discard, bool);
-NULLB_DEVICE_ATTR(mbps, uint);
-NULLB_DEVICE_ATTR(cache_size, ulong);
-NULLB_DEVICE_ATTR(zoned, bool);
-NULLB_DEVICE_ATTR(zone_size, ulong);
-NULLB_DEVICE_ATTR(zone_nr_conv, uint);
+static int nullb_apply_submit_queues(struct nullb_device *dev,
+ unsigned int submit_queues)
+{
+ struct nullb *nullb = dev->nullb;
+ struct blk_mq_tag_set *set;
+
+ if (!nullb)
+ return 0;
+
+ set = nullb->tag_set;
+ blk_mq_update_nr_hw_queues(set, submit_queues);
+ return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
+}
+
+NULLB_DEVICE_ATTR(size, ulong, NULL);
+NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
+NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
+NULLB_DEVICE_ATTR(home_node, uint, NULL);
+NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
+NULLB_DEVICE_ATTR(blocksize, uint, NULL);
+NULLB_DEVICE_ATTR(irqmode, uint, NULL);
+NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
+NULLB_DEVICE_ATTR(index, uint, NULL);
+NULLB_DEVICE_ATTR(blocking, bool, NULL);
+NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
+NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
+NULLB_DEVICE_ATTR(discard, bool, NULL);
+NULLB_DEVICE_ATTR(mbps, uint, NULL);
+NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zoned, bool, NULL);
+NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -467,7 +493,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
- return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n");
+ return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -996,6 +1022,16 @@ next:
return 0;
}
+static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ void *dst;
+
+ dst = kmap_atomic(page);
+ memset(dst + off, 0xFF, len);
+ kunmap_atomic(dst);
+}
+
static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
{
size_t temp;
@@ -1036,10 +1072,24 @@ static int null_transfer(struct nullb *nullb, struct page *page,
unsigned int len, unsigned int off, bool is_write, sector_t sector,
bool is_fua)
{
+ struct nullb_device *dev = nullb->dev;
+ unsigned int valid_len = len;
int err = 0;
if (!is_write) {
- err = copy_from_nullb(nullb, page, off, sector, len);
+ if (dev->zoned)
+ valid_len = null_zone_valid_read_len(nullb,
+ sector, len);
+
+ if (valid_len) {
+ err = copy_from_nullb(nullb, page, off,
+ sector, valid_len);
+ off += valid_len;
+ len -= valid_len;
+ }
+
+ if (len)
+ nullb_fill_pattern(nullb, page, len, off);
flush_dcache_page(page);
} else {
flush_dcache_page(page);
@@ -1133,93 +1183,61 @@ static void null_restart_queue_async(struct nullb *nullb)
blk_mq_start_stopped_hw_queues(q, true);
}
-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
+static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
{
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
- int err = 0;
+ blk_status_t sts = BLK_STS_OK;
+ struct request *rq = cmd->rq;
- if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
- struct request *rq = cmd->rq;
-
- if (!hrtimer_active(&nullb->bw_timer))
- hrtimer_restart(&nullb->bw_timer);
-
- if (atomic_long_sub_return(blk_rq_bytes(rq),
- &nullb->cur_bytes) < 0) {
- null_stop_queue(nullb);
- /* race with timer */
- if (atomic_long_read(&nullb->cur_bytes) > 0)
- null_restart_queue_async(nullb);
- /* requeue request */
- return BLK_STS_DEV_RESOURCE;
- }
- }
+ if (!hrtimer_active(&nullb->bw_timer))
+ hrtimer_restart(&nullb->bw_timer);
- if (nullb->dev->badblocks.shift != -1) {
- int bad_sectors;
- sector_t sector, size, first_bad;
- bool is_flush = true;
-
- if (dev->queue_mode == NULL_Q_BIO &&
- bio_op(cmd->bio) != REQ_OP_FLUSH) {
- is_flush = false;
- sector = cmd->bio->bi_iter.bi_sector;
- size = bio_sectors(cmd->bio);
- }
- if (dev->queue_mode != NULL_Q_BIO &&
- req_op(cmd->rq) != REQ_OP_FLUSH) {
- is_flush = false;
- sector = blk_rq_pos(cmd->rq);
- size = blk_rq_sectors(cmd->rq);
- }
- if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector,
- size, &first_bad, &bad_sectors)) {
- cmd->error = BLK_STS_IOERR;
- goto out;
- }
+ if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
+ null_stop_queue(nullb);
+ /* race with timer */
+ if (atomic_long_read(&nullb->cur_bytes) > 0)
+ null_restart_queue_async(nullb);
+ /* requeue request */
+ sts = BLK_STS_DEV_RESOURCE;
}
+ return sts;
+}
- if (dev->memory_backed) {
- if (dev->queue_mode == NULL_Q_BIO) {
- if (bio_op(cmd->bio) == REQ_OP_FLUSH)
- err = null_handle_flush(nullb);
- else
- err = null_handle_bio(cmd);
- } else {
- if (req_op(cmd->rq) == REQ_OP_FLUSH)
- err = null_handle_flush(nullb);
- else
- err = null_handle_rq(cmd);
- }
- }
- cmd->error = errno_to_blk_status(err);
+static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
+ sector_t sector,
+ sector_t nr_sectors)
+{
+ struct badblocks *bb = &cmd->nq->dev->badblocks;
+ sector_t first_bad;
+ int bad_sectors;
- if (!cmd->error && dev->zoned) {
- sector_t sector;
- unsigned int nr_sectors;
- enum req_opf op;
+ if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
+ return BLK_STS_IOERR;
- if (dev->queue_mode == NULL_Q_BIO) {
- op = bio_op(cmd->bio);
- sector = cmd->bio->bi_iter.bi_sector;
- nr_sectors = cmd->bio->bi_iter.bi_size >> 9;
- } else {
- op = req_op(cmd->rq);
- sector = blk_rq_pos(cmd->rq);
- nr_sectors = blk_rq_sectors(cmd->rq);
- }
+ return BLK_STS_OK;
+}
- if (op == REQ_OP_WRITE)
- null_zone_write(cmd, sector, nr_sectors);
- else if (op == REQ_OP_ZONE_RESET)
- null_zone_reset(cmd, sector);
- }
-out:
+static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
+ enum req_opf op)
+{
+ struct nullb_device *dev = cmd->nq->dev;
+ int err;
+
+ if (dev->queue_mode == NULL_Q_BIO)
+ err = null_handle_bio(cmd);
+ else
+ err = null_handle_rq(cmd);
+
+ return errno_to_blk_status(err);
+}
+
+static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
+{
/* Complete IO by inline, softirq or timer */
- switch (dev->irqmode) {
+ switch (cmd->nq->dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
- switch (dev->queue_mode) {
+ switch (cmd->nq->dev->queue_mode) {
case NULL_Q_MQ:
blk_mq_complete_request(cmd->rq);
break;
@@ -1238,6 +1256,40 @@ out:
null_cmd_end_timer(cmd);
break;
}
+}
+
+static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
+ sector_t nr_sectors, enum req_opf op)
+{
+ struct nullb_device *dev = cmd->nq->dev;
+ struct nullb *nullb = dev->nullb;
+ blk_status_t sts;
+
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
+ sts = null_handle_throttled(cmd);
+ if (sts != BLK_STS_OK)
+ return sts;
+ }
+
+ if (op == REQ_OP_FLUSH) {
+ cmd->error = errno_to_blk_status(null_handle_flush(nullb));
+ goto out;
+ }
+
+ if (nullb->dev->badblocks.shift != -1) {
+ cmd->error = null_handle_badblocks(cmd, sector, nr_sectors);
+ if (cmd->error != BLK_STS_OK)
+ goto out;
+ }
+
+ if (dev->memory_backed)
+ cmd->error = null_handle_memory_backed(cmd, op);
+
+ if (!cmd->error && dev->zoned)
+ cmd->error = null_handle_zoned(cmd, op, sector, nr_sectors);
+
+out:
+ nullb_complete_cmd(cmd);
return BLK_STS_OK;
}
@@ -1280,6 +1332,8 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
{
+ sector_t sector = bio->bi_iter.bi_sector;
+ sector_t nr_sectors = bio_sectors(bio);
struct nullb *nullb = q->queuedata;
struct nullb_queue *nq = nullb_to_queue(nullb);
struct nullb_cmd *cmd;
@@ -1287,7 +1341,7 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
cmd = alloc_cmd(nq, 1);
cmd->bio = bio;
- null_handle_cmd(cmd);
+ null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio));
return BLK_QC_T_NONE;
}
@@ -1311,7 +1365,7 @@ static bool should_requeue_request(struct request *rq)
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
{
- pr_info("null: rq %p timed out\n", rq);
+ pr_info("rq %p timed out\n", rq);
blk_mq_complete_request(rq);
return BLK_EH_DONE;
}
@@ -1321,6 +1375,8 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
struct nullb_queue *nq = hctx->driver_data;
+ sector_t nr_sectors = blk_rq_sectors(bd->rq);
+ sector_t sector = blk_rq_pos(bd->rq);
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
@@ -1349,7 +1405,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
if (should_timeout_request(bd->rq))
return BLK_STS_OK;
- return null_handle_cmd(cmd);
+ return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
}
static const struct blk_mq_ops null_mq_ops = {
@@ -1412,20 +1468,9 @@ static void null_config_discard(struct nullb *nullb)
blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
}
-static int null_open(struct block_device *bdev, fmode_t mode)
-{
- return 0;
-}
-
-static void null_release(struct gendisk *disk, fmode_t mode)
-{
-}
-
-static const struct block_device_operations null_fops = {
- .owner = THIS_MODULE,
- .open = null_open,
- .release = null_release,
- .report_zones = null_zone_report,
+static const struct block_device_operations null_ops = {
+ .owner = THIS_MODULE,
+ .report_zones = null_report_zones,
};
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
@@ -1514,29 +1559,35 @@ static int init_driver_queues(struct nullb *nullb)
static int null_gendisk_register(struct nullb *nullb)
{
+ sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
struct gendisk *disk;
- sector_t size;
disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
if (!disk)
return -ENOMEM;
- size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
- set_capacity(disk, size >> 9);
+ set_capacity(disk, size);
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->major = null_major;
disk->first_minor = nullb->index;
- disk->fops = &null_fops;
+ disk->fops = &null_ops;
disk->private_data = nullb;
disk->queue = nullb->q;
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+#ifdef CONFIG_BLK_DEV_ZONED
if (nullb->dev->zoned) {
- int ret = blk_revalidate_disk_zones(disk);
-
- if (ret != 0)
- return ret;
+ if (queue_is_mq(nullb->q)) {
+ int ret = blk_revalidate_disk_zones(disk);
+ if (ret)
+ return ret;
+ } else {
+ blk_queue_chunk_sectors(nullb->q,
+ nullb->dev->zone_size_sects);
+ nullb->q->nr_zones = blkdev_nr_zones(disk);
+ }
}
+#endif
add_disk(disk);
return 0;
@@ -1562,7 +1613,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
return blk_mq_alloc_tag_set(set);
}
-static void null_validate_conf(struct nullb_device *dev)
+static int null_validate_conf(struct nullb_device *dev)
{
dev->blocksize = round_down(dev->blocksize, 512);
dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
@@ -1589,6 +1640,14 @@ static void null_validate_conf(struct nullb_device *dev)
/* can not stop a queue */
if (dev->queue_mode == NULL_Q_BIO)
dev->mbps = 0;
+
+ if (dev->zoned &&
+ (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
+ pr_err("zone_size must be power-of-two\n");
+ return -EINVAL;
+ }
+
+ return 0;
}
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
@@ -1621,7 +1680,9 @@ static int null_add_dev(struct nullb_device *dev)
struct nullb *nullb;
int rv;
- null_validate_conf(dev);
+ rv = null_validate_conf(dev);
+ if (rv)
+ return rv;
nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
if (!nullb) {
@@ -1686,8 +1747,10 @@ static int null_add_dev(struct nullb_device *dev)
if (rv)
goto out_cleanup_blk_queue;
- blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects);
nullb->q->limits.zoned = BLK_ZONED_HM;
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q);
+ blk_queue_required_elevator_features(nullb->q,
+ ELEVATOR_F_ZBD_SEQ_WRITE);
}
nullb->q->queuedata = nullb;
@@ -1739,28 +1802,23 @@ static int __init null_init(void)
struct nullb_device *dev;
if (g_bs > PAGE_SIZE) {
- pr_warn("null_blk: invalid block size\n");
- pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
+ pr_warn("invalid block size\n");
+ pr_warn("defaults block size to %lu\n", PAGE_SIZE);
g_bs = PAGE_SIZE;
}
- if (!is_power_of_2(g_zone_size)) {
- pr_err("null_blk: zone_size must be power-of-two\n");
- return -EINVAL;
- }
-
if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
- pr_err("null_blk: invalid home_node value\n");
+ pr_err("invalid home_node value\n");
g_home_node = NUMA_NO_NODE;
}
if (g_queue_mode == NULL_Q_RQ) {
- pr_err("null_blk: legacy IO path no longer available\n");
+ pr_err("legacy IO path no longer available\n");
return -EINVAL;
}
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
if (g_submit_queues != nr_online_nodes) {
- pr_warn("null_blk: submit_queues param is set to %u.\n",
+ pr_warn("submit_queues param is set to %u.\n",
nr_online_nodes);
g_submit_queues = nr_online_nodes;
}
@@ -1803,7 +1861,7 @@ static int __init null_init(void)
}
}
- pr_info("null: module loaded\n");
+ pr_info("module loaded\n");
return 0;
err_dev:
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index cb28d93f2bd1..ed34785dd64b 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -17,7 +17,7 @@ int null_zone_init(struct nullb_device *dev)
unsigned int i;
if (!is_power_of_2(dev->zone_size)) {
- pr_err("null_blk: zone_size must be power-of-two\n");
+ pr_err("zone_size must be power-of-two\n");
return -EINVAL;
}
@@ -31,7 +31,7 @@ int null_zone_init(struct nullb_device *dev)
if (dev->zone_nr_conv >= dev->nr_zones) {
dev->zone_nr_conv = dev->nr_zones - 1;
- pr_info("null_blk: changed the number of conventional zones to %u",
+ pr_info("changed the number of conventional zones to %u",
dev->zone_nr_conv);
}
@@ -66,25 +66,56 @@ void null_zone_exit(struct nullb_device *dev)
kvfree(dev->zones);
}
-int null_zone_report(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+int null_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nullb *nullb = disk->private_data;
struct nullb_device *dev = nullb->dev;
- unsigned int zno, nrz = 0;
-
- zno = null_zone_no(dev, sector);
- if (zno < dev->nr_zones) {
- nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno);
- memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone));
+ unsigned int first_zone, i;
+ struct blk_zone zone;
+ int error;
+
+ first_zone = null_zone_no(dev, sector);
+ if (first_zone >= dev->nr_zones)
+ return 0;
+
+ nr_zones = min(nr_zones, dev->nr_zones - first_zone);
+ for (i = 0; i < nr_zones; i++) {
+ /*
+ * Stacked DM target drivers will remap the zone information by
+ * modifying the zone information passed to the report callback.
+ * So use a local copy to avoid corruption of the device zone
+ * array.
+ */
+ memcpy(&zone, &dev->zones[first_zone + i],
+ sizeof(struct blk_zone));
+ error = cb(&zone, i, data);
+ if (error)
+ return error;
}
- *nr_zones = nrz;
+ return nr_zones;
+}
- return 0;
+size_t null_zone_valid_read_len(struct nullb *nullb,
+ sector_t sector, unsigned int len)
+{
+ struct nullb_device *dev = nullb->dev;
+ struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+ unsigned int nr_sectors = len >> SECTOR_SHIFT;
+
+ /* Read must be below the write pointer position */
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
+ sector + nr_sectors <= zone->wp)
+ return len;
+
+ if (sector > zone->wp)
+ return 0;
+
+ return (zone->wp - sector) << SECTOR_SHIFT;
}
-void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
+static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
unsigned int nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
@@ -95,16 +126,16 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
case BLK_ZONE_COND_FULL:
/* Cannot write to a full zone */
cmd->error = BLK_STS_IOERR;
- break;
+ return BLK_STS_IOERR;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_IMP_OPEN:
+ case BLK_ZONE_COND_EXP_OPEN:
+ case BLK_ZONE_COND_CLOSED:
/* Writes must be at the write pointer position */
- if (sector != zone->wp) {
- cmd->error = BLK_STS_IOERR;
- break;
- }
+ if (sector != zone->wp)
+ return BLK_STS_IOERR;
- if (zone->cond == BLK_ZONE_COND_EMPTY)
+ if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
zone->wp += nr_sectors;
@@ -115,22 +146,79 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
break;
default:
/* Invalid zone condition */
- cmd->error = BLK_STS_IOERR;
- break;
+ return BLK_STS_IOERR;
}
+ return BLK_STS_OK;
}
-void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
+static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+ sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
- unsigned int zno = null_zone_no(dev, sector);
- struct blk_zone *zone = &dev->zones[zno];
+ struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+ size_t i;
+
+ switch (op) {
+ case REQ_OP_ZONE_RESET_ALL:
+ for (i = 0; i < dev->nr_zones; i++) {
+ if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
+ continue;
+ zone[i].cond = BLK_ZONE_COND_EMPTY;
+ zone[i].wp = zone[i].start;
+ }
+ break;
+ case REQ_OP_ZONE_RESET:
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return BLK_STS_IOERR;
- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
- cmd->error = BLK_STS_IOERR;
- return;
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ break;
+ case REQ_OP_ZONE_OPEN:
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return BLK_STS_IOERR;
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ return BLK_STS_IOERR;
+
+ zone->cond = BLK_ZONE_COND_EXP_OPEN;
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return BLK_STS_IOERR;
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ return BLK_STS_IOERR;
+
+ if (zone->wp == zone->start)
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ else
+ zone->cond = BLK_ZONE_COND_CLOSED;
+ break;
+ case REQ_OP_ZONE_FINISH:
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return BLK_STS_IOERR;
+
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zone->len;
+ break;
+ default:
+ return BLK_STS_NOTSUPP;
}
+ return BLK_STS_OK;
+}
- zone->cond = BLK_ZONE_COND_EMPTY;
- zone->wp = zone->start;
+blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op,
+ sector_t sector, sector_t nr_sectors)
+{
+ switch (op) {
+ case REQ_OP_WRITE:
+ return null_zone_write(cmd, sector, nr_sectors);
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_RESET_ALL:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ return null_zone_mgmt(cmd, op, sector);
+ default:
+ return BLK_STS_OK;
+ }
}
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 001dbdcbf355..117cfc8cd05a 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -275,6 +275,9 @@ static const struct block_device_operations pcd_bdops = {
.open = pcd_block_open,
.release = pcd_block_release,
.ioctl = pcd_block_ioctl,
+#ifdef CONFIG_COMPAT
+ .ioctl = blkdev_compat_ptr_ioctl,
+#endif
.check_events = pcd_block_check_events,
};
@@ -314,8 +317,8 @@ static void pcd_init_units(void)
disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops,
1, BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(disk->queue)) {
- put_disk(disk);
disk->queue = NULL;
+ put_disk(disk);
continue;
}
@@ -723,9 +726,9 @@ static int pcd_detect(void)
k = 0;
if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
cd = pcd;
- if (pi_init(cd->pi, 1, -1, -1, -1, -1, -1, pcd_buffer,
- PI_PCD, verbose, cd->name)) {
- if (!pcd_probe(cd, -1, id) && cd->disk) {
+ if (cd->disk && pi_init(cd->pi, 1, -1, -1, -1, -1, -1,
+ pcd_buffer, PI_PCD, verbose, cd->name)) {
+ if (!pcd_probe(cd, -1, id)) {
cd->present = 1;
k++;
} else
@@ -736,11 +739,13 @@ static int pcd_detect(void)
int *conf = *drives[unit];
if (!conf[D_PRT])
continue;
+ if (!cd->disk)
+ continue;
if (!pi_init(cd->pi, 0, conf[D_PRT], conf[D_MOD],
conf[D_UNI], conf[D_PRO], conf[D_DLY],
pcd_buffer, PI_PCD, verbose, cd->name))
continue;
- if (!pcd_probe(cd, conf[D_SLV], id) && cd->disk) {
+ if (!pcd_probe(cd, conf[D_SLV], id)) {
cd->present = 1;
k++;
} else
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 6f9ad3fc716f..c0967507d085 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -874,6 +874,7 @@ static const struct block_device_operations pd_fops = {
.open = pd_open,
.release = pd_release,
.ioctl = pd_ioctl,
+ .compat_ioctl = pd_ioctl,
.getgeo = pd_getgeo,
.check_events = pd_check_events,
.revalidate_disk= pd_revalidate
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 1e9c50a7256c..bb09f21ce21a 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -276,6 +276,7 @@ static const struct block_device_operations pf_fops = {
.open = pf_open,
.release = pf_release,
.ioctl = pf_ioctl,
+ .compat_ioctl = pf_ioctl,
.getgeo = pf_getgeo,
.check_events = pf_check_events,
};
@@ -300,8 +301,8 @@ static void __init pf_init_units(void)
disk->queue = blk_mq_init_sq_queue(&pf->tag_set, &pf_mq_ops,
1, BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(disk->queue)) {
- put_disk(disk);
disk->queue = NULL;
+ put_disk(disk);
continue;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 024060165afa..5f970a7d32c0 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2594,7 +2594,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
if (ret)
return ret;
if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
- WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
return -EINVAL;
}
@@ -2685,6 +2684,7 @@ static const struct block_device_operations pktcdvd_ops = {
.open = pkt_open,
.release = pkt_close,
.ioctl = pkt_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
.check_events = pkt_check_events,
};
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 3327192bb71f..6343402c09e6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -34,7 +34,7 @@
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/striper.h>
#include <linux/ceph/decode.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
#include <linux/bsearch.h>
#include <linux/kernel.h>
@@ -377,7 +377,6 @@ struct rbd_client_id {
struct rbd_mapping {
u64 size;
- u64 features;
};
/*
@@ -462,8 +461,9 @@ struct rbd_device {
* by rbd_dev->lock
*/
enum rbd_dev_flags {
- RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */
+ RBD_DEV_FLAG_EXISTS, /* rbd_dev_device_setup() ran */
RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */
+ RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */
};
static DEFINE_MUTEX(client_mutex); /* Serialize client creation */
@@ -514,6 +514,16 @@ static int minor_to_rbd_dev_id(int minor)
return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
}
+static bool rbd_is_ro(struct rbd_device *rbd_dev)
+{
+ return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
+}
+
+static bool rbd_is_snap(struct rbd_device *rbd_dev)
+{
+ return rbd_dev->spec->snap_id != CEPH_NOSNAP;
+}
+
static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
{
lockdep_assert_held(&rbd_dev->lock_rwsem);
@@ -633,8 +643,6 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
u64 snap_id);
static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
u8 *order, u64 *snap_size);
-static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
- u64 *snap_features);
static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev);
static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result);
@@ -695,9 +703,16 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
if (get_user(ro, (int __user *)arg))
return -EFAULT;
- /* Snapshots can't be marked read-write */
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro)
- return -EROFS;
+ /*
+ * Both images mapped read-only and snapshots can't be marked
+ * read-write.
+ */
+ if (!ro) {
+ if (rbd_is_ro(rbd_dev))
+ return -EROFS;
+
+ rbd_assert(!rbd_is_snap(rbd_dev));
+ }
/* Let blkdev_roset() handle it */
return -ENOTTY;
@@ -823,34 +838,29 @@ enum {
Opt_queue_depth,
Opt_alloc_size,
Opt_lock_timeout,
- Opt_last_int,
/* int args above */
Opt_pool_ns,
- Opt_last_string,
/* string args above */
Opt_read_only,
Opt_read_write,
Opt_lock_on_read,
Opt_exclusive,
Opt_notrim,
- Opt_err
};
-static match_table_t rbd_opts_tokens = {
- {Opt_queue_depth, "queue_depth=%d"},
- {Opt_alloc_size, "alloc_size=%d"},
- {Opt_lock_timeout, "lock_timeout=%d"},
- /* int args above */
- {Opt_pool_ns, "_pool_ns=%s"},
- /* string args above */
- {Opt_read_only, "read_only"},
- {Opt_read_only, "ro"}, /* Alternate spelling */
- {Opt_read_write, "read_write"},
- {Opt_read_write, "rw"}, /* Alternate spelling */
- {Opt_lock_on_read, "lock_on_read"},
- {Opt_exclusive, "exclusive"},
- {Opt_notrim, "notrim"},
- {Opt_err, NULL}
+static const struct fs_parameter_spec rbd_parameters[] = {
+ fsparam_u32 ("alloc_size", Opt_alloc_size),
+ fsparam_flag ("exclusive", Opt_exclusive),
+ fsparam_flag ("lock_on_read", Opt_lock_on_read),
+ fsparam_u32 ("lock_timeout", Opt_lock_timeout),
+ fsparam_flag ("notrim", Opt_notrim),
+ fsparam_string ("_pool_ns", Opt_pool_ns),
+ fsparam_u32 ("queue_depth", Opt_queue_depth),
+ fsparam_flag ("read_only", Opt_read_only),
+ fsparam_flag ("read_write", Opt_read_write),
+ fsparam_flag ("ro", Opt_read_only),
+ fsparam_flag ("rw", Opt_read_write),
+ {}
};
struct rbd_options {
@@ -871,87 +881,12 @@ struct rbd_options {
#define RBD_EXCLUSIVE_DEFAULT false
#define RBD_TRIM_DEFAULT true
-struct parse_rbd_opts_ctx {
+struct rbd_parse_opts_ctx {
struct rbd_spec *spec;
+ struct ceph_options *copts;
struct rbd_options *opts;
};
-static int parse_rbd_opts_token(char *c, void *private)
-{
- struct parse_rbd_opts_ctx *pctx = private;
- substring_t argstr[MAX_OPT_ARGS];
- int token, intval, ret;
-
- token = match_token(c, rbd_opts_tokens, argstr);
- if (token < Opt_last_int) {
- ret = match_int(&argstr[0], &intval);
- if (ret < 0) {
- pr_err("bad option arg (not int) at '%s'\n", c);
- return ret;
- }
- dout("got int token %d val %d\n", token, intval);
- } else if (token > Opt_last_int && token < Opt_last_string) {
- dout("got string token %d val %s\n", token, argstr[0].from);
- } else {
- dout("got token %d\n", token);
- }
-
- switch (token) {
- case Opt_queue_depth:
- if (intval < 1) {
- pr_err("queue_depth out of range\n");
- return -EINVAL;
- }
- pctx->opts->queue_depth = intval;
- break;
- case Opt_alloc_size:
- if (intval < SECTOR_SIZE) {
- pr_err("alloc_size out of range\n");
- return -EINVAL;
- }
- if (!is_power_of_2(intval)) {
- pr_err("alloc_size must be a power of 2\n");
- return -EINVAL;
- }
- pctx->opts->alloc_size = intval;
- break;
- case Opt_lock_timeout:
- /* 0 is "wait forever" (i.e. infinite timeout) */
- if (intval < 0 || intval > INT_MAX / 1000) {
- pr_err("lock_timeout out of range\n");
- return -EINVAL;
- }
- pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000);
- break;
- case Opt_pool_ns:
- kfree(pctx->spec->pool_ns);
- pctx->spec->pool_ns = match_strdup(argstr);
- if (!pctx->spec->pool_ns)
- return -ENOMEM;
- break;
- case Opt_read_only:
- pctx->opts->read_only = true;
- break;
- case Opt_read_write:
- pctx->opts->read_only = false;
- break;
- case Opt_lock_on_read:
- pctx->opts->lock_on_read = true;
- break;
- case Opt_exclusive:
- pctx->opts->exclusive = true;
- break;
- case Opt_notrim:
- pctx->opts->trim = false;
- break;
- default:
- /* libceph prints "bad option" msg */
- return -EINVAL;
- }
-
- return 0;
-}
-
static char* obj_op_name(enum obj_operation_type op_type)
{
switch (op_type) {
@@ -1302,51 +1237,23 @@ static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}
-static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
- u64 *snap_features)
-{
- rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
- if (snap_id == CEPH_NOSNAP) {
- *snap_features = rbd_dev->header.features;
- } else if (rbd_dev->image_format == 1) {
- *snap_features = 0; /* No features for format 1 */
- } else {
- u64 features = 0;
- int ret;
-
- ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
- if (ret)
- return ret;
-
- *snap_features = features;
- }
- return 0;
-}
-
static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
{
u64 snap_id = rbd_dev->spec->snap_id;
u64 size = 0;
- u64 features = 0;
int ret;
ret = rbd_snap_size(rbd_dev, snap_id, &size);
if (ret)
return ret;
- ret = rbd_snap_features(rbd_dev, snap_id, &features);
- if (ret)
- return ret;
rbd_dev->mapping.size = size;
- rbd_dev->mapping.features = features;
-
return 0;
}
static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
{
rbd_dev->mapping.size = 0;
- rbd_dev->mapping.features = 0;
}
static void zero_bvec(struct bio_vec *bv)
@@ -1754,8 +1661,6 @@ static struct rbd_img_request *rbd_img_request_create(
mutex_init(&img_request->state_mutex);
kref_init(&img_request->kref);
- dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
- obj_op_name(op_type), img_request);
return img_request;
}
@@ -1834,6 +1739,17 @@ static u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
static bool use_object_map(struct rbd_device *rbd_dev)
{
+ /*
+ * An image mapped read-only can't use the object map -- it isn't
+ * loaded because the header lock isn't acquired. Someone else can
+ * write to the image and update the object map behind our back.
+ *
+ * A snapshot can't be written to, so using the object map is always
+ * safe.
+ */
+ if (!rbd_is_snap(rbd_dev) && rbd_is_ro(rbd_dev))
+ return false;
+
return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) &&
!(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID));
}
@@ -2089,7 +2005,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
struct ceph_osd_data *osd_data;
u64 objno;
- u8 state, new_state, current_state;
+ u8 state, new_state, uninitialized_var(current_state);
bool has_current_state;
void *p;
@@ -2741,7 +2657,7 @@ static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
u64 off, u64 len)
{
struct ceph_file_extent ex = { off, len };
- union rbd_img_fill_iter dummy;
+ union rbd_img_fill_iter dummy = {};
struct rbd_img_fill_ctx fctx = {
.pos_type = OBJ_REQUEST_NODATA,
.pos = &dummy,
@@ -2944,6 +2860,9 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
__set_bit(IMG_REQ_CHILD, &child_img_req->flags);
child_img_req->obj_request = obj_req;
+ dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req,
+ obj_req);
+
if (!rbd_img_is_write(img_req)) {
switch (img_req->data_type) {
case OBJ_REQUEST_BIO:
@@ -3038,6 +2957,17 @@ again:
}
return true;
case RBD_OBJ_READ_PARENT:
+ /*
+ * The parent image is read only up to the overlap -- zero-fill
+ * from the overlap to the end of the request.
+ */
+ if (!*result) {
+ u32 obj_overlap = rbd_obj_img_extents_bytes(obj_req);
+
+ if (obj_overlap < obj_req->ex.oe_len)
+ rbd_obj_zero_range(obj_req, obj_overlap,
+ obj_req->ex.oe_len - obj_overlap);
+ }
return true;
default:
BUG();
@@ -3543,7 +3473,7 @@ static bool need_exclusive_lock(struct rbd_img_request *img_req)
if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
return false;
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ if (rbd_is_ro(rbd_dev))
return false;
rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
@@ -4218,7 +4148,7 @@ again:
* lock owner acked, but resend if we don't see them
* release the lock
*/
- dout("%s rbd_dev %p requeueing lock_dwork\n", __func__,
+ dout("%s rbd_dev %p requeuing lock_dwork\n", __func__,
rbd_dev);
mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC));
@@ -4814,24 +4744,14 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
- if (op_type != OBJ_OP_READ && rbd_dev->spec->snap_id != CEPH_NOSNAP) {
- rbd_warn(rbd_dev, "%s on read-only snapshot",
- obj_op_name(op_type));
- result = -EIO;
- goto err;
- }
-
- /*
- * Quit early if the mapped snapshot no longer exists. It's
- * still possible the snapshot will have disappeared by the
- * time our request arrives at the osd, but there's no sense in
- * sending it if we already know.
- */
- if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
- dout("request for non-existent snapshot");
- rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
- result = -ENXIO;
- goto err_rq;
+ if (op_type != OBJ_OP_READ) {
+ if (rbd_is_ro(rbd_dev)) {
+ rbd_warn(rbd_dev, "%s on read-only mapping",
+ obj_op_name(op_type));
+ result = -EIO;
+ goto err;
+ }
+ rbd_assert(!rbd_is_snap(rbd_dev));
}
if (offset && length > U64_MAX - offset + 1) {
@@ -4866,6 +4786,9 @@ static void rbd_queue_workfn(struct work_struct *work)
img_request->rq = rq;
snapc = NULL; /* img_request consumes a ref */
+ dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
+ img_request, obj_op_name(op_type), offset, length);
+
if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT)
result = rbd_img_fill_nodata(img_request, offset, length);
else
@@ -5010,25 +4933,6 @@ out:
return ret;
}
-/*
- * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
- * has disappeared from the (just updated) snapshot context.
- */
-static void rbd_exists_validate(struct rbd_device *rbd_dev)
-{
- u64 snap_id;
-
- if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags))
- return;
-
- snap_id = rbd_dev->spec->snap_id;
- if (snap_id == CEPH_NOSNAP)
- return;
-
- if (rbd_dev_snap_index(rbd_dev, snap_id) == BAD_SNAP_INDEX)
- clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
-}
-
static void rbd_dev_update_size(struct rbd_device *rbd_dev)
{
sector_t size;
@@ -5069,12 +4973,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
goto out;
}
- if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
- rbd_dev->mapping.size = rbd_dev->header.image_size;
- } else {
- /* validate mapped snapshot's EXISTS flag */
- rbd_exists_validate(rbd_dev);
- }
+ rbd_assert(!rbd_is_snap(rbd_dev));
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
out:
up_write(&rbd_dev->header_rwsem);
@@ -5196,17 +5096,12 @@ static ssize_t rbd_size_show(struct device *dev,
(unsigned long long)rbd_dev->mapping.size);
}
-/*
- * Note this shows the features for whatever's mapped, which is not
- * necessarily the base image.
- */
static ssize_t rbd_features_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- return sprintf(buf, "0x%016llx\n",
- (unsigned long long)rbd_dev->mapping.features);
+ return sprintf(buf, "0x%016llx\n", rbd_dev->header.features);
}
static ssize_t rbd_major_show(struct device *dev,
@@ -5658,17 +5553,20 @@ static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
{
+ size_t size;
void *reply_buf;
int ret;
void *p;
- reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
+ /* Response will be an encoded string, which includes a length */
+ size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX;
+ reply_buf = kzalloc(size, GFP_KERNEL);
if (!reply_buf)
return -ENOMEM;
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
&rbd_dev->header_oloc, "get_object_prefix",
- NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
+ NULL, 0, reply_buf, size);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0)
goto out;
@@ -5691,9 +5589,12 @@ out:
}
static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
- u64 *snap_features)
+ bool read_only, u64 *snap_features)
{
- __le64 snapid = cpu_to_le64(snap_id);
+ struct {
+ __le64 snap_id;
+ u8 read_only;
+ } features_in;
struct {
__le64 features;
__le64 incompat;
@@ -5701,9 +5602,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
u64 unsup;
int ret;
+ features_in.snap_id = cpu_to_le64(snap_id);
+ features_in.read_only = read_only;
+
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
&rbd_dev->header_oloc, "get_features",
- &snapid, sizeof(snapid),
+ &features_in, sizeof(features_in),
&features_buf, sizeof(features_buf));
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0)
@@ -5731,7 +5635,8 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
{
return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
- &rbd_dev->header.features);
+ rbd_is_ro(rbd_dev),
+ &rbd_dev->header.features);
}
/*
@@ -6438,6 +6343,118 @@ static inline char *dup_token(const char **buf, size_t *lenp)
return dup;
}
+static int rbd_parse_param(struct fs_parameter *param,
+ struct rbd_parse_opts_ctx *pctx)
+{
+ struct rbd_options *opt = pctx->opts;
+ struct fs_parse_result result;
+ struct p_log log = {.prefix = "rbd"};
+ int token, ret;
+
+ ret = ceph_parse_param(param, pctx->copts, NULL);
+ if (ret != -ENOPARAM)
+ return ret;
+
+ token = __fs_parse(&log, rbd_parameters, param, &result);
+ dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
+ if (token < 0) {
+ if (token == -ENOPARAM)
+ return inval_plog(&log, "Unknown parameter '%s'",
+ param->key);
+ return token;
+ }
+
+ switch (token) {
+ case Opt_queue_depth:
+ if (result.uint_32 < 1)
+ goto out_of_range;
+ opt->queue_depth = result.uint_32;
+ break;
+ case Opt_alloc_size:
+ if (result.uint_32 < SECTOR_SIZE)
+ goto out_of_range;
+ if (!is_power_of_2(result.uint_32))
+ return inval_plog(&log, "alloc_size must be a power of 2");
+ opt->alloc_size = result.uint_32;
+ break;
+ case Opt_lock_timeout:
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (result.uint_32 > INT_MAX / 1000)
+ goto out_of_range;
+ opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000);
+ break;
+ case Opt_pool_ns:
+ kfree(pctx->spec->pool_ns);
+ pctx->spec->pool_ns = param->string;
+ param->string = NULL;
+ break;
+ case Opt_read_only:
+ opt->read_only = true;
+ break;
+ case Opt_read_write:
+ opt->read_only = false;
+ break;
+ case Opt_lock_on_read:
+ opt->lock_on_read = true;
+ break;
+ case Opt_exclusive:
+ opt->exclusive = true;
+ break;
+ case Opt_notrim:
+ opt->trim = false;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+
+out_of_range:
+ return inval_plog(&log, "%s out of range", param->key);
+}
+
+/*
+ * This duplicates most of generic_parse_monolithic(), untying it from
+ * fs_context and skipping standard superblock and security options.
+ */
+static int rbd_parse_options(char *options, struct rbd_parse_opts_ctx *pctx)
+{
+ char *key;
+ int ret = 0;
+
+ dout("%s '%s'\n", __func__, options);
+ while ((key = strsep(&options, ",")) != NULL) {
+ if (*key) {
+ struct fs_parameter param = {
+ .key = key,
+ .type = fs_value_is_flag,
+ };
+ char *value = strchr(key, '=');
+ size_t v_len = 0;
+
+ if (value) {
+ if (value == key)
+ continue;
+ *value++ = 0;
+ v_len = strlen(value);
+ param.string = kmemdup_nul(value, v_len,
+ GFP_KERNEL);
+ if (!param.string)
+ return -ENOMEM;
+ param.type = fs_value_is_string;
+ }
+ param.size = v_len;
+
+ ret = rbd_parse_param(&param, pctx);
+ kfree(param.string);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* Parse the options provided for an "rbd add" (i.e., rbd image
* mapping) request. These arrive via a write to /sys/bus/rbd/add,
@@ -6489,8 +6506,7 @@ static int rbd_add_parse_args(const char *buf,
const char *mon_addrs;
char *snap_name;
size_t mon_addrs_size;
- struct parse_rbd_opts_ctx pctx = { 0 };
- struct ceph_options *copts;
+ struct rbd_parse_opts_ctx pctx = { 0 };
int ret;
/* The first four tokens are required */
@@ -6501,7 +6517,7 @@ static int rbd_add_parse_args(const char *buf,
return -EINVAL;
}
mon_addrs = buf;
- mon_addrs_size = len + 1;
+ mon_addrs_size = len;
buf += len;
ret = -EINVAL;
@@ -6551,6 +6567,10 @@ static int rbd_add_parse_args(const char *buf,
*(snap_name + len) = '\0';
pctx.spec->snap_name = snap_name;
+ pctx.copts = ceph_alloc_options();
+ if (!pctx.copts)
+ goto out_mem;
+
/* Initialize all rbd options to the defaults */
pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
@@ -6565,27 +6585,27 @@ static int rbd_add_parse_args(const char *buf,
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT;
- copts = ceph_parse_options(options, mon_addrs,
- mon_addrs + mon_addrs_size - 1,
- parse_rbd_opts_token, &pctx);
- if (IS_ERR(copts)) {
- ret = PTR_ERR(copts);
+ ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
+ if (ret)
+ goto out_err;
+
+ ret = rbd_parse_options(options, &pctx);
+ if (ret)
goto out_err;
- }
- kfree(options);
- *ceph_opts = copts;
+ *ceph_opts = pctx.copts;
*opts = pctx.opts;
*rbd_spec = pctx.spec;
-
+ kfree(options);
return 0;
+
out_mem:
ret = -ENOMEM;
out_err:
kfree(pctx.opts);
+ ceph_destroy_options(pctx.copts);
rbd_spec_put(pctx.spec);
kfree(options);
-
return ret;
}
@@ -6614,17 +6634,20 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
return -EINVAL;
}
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ if (rbd_is_ro(rbd_dev))
return 0;
rbd_assert(!rbd_is_lock_owner(rbd_dev));
queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
- if (ret > 0)
+ if (ret > 0) {
ret = rbd_dev->acquire_err;
- else if (!ret)
- ret = -ETIMEDOUT;
+ } else {
+ cancel_delayed_work_sync(&rbd_dev->lock_dwork);
+ if (!ret)
+ ret = -ETIMEDOUT;
+ }
if (ret) {
rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
@@ -6685,7 +6708,6 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
dout("rbd id object name is %s\n", oid.name);
/* Response will be an encoded string, which includes a length */
-
size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
response = kzalloc(size, GFP_NOIO);
if (!response) {
@@ -6697,7 +6719,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
"get_id", NULL, 0,
- response, RBD_IMAGE_ID_LEN_MAX);
+ response, size);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret == -ENOENT) {
image_id = kstrdup("", GFP_KERNEL);
@@ -6818,6 +6840,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
__rbd_get_client(rbd_dev->rbd_client);
rbd_spec_get(rbd_dev->parent_spec);
+ __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
+
ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
goto out_err;
@@ -6869,7 +6893,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
goto err_out_blkdev;
set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
- set_disk_ro(rbd_dev->disk, rbd_dev->opts->read_only);
+ set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev));
ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
if (ret)
@@ -6907,6 +6931,24 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
return ret;
}
+static void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap)
+{
+ if (!is_snap) {
+ pr_info("image %s/%s%s%s does not exist\n",
+ rbd_dev->spec->pool_name,
+ rbd_dev->spec->pool_ns ?: "",
+ rbd_dev->spec->pool_ns ? "/" : "",
+ rbd_dev->spec->image_name);
+ } else {
+ pr_info("snap %s/%s%s%s@%s does not exist\n",
+ rbd_dev->spec->pool_name,
+ rbd_dev->spec->pool_ns ?: "",
+ rbd_dev->spec->pool_ns ? "/" : "",
+ rbd_dev->spec->image_name,
+ rbd_dev->spec->snap_name);
+ }
+}
+
static void rbd_dev_image_release(struct rbd_device *rbd_dev)
{
rbd_dev_unprobe(rbd_dev);
@@ -6925,6 +6967,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
*/
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
+ bool need_watch = !rbd_is_ro(rbd_dev);
int ret;
/*
@@ -6941,22 +6984,21 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (ret)
goto err_out_format;
- if (!depth) {
+ if (need_watch) {
ret = rbd_register_watch(rbd_dev);
if (ret) {
if (ret == -ENOENT)
- pr_info("image %s/%s%s%s does not exist\n",
- rbd_dev->spec->pool_name,
- rbd_dev->spec->pool_ns ?: "",
- rbd_dev->spec->pool_ns ? "/" : "",
- rbd_dev->spec->image_name);
+ rbd_print_dne(rbd_dev, false);
goto err_out_format;
}
}
ret = rbd_dev_header_info(rbd_dev);
- if (ret)
+ if (ret) {
+ if (ret == -ENOENT && !need_watch)
+ rbd_print_dne(rbd_dev, false);
goto err_out_watch;
+ }
/*
* If this image is the one being mapped, we have pool name and
@@ -6970,12 +7012,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
ret = rbd_spec_fill_names(rbd_dev);
if (ret) {
if (ret == -ENOENT)
- pr_info("snap %s/%s%s%s@%s does not exist\n",
- rbd_dev->spec->pool_name,
- rbd_dev->spec->pool_ns ?: "",
- rbd_dev->spec->pool_ns ? "/" : "",
- rbd_dev->spec->image_name,
- rbd_dev->spec->snap_name);
+ rbd_print_dne(rbd_dev, true);
goto err_out_probe;
}
@@ -6983,7 +7020,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (ret)
goto err_out_probe;
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP &&
+ if (rbd_is_snap(rbd_dev) &&
(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) {
ret = rbd_object_map_load(rbd_dev);
if (ret)
@@ -7007,7 +7044,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (!depth)
+ if (need_watch)
rbd_unregister_watch(rbd_dev);
err_out_format:
rbd_dev->image_format = 0;
@@ -7059,6 +7096,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
spec = NULL; /* rbd_dev now owns this */
rbd_opts = NULL; /* rbd_dev now owns this */
+ /* if we are mapping a snapshot it will be a read-only mapping */
+ if (rbd_dev->opts->read_only ||
+ strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME))
+ __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
+
rbd_dev->config_info = kstrdup(buf, GFP_KERNEL);
if (!rbd_dev->config_info) {
rc = -ENOMEM;
@@ -7072,10 +7114,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
goto err_out_rbd_dev;
}
- /* If we are mapping a snapshot it must be marked read-only */
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
- rbd_dev->opts->read_only = true;
-
if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
rbd_warn(rbd_dev, "alloc_size adjusted to %u",
rbd_dev->layout.object_size);
@@ -7096,7 +7134,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
if (rc)
goto err_out_image_lock;
- add_disk(rbd_dev->disk);
+ device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL);
/* see rbd_init_disk() */
blk_put_queue(rbd_dev->disk->queue);
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 76b73ddf8fd7..10f6368117d8 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)
cancel_work_sync(&card->event_work);
+ destroy_workqueue(card->event_wq);
rsxx_destroy_dev(card);
rsxx_dma_destroy(card);
+ destroy_workqueue(card->creg_ctrl.creg_wq);
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6b2fd630de85..39aeebc6837d 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -171,6 +171,7 @@ static const struct block_device_operations vdc_fops = {
.owner = THIS_MODULE,
.getgeo = vdc_getgeo,
.ioctl = vdc_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
};
static void vdc_blk_queue_start(struct vdc_port *port)
@@ -634,7 +635,7 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
case VD_OP_GET_EFI:
case VD_OP_SET_EFI:
return -EOPNOTSUPP;
- };
+ }
map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 1f3f9e0f02a8..4eaf97d7a170 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -827,7 +827,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
goto failed_req_csr;
}
- card->csr_remap = ioremap_nocache(csr_base, csr_len);
+ card->csr_remap = ioremap(csr_base, csr_len);
if (!card->csr_remap) {
dev_printk(KERN_ERR, &card->dev->dev,
"Unable to remap memory region\n");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7ffd719d89de..54158766334b 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -11,7 +11,6 @@
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
-#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-virtio.h>
@@ -56,11 +55,6 @@ struct virtio_blk {
};
struct virtblk_req {
-#ifdef CONFIG_VIRTIO_BLK_SCSI
- struct scsi_request sreq; /* for SCSI passthrough, must be first */
- u8 sense[SCSI_SENSE_BUFFERSIZE];
- struct virtio_scsi_inhdr in_hdr;
-#endif
struct virtio_blk_outhdr out_hdr;
u8 status;
struct scatterlist sg[];
@@ -78,80 +72,6 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
}
}
-/*
- * If this is a packet command we need a couple of additional headers. Behind
- * the normal outhdr we put a segment with the scsi command block, and before
- * the normal inhdr we put the sense data and the inhdr with additional status
- * information.
- */
-#ifdef CONFIG_VIRTIO_BLK_SCSI
-static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr,
- struct scatterlist *data_sg, bool have_data)
-{
- struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
- unsigned int num_out = 0, num_in = 0;
-
- sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
- sgs[num_out++] = &hdr;
- sg_init_one(&cmd, vbr->sreq.cmd, vbr->sreq.cmd_len);
- sgs[num_out++] = &cmd;
-
- if (have_data) {
- if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
- sgs[num_out++] = data_sg;
- else
- sgs[num_out + num_in++] = data_sg;
- }
-
- sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE);
- sgs[num_out + num_in++] = &sense;
- sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
- sgs[num_out + num_in++] = &inhdr;
- sg_init_one(&status, &vbr->status, sizeof(vbr->status));
- sgs[num_out + num_in++] = &status;
-
- return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
-}
-
-static inline void virtblk_scsi_request_done(struct request *req)
-{
- struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
- struct virtio_blk *vblk = req->q->queuedata;
- struct scsi_request *sreq = &vbr->sreq;
-
- sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
- sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
- sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
-}
-
-static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long data)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct virtio_blk *vblk = disk->private_data;
-
- /*
- * Only allow the generic SCSI ioctls if the host can support it.
- */
- if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
- return -ENOTTY;
-
- return scsi_cmd_blk_ioctl(bdev, mode, cmd,
- (void __user *)data);
-}
-#else
-static inline int virtblk_add_req_scsi(struct virtqueue *vq,
- struct virtblk_req *vbr, struct scatterlist *data_sg,
- bool have_data)
-{
- return -EIO;
-}
-static inline void virtblk_scsi_request_done(struct request *req)
-{
-}
-#define virtblk_ioctl NULL
-#endif /* CONFIG_VIRTIO_BLK_SCSI */
-
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
struct scatterlist *data_sg, bool have_data)
{
@@ -216,13 +136,6 @@ static inline void virtblk_request_done(struct request *req)
req->special_vec.bv_offset);
}
- switch (req_op(req)) {
- case REQ_OP_SCSI_IN:
- case REQ_OP_SCSI_OUT:
- virtblk_scsi_request_done(req);
- break;
- }
-
blk_mq_end_request(req, virtblk_result(vbr));
}
@@ -299,10 +212,6 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
type = VIRTIO_BLK_T_WRITE_ZEROES;
unmap = !(req->cmd_flags & REQ_NOUNMAP);
break;
- case REQ_OP_SCSI_IN:
- case REQ_OP_SCSI_OUT:
- type = VIRTIO_BLK_T_SCSI_CMD;
- break;
case REQ_OP_DRV_IN:
type = VIRTIO_BLK_T_GET_ID;
break;
@@ -333,10 +242,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
}
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
- if (blk_rq_is_scsi(req))
- err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num);
- else
- err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
+ err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
if (err) {
virtqueue_kick(vblk->vqs[qid].vq);
blk_mq_stop_hw_queue(hctx);
@@ -404,7 +310,6 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
}
static const struct block_device_operations virtblk_fops = {
- .ioctl = virtblk_ioctl,
.owner = THIS_MODULE,
.getgeo = virtblk_getgeo,
};
@@ -683,9 +588,6 @@ static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
struct virtio_blk *vblk = set->driver_data;
struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
-#ifdef CONFIG_VIRTIO_BLK_SCSI
- vbr->sreq.sense = vbr->sense;
-#endif
sg_init_table(vbr->sg, vblk->sg_elems);
return 0;
}
@@ -698,23 +600,11 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set)
vblk->vdev, 0);
}
-#ifdef CONFIG_VIRTIO_BLK_SCSI
-static void virtblk_initialize_rq(struct request *req)
-{
- struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
-
- scsi_req_init(&vbr->sreq);
-}
-#endif
-
static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.commit_rqs = virtio_commit_rqs,
.complete = virtblk_request_done,
.init_request = virtblk_init_request,
-#ifdef CONFIG_VIRTIO_BLK_SCSI
- .initialize_rq_fn = virtblk_initialize_rq,
-#endif
.map_queues = virtblk_map_queues,
};
@@ -991,9 +881,6 @@ static const struct virtio_device_id id_table[] = {
static unsigned int features_legacy[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-#ifdef CONFIG_VIRTIO_BLK_SCSI
- VIRTIO_BLK_F_SCSI,
-#endif
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
}
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index fd1e19f1a49f..c2f71265af4b 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -62,8 +62,8 @@
* IO workloads.
*/
-static int xen_blkif_max_buffer_pages = 1024;
-module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+static int max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, max_buffer_pages, int, 0644);
MODULE_PARM_DESC(max_buffer_pages,
"Maximum number of free pages to keep in each block backend buffer");
@@ -78,8 +78,8 @@ MODULE_PARM_DESC(max_buffer_pages,
* algorithm.
*/
-static int xen_blkif_max_pgrants = 1056;
-module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+static int max_pgrants = 1056;
+module_param_named(max_persistent_grants, max_pgrants, int, 0644);
MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
@@ -88,8 +88,8 @@ MODULE_PARM_DESC(max_persistent_grants,
* use. The time is in seconds, 0 means indefinitely long.
*/
-static unsigned int xen_blkif_pgrant_timeout = 60;
-module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout,
+static unsigned int pgrant_timeout = 60;
+module_param_named(persistent_grant_unused_seconds, pgrant_timeout,
uint, 0644);
MODULE_PARM_DESC(persistent_grant_unused_seconds,
"Time in seconds an unused persistent grant is allowed to "
@@ -137,9 +137,8 @@ module_param(log_stats, int, 0644);
static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
{
- return xen_blkif_pgrant_timeout &&
- (jiffies - persistent_gnt->last_used >=
- HZ * xen_blkif_pgrant_timeout);
+ return pgrant_timeout && (jiffies - persistent_gnt->last_used >=
+ HZ * pgrant_timeout);
}
static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
@@ -234,7 +233,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *this;
struct xen_blkif *blkif = ring->blkif;
- if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
+ if (ring->persistent_gnt_c >= max_pgrants) {
if (!blkif->vbd.overflow_max_grants)
blkif->vbd.overflow_max_grants = 1;
return -EBUSY;
@@ -397,14 +396,13 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
goto out;
}
- if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
- (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+ if (ring->persistent_gnt_c < max_pgrants ||
+ (ring->persistent_gnt_c == max_pgrants &&
!ring->blkif->vbd.overflow_max_grants)) {
num_clean = 0;
} else {
- num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
- num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants +
- num_clean;
+ num_clean = (max_pgrants / 100) * LRU_PERCENT_CLEAN;
+ num_clean = ring->persistent_gnt_c - max_pgrants + num_clean;
num_clean = min(ring->persistent_gnt_c, num_clean);
pr_debug("Going to purge at least %u persistent grants\n",
num_clean);
@@ -599,8 +597,7 @@ static void print_stats(struct xen_blkif_ring *ring)
current->comm, ring->st_oo_req,
ring->st_rd_req, ring->st_wr_req,
ring->st_f_req, ring->st_ds_req,
- ring->persistent_gnt_c,
- xen_blkif_max_pgrants);
+ ring->persistent_gnt_c, max_pgrants);
ring->st_print = jiffies + msecs_to_jiffies(10 * 1000);
ring->st_rd_req = 0;
ring->st_wr_req = 0;
@@ -656,8 +653,11 @@ purge_gnt_list:
ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
}
- /* Shrink if we have more than xen_blkif_max_buffer_pages */
- shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
+ /* Shrink the free pages pool if it is too large. */
+ if (time_before(jiffies, blkif->buffer_squeeze_end))
+ shrink_free_pagepool(ring, 0);
+ else
+ shrink_free_pagepool(ring, max_buffer_pages);
if (log_stats && time_after(jiffies, ring->st_print))
print_stats(ring);
@@ -884,7 +884,7 @@ again:
continue;
}
if (use_persistent_gnts &&
- ring->persistent_gnt_c < xen_blkif_max_pgrants) {
+ ring->persistent_gnt_c < max_pgrants) {
/*
* We are using persistent grants, the grant is
* not mapped but we might have room for it.
@@ -911,7 +911,7 @@ again:
pages[seg_idx]->persistent_gnt = persistent_gnt;
pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
persistent_gnt->gnt, ring->persistent_gnt_c,
- xen_blkif_max_pgrants);
+ max_pgrants);
goto next;
}
if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
@@ -936,6 +936,8 @@ next:
out_of_memory:
pr_alert("%s: out of memory\n", __func__);
put_free_pages(ring, pages_to_gnt, segs_to_map);
+ for (i = last_map; i < num; i++)
+ pages[i]->handle = BLKBACK_INVALID_HANDLE;
return -ENOMEM;
}
@@ -1504,5 +1506,13 @@ static int __init xen_blkif_init(void)
module_init(xen_blkif_init);
+static void __exit xen_blkif_fini(void)
+{
+ xen_blkif_xenbus_fini();
+ xen_blkif_interface_fini();
+}
+
+module_exit(xen_blkif_fini);
+
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vbd");
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 1d3002d773f7..a3eeccf3ac5f 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -319,6 +319,7 @@ struct xen_blkif {
/* All rings for this device. */
struct xen_blkif_ring *rings;
unsigned int nr_rings;
+ unsigned long buffer_squeeze_end;
};
struct seg_buf {
@@ -375,9 +376,12 @@ struct phys_req {
struct block_device *bdev;
blkif_sector_t sector_number;
};
+
int xen_blkif_interface_init(void);
+void xen_blkif_interface_fini(void);
int xen_blkif_xenbus_init(void);
+void xen_blkif_xenbus_fini(void);
irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index b90dbcd99c03..42944d41aea0 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -171,6 +171,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
blkif->domid = domid;
atomic_set(&blkif->refcnt, 1);
init_completion(&blkif->drain_complete);
+
+ /*
+ * Because freeing back to the cache may be deferred, it is not
+ * safe to unload the module (and hence destroy the cache) until
+ * this has completed. To prevent premature unloading, take an
+ * extra module reference here and release only when the object
+ * has been freed back to the cache.
+ */
+ __module_get(THIS_MODULE);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
return blkif;
@@ -181,6 +190,9 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
{
int err;
struct xen_blkif *blkif = ring->blkif;
+ const struct blkif_common_sring *sring_common;
+ RING_IDX rsp_prod, req_prod;
+ unsigned int size;
/* Already connected through? */
if (ring->irq)
@@ -191,46 +203,62 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
if (err < 0)
return err;
+ sring_common = (struct blkif_common_sring *)ring->blk_ring;
+ rsp_prod = READ_ONCE(sring_common->rsp_prod);
+ req_prod = READ_ONCE(sring_common->req_prod);
+
switch (blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
{
- struct blkif_sring *sring;
- sring = (struct blkif_sring *)ring->blk_ring;
- BACK_RING_INIT(&ring->blk_rings.native, sring,
- XEN_PAGE_SIZE * nr_grefs);
+ struct blkif_sring *sring_native =
+ (struct blkif_sring *)ring->blk_ring;
+
+ BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
+ rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+ size = __RING_SIZE(sring_native, XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
- struct blkif_x86_32_sring *sring_x86_32;
- sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
- BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
- XEN_PAGE_SIZE * nr_grefs);
+ struct blkif_x86_32_sring *sring_x86_32 =
+ (struct blkif_x86_32_sring *)ring->blk_ring;
+
+ BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
+ rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+ size = __RING_SIZE(sring_x86_32, XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
- struct blkif_x86_64_sring *sring_x86_64;
- sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
- BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
- XEN_PAGE_SIZE * nr_grefs);
+ struct blkif_x86_64_sring *sring_x86_64 =
+ (struct blkif_x86_64_sring *)ring->blk_ring;
+
+ BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
+ rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+ size = __RING_SIZE(sring_x86_64, XEN_PAGE_SIZE * nr_grefs);
break;
}
default:
BUG();
}
+ err = -EIO;
+ if (req_prod - rsp_prod > size)
+ goto fail;
+
err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
xen_blkif_be_int, 0,
"blkif-backend", ring);
- if (err < 0) {
- xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
- ring->blk_rings.common.sring = NULL;
- return err;
- }
+ if (err < 0)
+ goto fail;
ring->irq = err;
return 0;
+
+fail:
+ xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+ ring->blk_rings.common.sring = NULL;
+ return err;
}
static int xen_blkif_disconnect(struct xen_blkif *blkif)
@@ -320,6 +348,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
/* Make sure everything is drained before shutting down */
kmem_cache_free(xen_blkif_cachep, blkif);
+ module_put(THIS_MODULE);
}
int __init xen_blkif_interface_init(void)
@@ -333,6 +362,12 @@ int __init xen_blkif_interface_init(void)
return 0;
}
+void xen_blkif_interface_fini(void)
+{
+ kmem_cache_destroy(xen_blkif_cachep);
+ xen_blkif_cachep = NULL;
+}
+
/*
* sysfs interface for VBD I/O requests
*/
@@ -432,7 +467,6 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev)
device_remove_file(&dev->dev, &dev_attr_physical_device);
}
-
static void xen_vbd_free(struct xen_vbd *vbd)
{
if (vbd->bdev)
@@ -489,6 +523,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
handle, blkif->domid);
return 0;
}
+
static int xen_blkbk_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
@@ -572,6 +607,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
if (err)
dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}
+
int xen_blkbk_barrier(struct xenbus_transaction xbt,
struct backend_info *be, int state)
{
@@ -656,7 +692,6 @@ fail:
return err;
}
-
/*
* Callback received when the hotplug scripts have placed the physical-device
* node. Read it and the mode node, and create a vbd. If the frontend is
@@ -748,7 +783,6 @@ static void backend_changed(struct xenbus_watch *watch,
}
}
-
/*
* Callback received when the frontend's state changes.
*/
@@ -823,9 +857,27 @@ static void frontend_changed(struct xenbus_device *dev,
}
}
+/* Once a memory pressure is detected, squeeze free page pools for a while. */
+static unsigned int buffer_squeeze_duration_ms = 10;
+module_param_named(buffer_squeeze_duration_ms,
+ buffer_squeeze_duration_ms, int, 0644);
+MODULE_PARM_DESC(buffer_squeeze_duration_ms,
+"Duration in ms to squeeze pages buffer when a memory pressure is detected");
-/* ** Connection ** */
+/*
+ * Callback received when the memory pressure is detected.
+ */
+static void reclaim_memory(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+ if (!be)
+ return;
+ be->blkif->buffer_squeeze_end = jiffies +
+ msecs_to_jiffies(buffer_squeeze_duration_ms);
+}
+
+/* ** Connection ** */
/*
* Write the physical details regarding the block device to the store, and
@@ -1115,10 +1167,17 @@ static struct xenbus_driver xen_blkbk_driver = {
.ids = xen_blkbk_ids,
.probe = xen_blkbk_probe,
.remove = xen_blkbk_remove,
- .otherend_changed = frontend_changed
+ .otherend_changed = frontend_changed,
+ .allow_rebind = true,
+ .reclaim_memory = reclaim_memory,
};
int xen_blkif_xenbus_init(void)
{
return xenbus_register_backend(&xen_blkbk_driver);
}
+
+void xen_blkif_xenbus_fini(void)
+{
+ xenbus_unregister_driver(&xen_blkbk_driver);
+}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a74d03913822..e2ad6bba2281 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -151,9 +151,6 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the
#define BLK_RING_SIZE(info) \
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
-#define BLK_MAX_RING_SIZE \
- __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)
-
/*
* ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
* characters are enough. Define to 20 to keep consistent with backend.
@@ -177,12 +174,12 @@ struct blkfront_ring_info {
unsigned int evtchn, irq;
struct work_struct work;
struct gnttab_free_callback callback;
- struct blk_shadow shadow[BLK_MAX_RING_SIZE];
struct list_head indirect_pages;
struct list_head grants;
unsigned int persistent_gnts_c;
unsigned long shadow_free;
struct blkfront_info *dev_info;
+ struct blk_shadow shadow[];
};
/*
@@ -1113,8 +1110,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (!VDEV_IS_EXTENDED(info->vdevice)) {
err = xen_translate_vdev(info->vdevice, &minor, &offset);
if (err)
- return err;
- nr_parts = PARTS_PER_DISK;
+ return err;
+ nr_parts = PARTS_PER_DISK;
} else {
minor = BLKIF_MINOR_EXT(info->vdevice);
nr_parts = PARTS_PER_EXT_DISK;
@@ -1915,7 +1912,8 @@ static int negotiate_mq(struct blkfront_info *info)
info->nr_rings = 1;
info->rinfo = kvcalloc(info->nr_rings,
- sizeof(struct blkfront_ring_info),
+ struct_size(info->rinfo, shadow,
+ BLK_RING_SIZE(info)),
GFP_KERNEL);
if (!info->rinfo) {
xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
@@ -2632,6 +2630,7 @@ static const struct block_device_operations xlvbd_block_fops =
.release = blkif_release,
.getgeo = blkif_getgeo,
.ioctl = blkif_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
};
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d58a359a6622..1bdb5793842b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -207,14 +207,17 @@ static inline void zram_fill_page(void *ptr, unsigned long len,
static bool page_same_filled(void *ptr, unsigned long *element)
{
- unsigned int pos;
unsigned long *page;
unsigned long val;
+ unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
page = (unsigned long *)ptr;
val = page[0];
- for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
+ if (val != page[last_pos])
+ return false;
+
+ for (pos = 1; pos < last_pos; pos++) {
if (val != page[pos])
return false;
}
@@ -413,13 +416,14 @@ static void reset_bdev(struct zram *zram)
static ssize_t backing_dev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ struct file *file;
struct zram *zram = dev_to_zram(dev);
- struct file *file = zram->backing_dev;
char *p;
ssize_t ret;
down_read(&zram->init_lock);
- if (!zram->backing_dev) {
+ file = zram->backing_dev;
+ if (!file) {
memcpy(buf, "none\n", 5);
up_read(&zram->init_lock);
return 5;
@@ -625,7 +629,7 @@ static ssize_t writeback_store(struct device *dev,
struct bio bio;
struct bio_vec bio_vec;
struct page *page;
- ssize_t ret;
+ ssize_t ret = len;
int mode;
unsigned long blk_idx = 0;
@@ -761,7 +765,6 @@ next:
if (blk_idx)
free_block_bdev(zram, blk_idx);
- ret = len;
__free_page(page);
release_init_lock:
up_read(&zram->init_lock);
OpenPOWER on IntegriCloud