summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/floppy.c3
-rw-r--r--drivers/block/nbd.c3
-rw-r--r--drivers/block/null_blk.h17
-rw-r--r--drivers/block/null_blk_main.c45
-rw-r--r--drivers/block/null_blk_zoned.c34
-rw-r--r--drivers/block/rbd.c235
-rw-r--r--drivers/block/xen-blkback/blkback.c99
-rw-r--r--drivers/block/xen-blkback/common.h14
-rw-r--r--drivers/block/xen-blkfront.c110
9 files changed, 399 insertions, 161 deletions
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 48f622728ce6..f2b6f4da1034 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3467,6 +3467,9 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
(struct floppy_struct **)&outparam);
if (ret)
return ret;
+ memcpy(&inparam.g, outparam,
+ offsetof(struct floppy_struct, name));
+ outparam = &inparam.g;
break;
case FDMSGON:
UDP->flags |= FTD_MSG;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 3863c00372bb..14a51254c3db 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1239,6 +1239,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_SOCK:
return nbd_add_socket(nbd, arg, false);
case NBD_SET_BLKSIZE:
+ if (!arg || !is_power_of_2(arg) || arg < 512 ||
+ arg > PAGE_SIZE)
+ return -EINVAL;
nbd_size_set(nbd, arg,
div_s64(config->bytesize, arg));
return 0;
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index d81781f22dba..34e0030f0592 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -87,10 +87,10 @@ struct nullb {
#ifdef CONFIG_BLK_DEV_ZONED
int null_zone_init(struct nullb_device *dev);
void null_zone_exit(struct nullb_device *dev);
-blk_status_t null_zone_report(struct nullb *nullb,
- struct nullb_cmd *cmd);
-void null_zone_write(struct nullb_cmd *cmd);
-void null_zone_reset(struct nullb_cmd *cmd);
+blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio);
+void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int nr_sectors);
+void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
#else
static inline int null_zone_init(struct nullb_device *dev)
{
@@ -98,11 +98,14 @@ static inline int null_zone_init(struct nullb_device *dev)
}
static inline void null_zone_exit(struct nullb_device *dev) {}
static inline blk_status_t null_zone_report(struct nullb *nullb,
- struct nullb_cmd *cmd)
+ struct bio *bio)
{
return BLK_STS_NOTSUPP;
}
-static inline void null_zone_write(struct nullb_cmd *cmd) {}
-static inline void null_zone_reset(struct nullb_cmd *cmd) {}
+static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int nr_sectors)
+{
+}
+static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {}
#endif /* CONFIG_BLK_DEV_ZONED */
#endif /* __NULL_BLK_H */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 6127e3ff7b4b..093b614d6524 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1157,16 +1157,33 @@ static void null_restart_queue_async(struct nullb *nullb)
}
}
+static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd)
+{
+ struct nullb_device *dev = cmd->nq->dev;
+
+ if (dev->queue_mode == NULL_Q_BIO) {
+ if (bio_op(cmd->bio) == REQ_OP_ZONE_REPORT) {
+ cmd->error = null_zone_report(nullb, cmd->bio);
+ return true;
+ }
+ } else {
+ if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) {
+ cmd->error = null_zone_report(nullb, cmd->rq->bio);
+ return true;
+ }
+ }
+
+ return false;
+}
+
static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
{
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
int err = 0;
- if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) {
- cmd->error = null_zone_report(nullb, cmd);
+ if (cmd_report_zone(nullb, cmd))
goto out;
- }
if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
struct request *rq = cmd->rq;
@@ -1234,10 +1251,24 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
cmd->error = errno_to_blk_status(err);
if (!cmd->error && dev->zoned) {
- if (req_op(cmd->rq) == REQ_OP_WRITE)
- null_zone_write(cmd);
- else if (req_op(cmd->rq) == REQ_OP_ZONE_RESET)
- null_zone_reset(cmd);
+ sector_t sector;
+ unsigned int nr_sectors;
+ int op;
+
+ if (dev->queue_mode == NULL_Q_BIO) {
+ op = bio_op(cmd->bio);
+ sector = cmd->bio->bi_iter.bi_sector;
+ nr_sectors = cmd->bio->bi_iter.bi_size >> 9;
+ } else {
+ op = req_op(cmd->rq);
+ sector = blk_rq_pos(cmd->rq);
+ nr_sectors = blk_rq_sectors(cmd->rq);
+ }
+
+ if (op == REQ_OP_WRITE)
+ null_zone_write(cmd, sector, nr_sectors);
+ else if (op == REQ_OP_ZONE_RESET)
+ null_zone_reset(cmd, sector);
}
out:
/* Complete IO by inline, softirq or timer */
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index a979ca00d7be..7c6b86d98700 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -48,8 +48,8 @@ void null_zone_exit(struct nullb_device *dev)
kvfree(dev->zones);
}
-static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq,
- unsigned int zno, unsigned int nr_zones)
+static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio,
+ unsigned int zno, unsigned int nr_zones)
{
struct blk_zone_report_hdr *hdr = NULL;
struct bio_vec bvec;
@@ -57,7 +57,7 @@ static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq,
void *addr;
unsigned int zones_to_cpy;
- bio_for_each_segment(bvec, rq->bio, iter) {
+ bio_for_each_segment(bvec, bio, iter) {
addr = kmap_atomic(bvec.bv_page);
zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone);
@@ -84,29 +84,24 @@ static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq,
}
}
-blk_status_t null_zone_report(struct nullb *nullb,
- struct nullb_cmd *cmd)
+blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio)
{
struct nullb_device *dev = nullb->dev;
- struct request *rq = cmd->rq;
- unsigned int zno = null_zone_no(dev, blk_rq_pos(rq));
+ unsigned int zno = null_zone_no(dev, bio->bi_iter.bi_sector);
unsigned int nr_zones = dev->nr_zones - zno;
- unsigned int max_zones = (blk_rq_bytes(rq) /
- sizeof(struct blk_zone)) - 1;
+ unsigned int max_zones;
+ max_zones = (bio->bi_iter.bi_size / sizeof(struct blk_zone)) - 1;
nr_zones = min_t(unsigned int, nr_zones, max_zones);
-
- null_zone_fill_rq(nullb->dev, rq, zno, nr_zones);
+ null_zone_fill_bio(nullb->dev, bio, zno, nr_zones);
return BLK_STS_OK;
}
-void null_zone_write(struct nullb_cmd *cmd)
+void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
- struct request *rq = cmd->rq;
- sector_t sector = blk_rq_pos(rq);
- unsigned int rq_sectors = blk_rq_sectors(rq);
unsigned int zno = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zno];
@@ -118,7 +113,7 @@ void null_zone_write(struct nullb_cmd *cmd)
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_IMP_OPEN:
/* Writes must be at the write pointer position */
- if (blk_rq_pos(rq) != zone->wp) {
+ if (sector != zone->wp) {
cmd->error = BLK_STS_IOERR;
break;
}
@@ -126,7 +121,7 @@ void null_zone_write(struct nullb_cmd *cmd)
if (zone->cond == BLK_ZONE_COND_EMPTY)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
- zone->wp += rq_sectors;
+ zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->len)
zone->cond = BLK_ZONE_COND_FULL;
break;
@@ -137,11 +132,10 @@ void null_zone_write(struct nullb_cmd *cmd)
}
}
-void null_zone_reset(struct nullb_cmd *cmd)
+void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
- struct request *rq = cmd->rq;
- unsigned int zno = null_zone_no(dev, blk_rq_pos(rq));
+ unsigned int zno = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zno];
zone->cond = BLK_ZONE_COND_EMPTY;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 7915f3b03736..73ed5f3a862d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4207,11 +4207,13 @@ static ssize_t rbd_parent_show(struct device *dev,
count += sprintf(&buf[count], "%s"
"pool_id %llu\npool_name %s\n"
+ "pool_ns %s\n"
"image_id %s\nimage_name %s\n"
"snap_id %llu\nsnap_name %s\n"
"overlap %llu\n",
!count ? "" : "\n", /* first? */
spec->pool_id, spec->pool_name,
+ spec->pool_ns ?: "",
spec->image_id, spec->image_name ?: "(unknown)",
spec->snap_id, spec->snap_name,
rbd_dev->parent_overlap);
@@ -4584,47 +4586,177 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
&rbd_dev->header.features);
}
+struct parent_image_info {
+ u64 pool_id;
+ const char *pool_ns;
+ const char *image_id;
+ u64 snap_id;
+
+ bool has_overlap;
+ u64 overlap;
+};
+
+/*
+ * The caller is responsible for @pii.
+ */
+static int decode_parent_image_spec(void **p, void *end,
+ struct parent_image_info *pii)
+{
+ u8 struct_v;
+ u32 struct_len;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 1, "ParentImageSpec",
+ &struct_v, &struct_len);
+ if (ret)
+ return ret;
+
+ ceph_decode_64_safe(p, end, pii->pool_id, e_inval);
+ pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
+ if (IS_ERR(pii->pool_ns)) {
+ ret = PTR_ERR(pii->pool_ns);
+ pii->pool_ns = NULL;
+ return ret;
+ }
+ pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
+ if (IS_ERR(pii->image_id)) {
+ ret = PTR_ERR(pii->image_id);
+ pii->image_id = NULL;
+ return ret;
+ }
+ ceph_decode_64_safe(p, end, pii->snap_id, e_inval);
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+static int __get_parent_info(struct rbd_device *rbd_dev,
+ struct page *req_page,
+ struct page *reply_page,
+ struct parent_image_info *pii)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ size_t reply_len = PAGE_SIZE;
+ void *p, *end;
+ int ret;
+
+ ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+ "rbd", "parent_get", CEPH_OSD_FLAG_READ,
+ req_page, sizeof(u64), reply_page, &reply_len);
+ if (ret)
+ return ret == -EOPNOTSUPP ? 1 : ret;
+
+ p = page_address(reply_page);
+ end = p + reply_len;
+ ret = decode_parent_image_spec(&p, end, pii);
+ if (ret)
+ return ret;
+
+ ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+ "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ,
+ req_page, sizeof(u64), reply_page, &reply_len);
+ if (ret)
+ return ret;
+
+ p = page_address(reply_page);
+ end = p + reply_len;
+ ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval);
+ if (pii->has_overlap)
+ ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
+ * The caller is responsible for @pii.
+ */
+static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
+ struct page *req_page,
+ struct page *reply_page,
+ struct parent_image_info *pii)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ size_t reply_len = PAGE_SIZE;
+ void *p, *end;
+ int ret;
+
+ ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+ "rbd", "get_parent", CEPH_OSD_FLAG_READ,
+ req_page, sizeof(u64), reply_page, &reply_len);
+ if (ret)
+ return ret;
+
+ p = page_address(reply_page);
+ end = p + reply_len;
+ ceph_decode_64_safe(&p, end, pii->pool_id, e_inval);
+ pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
+ if (IS_ERR(pii->image_id)) {
+ ret = PTR_ERR(pii->image_id);
+ pii->image_id = NULL;
+ return ret;
+ }
+ ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
+ pii->has_overlap = true;
+ ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+static int get_parent_info(struct rbd_device *rbd_dev,
+ struct parent_image_info *pii)
+{
+ struct page *req_page, *reply_page;
+ void *p;
+ int ret;
+
+ req_page = alloc_page(GFP_KERNEL);
+ if (!req_page)
+ return -ENOMEM;
+
+ reply_page = alloc_page(GFP_KERNEL);
+ if (!reply_page) {
+ __free_page(req_page);
+ return -ENOMEM;
+ }
+
+ p = page_address(req_page);
+ ceph_encode_64(&p, rbd_dev->spec->snap_id);
+ ret = __get_parent_info(rbd_dev, req_page, reply_page, pii);
+ if (ret > 0)
+ ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page,
+ pii);
+
+ __free_page(req_page);
+ __free_page(reply_page);
+ return ret;
+}
+
static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
{
struct rbd_spec *parent_spec;
- size_t size;
- void *reply_buf = NULL;
- __le64 snapid;
- void *p;
- void *end;
- u64 pool_id;
- char *image_id;
- u64 snap_id;
- u64 overlap;
+ struct parent_image_info pii = { 0 };
int ret;
parent_spec = rbd_spec_alloc();
if (!parent_spec)
return -ENOMEM;
- size = sizeof (__le64) + /* pool_id */
- sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */
- sizeof (__le64) + /* snap_id */
- sizeof (__le64); /* overlap */
- reply_buf = kmalloc(size, GFP_KERNEL);
- if (!reply_buf) {
- ret = -ENOMEM;
+ ret = get_parent_info(rbd_dev, &pii);
+ if (ret)
goto out_err;
- }
- snapid = cpu_to_le64(rbd_dev->spec->snap_id);
- ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
- &rbd_dev->header_oloc, "get_parent",
- &snapid, sizeof(snapid), reply_buf, size);
- dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
- if (ret < 0)
- goto out_err;
+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+ __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
+ pii.has_overlap, pii.overlap);
- p = reply_buf;
- end = reply_buf + ret;
- ret = -ERANGE;
- ceph_decode_64_safe(&p, end, pool_id, out_err);
- if (pool_id == CEPH_NOPOOL) {
+ if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
/*
* Either the parent never existed, or we have
* record of it but the image got flattened so it no
@@ -4633,6 +4765,10 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
* overlap to 0. The effect of this is that all new
* requests will be treated as if the image had no
* parent.
+ *
+ * If !pii.has_overlap, the parent image spec is not
+ * applicable. It's there to avoid duplication in each
+ * snapshot record.
*/
if (rbd_dev->parent_overlap) {
rbd_dev->parent_overlap = 0;
@@ -4647,51 +4783,36 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
/* The ceph file layout needs to fit pool id in 32 bits */
ret = -EIO;
- if (pool_id > (u64)U32_MAX) {
+ if (pii.pool_id > (u64)U32_MAX) {
rbd_warn(NULL, "parent pool id too large (%llu > %u)",
- (unsigned long long)pool_id, U32_MAX);
+ (unsigned long long)pii.pool_id, U32_MAX);
goto out_err;
}
- image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
- if (IS_ERR(image_id)) {
- ret = PTR_ERR(image_id);
- goto out_err;
- }
- ceph_decode_64_safe(&p, end, snap_id, out_err);
- ceph_decode_64_safe(&p, end, overlap, out_err);
-
/*
* The parent won't change (except when the clone is
* flattened, already handled that). So we only need to
* record the parent spec we have not already done so.
*/
if (!rbd_dev->parent_spec) {
- parent_spec->pool_id = pool_id;
- parent_spec->image_id = image_id;
- parent_spec->snap_id = snap_id;
-
- /* TODO: support cloning across namespaces */
- if (rbd_dev->spec->pool_ns) {
- parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns,
- GFP_KERNEL);
- if (!parent_spec->pool_ns) {
- ret = -ENOMEM;
- goto out_err;
- }
+ parent_spec->pool_id = pii.pool_id;
+ if (pii.pool_ns && *pii.pool_ns) {
+ parent_spec->pool_ns = pii.pool_ns;
+ pii.pool_ns = NULL;
}
+ parent_spec->image_id = pii.image_id;
+ pii.image_id = NULL;
+ parent_spec->snap_id = pii.snap_id;
rbd_dev->parent_spec = parent_spec;
parent_spec = NULL; /* rbd_dev now owns this */
- } else {
- kfree(image_id);
}
/*
* We always update the parent overlap. If it's zero we issue
* a warning, as we will proceed as if there was no parent.
*/
- if (!overlap) {
+ if (!pii.overlap) {
if (parent_spec) {
/* refresh, careful to warn just once */
if (rbd_dev->parent_overlap)
@@ -4702,14 +4823,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
}
}
- rbd_dev->parent_overlap = overlap;
+ rbd_dev->parent_overlap = pii.overlap;
out:
ret = 0;
out_err:
- kfree(reply_buf);
+ kfree(pii.pool_ns);
+ kfree(pii.image_id);
rbd_spec_put(parent_spec);
-
return ret;
}
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index b55b245e8052..fd1e19f1a49f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -84,6 +84,18 @@ MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
/*
+ * How long a persistent grant is allowed to remain allocated without being in
+ * use. The time is in seconds, 0 means indefinitely long.
+ */
+
+static unsigned int xen_blkif_pgrant_timeout = 60;
+module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout,
+ uint, 0644);
+MODULE_PARM_DESC(persistent_grant_unused_seconds,
+ "Time in seconds an unused persistent grant is allowed to "
+ "remain allocated. Default is 60, 0 means unlimited.");
+
+/*
* Maximum number of rings/queues blkback supports, allow as many queues as there
* are CPUs if user has not specified a value.
*/
@@ -123,6 +135,13 @@ module_param(log_stats, int, 0644);
/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10
+static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
+{
+ return xen_blkif_pgrant_timeout &&
+ (jiffies - persistent_gnt->last_used >=
+ HZ * xen_blkif_pgrant_timeout);
+}
+
static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
unsigned long flags;
@@ -236,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
}
}
- bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
- set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+ persistent_gnt->active = true;
/* Add new node and rebalance tree. */
rb_link_node(&(persistent_gnt->node), parent, new);
rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
@@ -261,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
else if (gref > data->gnt)
node = node->rb_right;
else {
- if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+ if (data->active) {
pr_alert_ratelimited("requesting a grant already in use\n");
return NULL;
}
- set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+ data->active = true;
atomic_inc(&ring->persistent_gnt_in_use);
return data;
}
@@ -276,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
static void put_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
- if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+ if (!persistent_gnt->active)
pr_alert_ratelimited("freeing a grant already unused\n");
- set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
- clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+ persistent_gnt->last_used = jiffies;
+ persistent_gnt->active = false;
atomic_dec(&ring->persistent_gnt_in_use);
}
@@ -371,26 +389,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
struct persistent_gnt *persistent_gnt;
struct rb_node *n;
unsigned int num_clean, total;
- bool scan_used = false, clean_used = false;
+ bool scan_used = false;
struct rb_root *root;
- if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
- (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
- !ring->blkif->vbd.overflow_max_grants)) {
- goto out;
- }
-
if (work_busy(&ring->persistent_purge_work)) {
pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
goto out;
}
- num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
- num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
- num_clean = min(ring->persistent_gnt_c, num_clean);
- if ((num_clean == 0) ||
- (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
- goto out;
+ if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+ (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+ !ring->blkif->vbd.overflow_max_grants)) {
+ num_clean = 0;
+ } else {
+ num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+ num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants +
+ num_clean;
+ num_clean = min(ring->persistent_gnt_c, num_clean);
+ pr_debug("Going to purge at least %u persistent grants\n",
+ num_clean);
+ }
/*
* At this point, we can assure that there will be no calls
@@ -401,9 +419,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
* number of grants.
*/
- total = num_clean;
-
- pr_debug("Going to purge %u persistent grants\n", num_clean);
+ total = 0;
BUG_ON(!list_empty(&ring->persistent_purge_list));
root = &ring->persistent_gnts;
@@ -412,46 +428,37 @@ purge_list:
BUG_ON(persistent_gnt->handle ==
BLKBACK_INVALID_HANDLE);
- if (clean_used) {
- clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+ if (persistent_gnt->active)
continue;
- }
-
- if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+ if (!scan_used && !persistent_gnt_timeout(persistent_gnt))
continue;
- if (!scan_used &&
- (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+ if (scan_used && total >= num_clean)
continue;
rb_erase(&persistent_gnt->node, root);
list_add(&persistent_gnt->remove_node,
&ring->persistent_purge_list);
- if (--num_clean == 0)
- goto finished;
+ total++;
}
/*
- * If we get here it means we also need to start cleaning
+ * Check whether we also need to start cleaning
* grants that were used since last purge in order to cope
* with the requested num
*/
- if (!scan_used && !clean_used) {
- pr_debug("Still missing %u purged frames\n", num_clean);
+ if (!scan_used && total < num_clean) {
+ pr_debug("Still missing %u purged frames\n", num_clean - total);
scan_used = true;
goto purge_list;
}
-finished:
- if (!clean_used) {
- pr_debug("Finished scanning for grants to clean, removing used flag\n");
- clean_used = true;
- goto purge_list;
- }
- ring->persistent_gnt_c -= (total - num_clean);
- ring->blkif->vbd.overflow_max_grants = 0;
+ if (total) {
+ ring->persistent_gnt_c -= total;
+ ring->blkif->vbd.overflow_max_grants = 0;
- /* We can defer this work */
- schedule_work(&ring->persistent_purge_work);
- pr_debug("Purged %u/%u\n", (total - num_clean), total);
+ /* We can defer this work */
+ schedule_work(&ring->persistent_purge_work);
+ pr_debug("Purged %u/%u\n", num_clean, total);
+ }
out:
return;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index ecb35fe8ca8d..1d3002d773f7 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -233,16 +233,6 @@ struct xen_vbd {
struct backend_info;
-/* Number of available flags */
-#define PERSISTENT_GNT_FLAGS_SIZE 2
-/* This persistent grant is currently in use */
-#define PERSISTENT_GNT_ACTIVE 0
-/*
- * This persistent grant has been used, this flag is set when we remove the
- * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
- */
-#define PERSISTENT_GNT_WAS_ACTIVE 1
-
/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS_PER_PAGE 32
@@ -250,7 +240,8 @@ struct persistent_gnt {
struct page *page;
grant_ref_t gnt;
grant_handle_t handle;
- DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
+ unsigned long last_used;
+ bool active;
struct rb_node node;
struct list_head remove_node;
};
@@ -278,7 +269,6 @@ struct xen_blkif_ring {
wait_queue_head_t pending_free_wq;
/* Tree to store persistent grants. */
- spinlock_t pers_gnts_lock;
struct rb_root persistent_gnts;
unsigned int persistent_gnt_c;
atomic_t persistent_gnt_in_use;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8986adab9bf5..429d20131c7e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -46,6 +46,7 @@
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/list.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq)
static DEFINE_MUTEX(blkfront_mutex);
static const struct block_device_operations xlvbd_block_fops;
+static struct delayed_work blkfront_work;
+static LIST_HEAD(info_list);
/*
* Maximum number of segments in indirect requests, the actual value used by
@@ -216,6 +219,7 @@ struct blkfront_info
/* Save uncomplete reqs and bios for migration. */
struct list_head requests;
struct bio_list bio_list;
+ struct list_head info_list;
};
static unsigned int nr_minors;
@@ -1759,6 +1763,12 @@ abort_transaction:
return err;
}
+static void free_info(struct blkfront_info *info)
+{
+ list_del(&info->info_list);
+ kfree(info);
+}
+
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info)
@@ -1880,7 +1890,10 @@ again:
destroy_blkring:
blkif_free(info, 0);
- kfree(info);
+ mutex_lock(&blkfront_mutex);
+ free_info(info);
+ mutex_unlock(&blkfront_mutex);
+
dev_set_drvdata(&dev->dev, NULL);
return err;
@@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev,
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
+ mutex_lock(&blkfront_mutex);
+ list_add(&info->info_list, &info_list);
+ mutex_unlock(&blkfront_mutex);
+
return 0;
}
@@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST)
indirect_segments = 0;
info->max_indirect_segments = indirect_segments;
+
+ if (info->feature_persistent) {
+ mutex_lock(&blkfront_mutex);
+ schedule_delayed_work(&blkfront_work, HZ * 10);
+ mutex_unlock(&blkfront_mutex);
+ }
}
/*
@@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
mutex_unlock(&info->mutex);
if (!bdev) {
- kfree(info);
+ mutex_lock(&blkfront_mutex);
+ free_info(info);
+ mutex_unlock(&blkfront_mutex);
return 0;
}
@@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
if (info && !bdev->bd_openers) {
xlvbd_release_gendisk(info);
disk->private_data = NULL;
- kfree(info);
+ mutex_lock(&blkfront_mutex);
+ free_info(info);
+ mutex_unlock(&blkfront_mutex);
}
mutex_unlock(&bdev->bd_mutex);
@@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
xlvbd_release_gendisk(info);
disk->private_data = NULL;
- kfree(info);
+ free_info(info);
}
out:
@@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = {
.is_ready = blkfront_is_ready,
};
+static void purge_persistent_grants(struct blkfront_info *info)
+{
+ unsigned int i;
+ unsigned long flags;
+
+ for (i = 0; i < info->nr_rings; i++) {
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
+ struct grant *gnt_list_entry, *tmp;
+
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+
+ if (rinfo->persistent_gnts_c == 0) {
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ continue;
+ }
+
+ list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
+ node) {
+ if (gnt_list_entry->gref == GRANT_INVALID_REF ||
+ gnttab_query_foreign_access(gnt_list_entry->gref))
+ continue;
+
+ list_del(&gnt_list_entry->node);
+ gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+ rinfo->persistent_gnts_c--;
+ gnt_list_entry->gref = GRANT_INVALID_REF;
+ list_add_tail(&gnt_list_entry->node, &rinfo->grants);
+ }
+
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ }
+}
+
+static void blkfront_delay_work(struct work_struct *work)
+{
+ struct blkfront_info *info;
+ bool need_schedule_work = false;
+
+ mutex_lock(&blkfront_mutex);
+
+ list_for_each_entry(info, &info_list, info_list) {
+ if (info->feature_persistent) {
+ need_schedule_work = true;
+ mutex_lock(&info->mutex);
+ purge_persistent_grants(info);
+ mutex_unlock(&info->mutex);
+ }
+ }
+
+ if (need_schedule_work)
+ schedule_delayed_work(&blkfront_work, HZ * 10);
+
+ mutex_unlock(&blkfront_mutex);
+}
+
static int __init xlblk_init(void)
{
int ret;
@@ -2626,6 +2708,15 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
+ if (!xen_has_pv_disk_devices())
+ return -ENODEV;
+
+ if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
+ pr_warn("xen_blk: can't get major %d with name %s\n",
+ XENVBD_MAJOR, DEV_NAME);
+ return -ENODEV;
+ }
+
if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
@@ -2641,14 +2732,7 @@ static int __init xlblk_init(void)
xen_blkif_max_queues = nr_cpus;
}
- if (!xen_has_pv_disk_devices())
- return -ENODEV;
-
- if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
- printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
- XENVBD_MAJOR, DEV_NAME);
- return -ENODEV;
- }
+ INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work);
ret = xenbus_register_frontend(&blkfront_driver);
if (ret) {
@@ -2663,6 +2747,8 @@ module_init(xlblk_init);
static void __exit xlblk_exit(void)
{
+ cancel_delayed_work_sync(&blkfront_work);
+
xenbus_unregister_driver(&blkfront_driver);
unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
kfree(minors);
OpenPOWER on IntegriCloud