diff options
author | Ming Lin <ming.l@ssi.samsung.com> | 2015-10-22 09:59:42 -0700 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2015-10-28 09:12:58 +0900 |
commit | a22c4d7e34402ccdf3414f64c50365436eba7b93 (patch) | |
tree | 2d71db7c04930b5f88f4745cb6c8d5c59cf33075 /block/blk-lib.c | |
parent | 23d88271b4f97f66de521ac9b2c1471e6311cf26 (diff) | |
download | talos-obmc-linux-a22c4d7e34402ccdf3414f64c50365436eba7b93.tar.gz talos-obmc-linux-a22c4d7e34402ccdf3414f64c50365436eba7b93.zip |
block: re-add discard_granularity and alignment checks
In commit b49a087("block: remove split code in
blkdev_issue_{discard,write_same}"), discard_granularity and alignment
checks were removed. Ideally, with bio late splitting, the upper layers
shouldn't need to depend on device's limits.
Christoph reported a discard regression on the HGST Ultrastar SN100 NVMe
device when mkfs.xfs. We have not found the root cause yet.
This patch re-adds discard_granularity and alignment checks by reverting
the related changes in commit b49a087. The good thing is now we can
remove the 2G discard size cap and just use UINT_MAX to avoid bi_size
overflow.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-lib.c')
-rw-r--r-- | block/blk-lib.c | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/block/blk-lib.c b/block/blk-lib.c index bd40292e5009..9ebf65379556 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -26,13 +26,6 @@ static void bio_batch_end_io(struct bio *bio) bio_put(bio); } -/* - * Ensure that max discard sectors doesn't overflow bi_size and hopefully - * it is of the proper granularity as long as the granularity is a power - * of two. - */ -#define MAX_BIO_SECTORS ((1U << 31) >> 9) - /** * blkdev_issue_discard - queue a discard * @bdev: blockdev to issue discard for @@ -50,6 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); int type = REQ_WRITE | REQ_DISCARD; + unsigned int granularity; + int alignment; struct bio_batch bb; struct bio *bio; int ret = 0; @@ -61,6 +56,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!blk_queue_discard(q)) return -EOPNOTSUPP; + /* Zero-sector (unknown) and one-sector granularities are the same. */ + granularity = max(q->limits.discard_granularity >> 9, 1U); + alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; + if (flags & BLKDEV_DISCARD_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; @@ -74,7 +73,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, blk_start_plug(&plug); while (nr_sects) { unsigned int req_sects; - sector_t end_sect; + sector_t end_sect, tmp; bio = bio_alloc(gfp_mask, 1); if (!bio) { @@ -82,8 +81,22 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, break; } - req_sects = min_t(sector_t, nr_sects, MAX_BIO_SECTORS); + /* Make sure bi_size doesn't overflow */ + req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9); + + /* + * If splitting a request, and the next starting sector would be + * misaligned, stop the discard at the previous aligned sector. + */ end_sect = sector + req_sects; + tmp = end_sect; + if (req_sects < nr_sects && + sector_div(tmp, granularity) != alignment) { + end_sect = end_sect - alignment; + sector_div(end_sect, granularity); + end_sect = end_sect * granularity + alignment; + req_sects = end_sect - sector; + } bio->bi_iter.bi_sector = sector; bio->bi_end_io = bio_batch_end_io; |