diff options
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 140 |
1 files changed, 115 insertions, 25 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 15b2a0dab0e5..cb80fb406361 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1443,6 +1443,108 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); } +/* + * We *may* ignore the discard-zeroes-data setting, if so configured. + * + * Assumption is that it "discard_zeroes_data=0" is only because the backend + * may ignore partial unaligned discards. + * + * LVM/DM thin as of at least + * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) + * Library version: 1.02.93-RHEL7 (2015-01-28) + * Driver version: 4.29.0 + * still behaves this way. + * + * For unaligned (wrt. alignment and granularity) or too small discards, + * we zero-out the initial (and/or) trailing unaligned partial chunks, + * but discard all the aligned full chunks. + * + * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1". + */ +int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard) +{ + struct block_device *bdev = device->ldev->backing_bdev; + struct request_queue *q = bdev_get_queue(bdev); + sector_t tmp, nr; + unsigned int max_discard_sectors, granularity; + int alignment; + int err = 0; + + if (!discard) + goto zero_out; + + /* Zero-sector (unknown) and one-sector granularities are the same. */ + granularity = max(q->limits.discard_granularity >> 9, 1U); + alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; + + max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); + max_discard_sectors -= max_discard_sectors % granularity; + if (unlikely(!max_discard_sectors)) + goto zero_out; + + if (nr_sectors < granularity) + goto zero_out; + + tmp = start; + if (sector_div(tmp, granularity) != alignment) { + if (nr_sectors < 2*granularity) + goto zero_out; + /* start + gran - (start + gran - align) % gran */ + tmp = start + granularity - alignment; + tmp = start + granularity - sector_div(tmp, granularity); + + nr = tmp - start; + err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); + nr_sectors -= nr; + start = tmp; + } + while (nr_sectors >= granularity) { + nr = min_t(sector_t, nr_sectors, max_discard_sectors); + err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); + nr_sectors -= nr; + start += nr; + } + zero_out: + if (nr_sectors) { + err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0); + } + return err != 0; +} + +static bool can_do_reliable_discards(struct drbd_device *device) +{ + struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); + struct disk_conf *dc; + bool can_do; + + if (!blk_queue_discard(q)) + return false; + + if (q->limits.discard_zeroes_data) + return true; + + rcu_read_lock(); + dc = rcu_dereference(device->ldev->disk_conf); + can_do = dc->discard_zeroes_if_aligned; + rcu_read_unlock(); + return can_do; +} + +void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) +{ + /* If the backend cannot discard, or does not guarantee + * read-back zeroes in discarded ranges, we fall back to + * zero-out. Unless configuration specifically requested + * otherwise. */ + if (!can_do_reliable_discards(device)) + peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; + + if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, + peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT))) + peer_req->flags |= EE_WAS_ERROR; + drbd_endio_write_sec_final(peer_req); +} + /** * drbd_submit_peer_request() * @device: DRBD device. @@ -1474,7 +1576,13 @@ int drbd_submit_peer_request(struct drbd_device *device, unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; - if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { + /* TRIM/DISCARD: for now, always use the helper function + * blkdev_issue_zeroout(..., discard=true). + * It's synchronous, but it does the right thing wrt. bio splitting. + * Correctness first, performance later. Next step is to code an + * asynchronous variant of the same. + */ + if (peer_req->flags & EE_IS_TRIM) { /* wait for all pending IO completions, before we start * zeroing things out. */ conn_wait_active_ee_empty(peer_req->peer_device->connection); @@ -1491,19 +1599,10 @@ int drbd_submit_peer_request(struct drbd_device *device, spin_unlock_irq(&device->resource->req_lock); } - if (blkdev_issue_zeroout(device->ldev->backing_bdev, - sector, data_size >> 9, GFP_NOIO, false)) - peer_req->flags |= EE_WAS_ERROR; - drbd_endio_write_sec_final(peer_req); + drbd_issue_peer_discard(device, peer_req); return 0; } - /* Discards don't have any payload. - * But the scsi layer still expects a bio_vec it can use internally, - * see sd_setup_discard_cmnd() and blk_add_request_payload(). */ - if (peer_req->flags & EE_IS_TRIM) - nr_pages = 1; - /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the @@ -1529,11 +1628,6 @@ next_bio: bios = bio; ++n_bios; - if (op == REQ_OP_DISCARD) { - bio->bi_iter.bi_size = data_size; - goto submit; - } - page_chain_for_each(page) { unsigned len = min_t(unsigned, data_size, PAGE_SIZE); if (!bio_add_page(bio, page, len, 0)) { @@ -1555,7 +1649,6 @@ next_bio: --nr_pages; } D_ASSERT(device, data_size == 0); -submit: D_ASSERT(device, page == NULL); atomic_set(&peer_req->pending_bios, n_bios); @@ -2424,10 +2517,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * op = wire_flags_to_bio_op(dp_flags); op_flags = wire_flags_to_bio_flags(dp_flags); if (pi->cmd == P_TRIM) { - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); peer_req->flags |= EE_IS_TRIM; - if (!blk_queue_discard(q)) - peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; D_ASSERT(peer_device, peer_req->i.size > 0); D_ASSERT(peer_device, op == REQ_OP_DISCARD); D_ASSERT(peer_device, peer_req->pages == NULL); @@ -2498,7 +2588,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * * and we wait for all pending requests, respectively wait for * active_ee to become empty in drbd_submit_peer_request(); * better not add ourselves here. */ - if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) + if ((peer_req->flags & EE_IS_TRIM) == 0) list_add_tail(&peer_req->w.list, &device->active_ee); spin_unlock_irq(&device->resource->req_lock); @@ -3916,14 +4006,14 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info } device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); - /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). + /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size(). In case we cleared the QUEUE_FLAG_DISCARD from our queue in - drbd_reconsider_max_bio_size(), we can be sure that after + drbd_reconsider_queue_parameters(), we can be sure that after drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(device)) { - drbd_reconsider_max_bio_size(device, device->ldev); + drbd_reconsider_queue_parameters(device, device->ldev); dd = drbd_determine_dev_size(device, ddsf, NULL); put_ldev(device); if (dd == DS_ERROR) @@ -3943,7 +4033,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info * However, if he sends a zero current size, * take his (user-capped or) backing disk size anyways. */ - drbd_reconsider_max_bio_size(device, NULL); + drbd_reconsider_queue_parameters(device, NULL); drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); } |