summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_receiver.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r--drivers/block/drbd/drbd_receiver.c140
1 files changed, 115 insertions, 25 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 15b2a0dab0e5..cb80fb406361 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1443,6 +1443,108 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
+/*
+ * We *may* ignore the discard-zeroes-data setting, if so configured.
+ *
+ * Assumption is that it "discard_zeroes_data=0" is only because the backend
+ * may ignore partial unaligned discards.
+ *
+ * LVM/DM thin as of at least
+ * LVM version: 2.02.115(2)-RHEL7 (2015-01-28)
+ * Library version: 1.02.93-RHEL7 (2015-01-28)
+ * Driver version: 4.29.0
+ * still behaves this way.
+ *
+ * For unaligned (wrt. alignment and granularity) or too small discards,
+ * we zero-out the initial (and/or) trailing unaligned partial chunks,
+ * but discard all the aligned full chunks.
+ *
+ * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1".
+ */
+int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard)
+{
+ struct block_device *bdev = device->ldev->backing_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ sector_t tmp, nr;
+ unsigned int max_discard_sectors, granularity;
+ int alignment;
+ int err = 0;
+
+ if (!discard)
+ goto zero_out;
+
+ /* Zero-sector (unknown) and one-sector granularities are the same. */
+ granularity = max(q->limits.discard_granularity >> 9, 1U);
+ alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
+
+ max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
+ max_discard_sectors -= max_discard_sectors % granularity;
+ if (unlikely(!max_discard_sectors))
+ goto zero_out;
+
+ if (nr_sectors < granularity)
+ goto zero_out;
+
+ tmp = start;
+ if (sector_div(tmp, granularity) != alignment) {
+ if (nr_sectors < 2*granularity)
+ goto zero_out;
+ /* start + gran - (start + gran - align) % gran */
+ tmp = start + granularity - alignment;
+ tmp = start + granularity - sector_div(tmp, granularity);
+
+ nr = tmp - start;
+ err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
+ nr_sectors -= nr;
+ start = tmp;
+ }
+ while (nr_sectors >= granularity) {
+ nr = min_t(sector_t, nr_sectors, max_discard_sectors);
+ err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
+ nr_sectors -= nr;
+ start += nr;
+ }
+ zero_out:
+ if (nr_sectors) {
+ err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
+ }
+ return err != 0;
+}
+
+static bool can_do_reliable_discards(struct drbd_device *device)
+{
+ struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+ struct disk_conf *dc;
+ bool can_do;
+
+ if (!blk_queue_discard(q))
+ return false;
+
+ if (q->limits.discard_zeroes_data)
+ return true;
+
+ rcu_read_lock();
+ dc = rcu_dereference(device->ldev->disk_conf);
+ can_do = dc->discard_zeroes_if_aligned;
+ rcu_read_unlock();
+ return can_do;
+}
+
+void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
+{
+ /* If the backend cannot discard, or does not guarantee
+ * read-back zeroes in discarded ranges, we fall back to
+ * zero-out. Unless configuration specifically requested
+ * otherwise. */
+ if (!can_do_reliable_discards(device))
+ peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
+
+ if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
+ peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT)))
+ peer_req->flags |= EE_WAS_ERROR;
+ drbd_endio_write_sec_final(peer_req);
+}
+
/**
* drbd_submit_peer_request()
* @device: DRBD device.
@@ -1474,7 +1576,13 @@ int drbd_submit_peer_request(struct drbd_device *device,
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
int err = -ENOMEM;
- if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
+ /* TRIM/DISCARD: for now, always use the helper function
+ * blkdev_issue_zeroout(..., discard=true).
+ * It's synchronous, but it does the right thing wrt. bio splitting.
+ * Correctness first, performance later. Next step is to code an
+ * asynchronous variant of the same.
+ */
+ if (peer_req->flags & EE_IS_TRIM) {
/* wait for all pending IO completions, before we start
* zeroing things out. */
conn_wait_active_ee_empty(peer_req->peer_device->connection);
@@ -1491,19 +1599,10 @@ int drbd_submit_peer_request(struct drbd_device *device,
spin_unlock_irq(&device->resource->req_lock);
}
- if (blkdev_issue_zeroout(device->ldev->backing_bdev,
- sector, data_size >> 9, GFP_NOIO, false))
- peer_req->flags |= EE_WAS_ERROR;
- drbd_endio_write_sec_final(peer_req);
+ drbd_issue_peer_discard(device, peer_req);
return 0;
}
- /* Discards don't have any payload.
- * But the scsi layer still expects a bio_vec it can use internally,
- * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
- if (peer_req->flags & EE_IS_TRIM)
- nr_pages = 1;
-
/* In most cases, we will only need one bio. But in case the lower
* level restrictions happen to be different at this offset on this
* side than those of the sending peer, we may need to submit the
@@ -1529,11 +1628,6 @@ next_bio:
bios = bio;
++n_bios;
- if (op == REQ_OP_DISCARD) {
- bio->bi_iter.bi_size = data_size;
- goto submit;
- }
-
page_chain_for_each(page) {
unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
if (!bio_add_page(bio, page, len, 0)) {
@@ -1555,7 +1649,6 @@ next_bio:
--nr_pages;
}
D_ASSERT(device, data_size == 0);
-submit:
D_ASSERT(device, page == NULL);
atomic_set(&peer_req->pending_bios, n_bios);
@@ -2424,10 +2517,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
op = wire_flags_to_bio_op(dp_flags);
op_flags = wire_flags_to_bio_flags(dp_flags);
if (pi->cmd == P_TRIM) {
- struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
peer_req->flags |= EE_IS_TRIM;
- if (!blk_queue_discard(q))
- peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
D_ASSERT(peer_device, peer_req->i.size > 0);
D_ASSERT(peer_device, op == REQ_OP_DISCARD);
D_ASSERT(peer_device, peer_req->pages == NULL);
@@ -2498,7 +2588,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
* and we wait for all pending requests, respectively wait for
* active_ee to become empty in drbd_submit_peer_request();
* better not add ourselves here. */
- if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
+ if ((peer_req->flags & EE_IS_TRIM) == 0)
list_add_tail(&peer_req->w.list, &device->active_ee);
spin_unlock_irq(&device->resource->req_lock);
@@ -3916,14 +4006,14 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
}
device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
- /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
+ /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
In case we cleared the QUEUE_FLAG_DISCARD from our queue in
- drbd_reconsider_max_bio_size(), we can be sure that after
+ drbd_reconsider_queue_parameters(), we can be sure that after
drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
ddsf = be16_to_cpu(p->dds_flags);
if (get_ldev(device)) {
- drbd_reconsider_max_bio_size(device, device->ldev);
+ drbd_reconsider_queue_parameters(device, device->ldev);
dd = drbd_determine_dev_size(device, ddsf, NULL);
put_ldev(device);
if (dd == DS_ERROR)
@@ -3943,7 +4033,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
* However, if he sends a zero current size,
* take his (user-capped or) backing disk size anyways.
*/
- drbd_reconsider_max_bio_size(device, NULL);
+ drbd_reconsider_queue_parameters(device, NULL);
drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
}
OpenPOWER on IntegriCloud