Merge tag 'for-4.21/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer: - Eliminate a couple indirect calls from bio-based DM core. - Fix DM to allow reads that exceed readahead limits by setting io_pages in the backing_dev_info. - A couple code cleanups in request-based DM. - Fix various DM targets to check for device sector overflow if CONFIG_LBDAF is not set. - Use u64 instead of sector_t to store iv_offset in DM crypt; sector_t isn't large enough on 32bit when CONFIG_LBDAF is not set. - Performance fixes to DM's kcopyd and the snapshot target focused on limiting memory use and workqueue stalls. - Fix typos in the integrity and writecache targets. - Log which algorithm is used for dm-crypt's encryption and dm-integrity's hashing. - Fix false -EBUSY errors in DM raid target's handling of check/repair messages. - Fix DM flakey target's corrupt_bio_byte feature to reliably corrupt the Nth byte in a bio's payload. * tag 'for-4.21/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm: do not allow readahead to limit IO size dm raid: fix false -EBUSY when handling check/repair message dm rq: cleanup leftover code from recently removed q->mq_ops branching dm verity: log the hash algorithm implementation dm crypt: log the encryption algorithm implementation dm integrity: fix spelling mistake in workqueue name dm flakey: Properly corrupt multi-page bios. dm: Check for device sector overflow if CONFIG_LBDAF is not set dm crypt: use u64 instead of sector_t to store iv_offset dm kcopyd: Fix bug causing workqueue stalls dm snapshot: Fix excessive memory usage and workqueue stalls dm bufio: update comment in dm-bufio.c dm writecache: fix typo in error msg for creating writecache_flush_thread dm: remove indirect calls from __send_changing_extent_only() dm mpath: only flush workqueue when needed dm rq: remove unused arguments from rq_completed() dm: avoid indirect call in __dm_make_request
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-12-28 15:02:26 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-12-28 15:02:26 -0800
commit: 4ed7bdc1eb4c82cf4bfdf6a94dd36fd695f6f387 (patch)
tree: c3a74a9825876fd3ffd78eb04b4035c29d6028f9
parent: 5d24ae67a961c51beb255a28c9c417d9710247c2 (diff)
parent: c6d6e9b0f6b4201c77f2cea3964dd122697e3543 (diff)
download: talos-op-linux-4ed7bdc1eb4c82cf4bfdf6a94dd36fd695f6f387.tar.gz
talos-op-linux-4ed7bdc1eb4c82cf4bfdf6a94dd36fd695f6f387.zip
17 files changed, 121 insertions, 82 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index dc385b70e4c3..0e9fcceaefd2 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -65,7 +65,7 @@
 
 /*
  * Linking of buffers:
- *	All buffers are linked to cache_hash with their hash_list field.
+ *	All buffers are linked to buffer_tree with their node field.
  *
  *	Clean buffers that are not being written (B_WRITING not set)
  *	are linked to lru[LIST_CLEAN] with their lru_list field.
@@ -457,7 +457,7 @@ static void free_buffer(struct dm_buffer *b)
 }
 
 /*
- * Link buffer to the hash list and clean or dirty queue.
+ * Link buffer to the buffer tree and clean or dirty queue.
  */
 static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
 {
@@ -472,7 +472,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
 }
 
 /*
- * Unlink buffer from the hash list and dirty or clean queue.
+ * Unlink buffer from the buffer tree and dirty or clean queue.
  */
 static void __unlink_buffer(struct dm_buffer *b)
 {
@@ -993,7 +993,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
 
 	/*
 	 * We've had a period where the mutex was unlocked, so need to
-	 * recheck the hash table.
+	 * recheck the buffer tree.
 	 */
 	b = __find(c, block);
 	if (b) {
@@ -1327,7 +1327,7 @@ again:
 EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
 
 /*
- * Use dm-io to send and empty barrier flush the device.
+ * Use dm-io to send an empty barrier to flush the device.
  */
 int dm_bufio_issue_flush(struct dm_bufio_client *c)
 {
@@ -1356,7 +1356,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
  * Then, we write the buffer to the original location if it was dirty.
  *
  * Then, if we are the only one who is holding the buffer, relink the buffer
- * in the hash queue for the new location.
+ * in the buffer tree for the new location.
  *
  * If there was someone else holding the buffer, we write it to the new
  * location but not relink it, because that other user needs to have the buffer
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a7195eb5b8d8..1ea73ace9b9e 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -49,7 +49,7 @@ struct convert_context {
 	struct bio *bio_out;
 	struct bvec_iter iter_in;
 	struct bvec_iter iter_out;
-	sector_t cc_sector;
+	u64 cc_sector;
 	atomic_t cc_pending;
 	union {
 		struct skcipher_request *req;
@@ -81,7 +81,7 @@ struct dm_crypt_request {
 	struct convert_context *ctx;
 	struct scatterlist sg_in[4];
 	struct scatterlist sg_out[4];
-	sector_t iv_sector;
+	u64 iv_sector;
 };
 
 struct crypt_config;
@@ -160,7 +160,7 @@ struct crypt_config {
 		struct iv_lmk_private lmk;
 		struct iv_tcw_private tcw;
 	} iv_gen_private;
-	sector_t iv_offset;
+	u64 iv_offset;
 	unsigned int iv_size;
 	unsigned short int sector_size;
 	unsigned char sector_shift;
@@ -1885,6 +1885,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode)
 		}
 	}
 
+	/*
+	 * dm-crypt performance can vary greatly depending on which crypto
+	 * algorithm implementation is used.  Help people debug performance
+	 * problems by logging the ->cra_driver_name.
+	 */
+	DMINFO("%s using implementation \"%s\"", ciphermode,
+	       crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
 	return 0;
 }
 
@@ -1903,6 +1910,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode)
 		return err;
 	}
 
+	DMINFO("%s using implementation \"%s\"", ciphermode,
+	       crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name);
 	return 0;
 }
 
@@ -2781,7 +2790,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ret = -EINVAL;
-	if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
+	if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
 		ti->error = "Invalid device sector";
 		goto bad;
 	}
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2fb7bb4304ad..fddffe251bf6 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -141,7 +141,7 @@ static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **a
 	unsigned long long tmpll;
 	char dummy;
 
-	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) {
+	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
 		ti->error = "Invalid device sector";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 3cb97fa4c11d..a9bc518156f2 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -213,7 +213,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	devname = dm_shift_arg(&as);
 
 	r = -EINVAL;
-	if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) {
+	if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
 		ti->error = "Invalid device sector";
 		goto bad;
 	}
@@ -287,20 +287,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
 
 static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
 {
-	unsigned bio_bytes = bio_cur_bytes(bio);
-	char *data = bio_data(bio);
+	unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1;
+
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+
+	if (!bio_has_data(bio))
+		return;
 
 	/*
-	 * Overwrite the Nth byte of the data returned.
+	 * Overwrite the Nth byte of the bio's data, on whichever page
+	 * it falls.
 	 */
-	if (data && bio_bytes >= fc->corrupt_bio_byte) {
-		data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value;
-
-		DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
-			"(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n",
-			bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
-			(bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf,
-			(unsigned long long)bio->bi_iter.bi_sector, bio_bytes);
+	bio_for_each_segment(bvec, bio, iter) {
+		if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
+			char *segment = (page_address(bio_iter_page(bio, iter))
+					 + bio_iter_offset(bio, iter));
+			segment[corrupt_bio_byte] = fc->corrupt_bio_value;
+			DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
+				"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
+				bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
+				(bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf,
+				(unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size);
+			break;
+		}
+		corrupt_bio_byte -= bio_iter_len(bio, iter);
 	}
 }
 
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index d4ad0bfee251..2b27abfa428d 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -3460,7 +3460,7 @@ try_smaller_buffer:
 			ti->error = "Recalculate is only valid with internal hash";
 			goto bad;
 		}
-		ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1);
+		ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
 		if (!ic->recalc_wq ) {
 			ti->error = "Cannot allocate workqueue";
 			r = -ENOMEM;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 2fc4213e02b5..671c24332802 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -56,15 +56,17 @@ struct dm_kcopyd_client {
 	atomic_t nr_jobs;
 
 /*
- * We maintain three lists of jobs:
+ * We maintain four lists of jobs:
  *
  * i)   jobs waiting for pages
  * ii)  jobs that have pages, and are waiting for the io to be issued.
- * iii) jobs that have completed.
+ * iii) jobs that don't need to do any IO and just run a callback
+ * iv) jobs that have completed.
  *
- * All three of these are protected by job_lock.
+ * All four of these are protected by job_lock.
  */
 	spinlock_t job_lock;
+	struct list_head callback_jobs;
 	struct list_head complete_jobs;
 	struct list_head io_jobs;
 	struct list_head pages_jobs;
@@ -625,6 +627,7 @@ static void do_work(struct work_struct *work)
 	struct dm_kcopyd_client *kc = container_of(work,
 					struct dm_kcopyd_client, kcopyd_work);
 	struct blk_plug plug;
+	unsigned long flags;
 
 	/*
 	 * The order that these are called is *very* important.
@@ -633,6 +636,10 @@ static void do_work(struct work_struct *work)
 	 * list.  io jobs call wake when they complete and it all
 	 * starts again.
 	 */
+	spin_lock_irqsave(&kc->job_lock, flags);
+	list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
+	spin_unlock_irqrestore(&kc->job_lock, flags);
+
 	blk_start_plug(&plug);
 	process_jobs(&kc->complete_jobs, kc, run_complete_job);
 	process_jobs(&kc->pages_jobs, kc, run_pages_job);
@@ -650,7 +657,7 @@ static void dispatch_job(struct kcopyd_job *job)
 	struct dm_kcopyd_client *kc = job->kc;
 	atomic_inc(&kc->nr_jobs);
 	if (unlikely(!job->source.count))
-		push(&kc->complete_jobs, job);
+		push(&kc->callback_jobs, job);
 	else if (job->pages == &zero_page_list)
 		push(&kc->io_jobs, job);
 	else
@@ -858,7 +865,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
 	job->read_err = read_err;
 	job->write_err = write_err;
 
-	push(&kc->complete_jobs, job);
+	push(&kc->callback_jobs, job);
 	wake(kc);
 }
 EXPORT_SYMBOL(dm_kcopyd_do_callback);
@@ -888,6 +895,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&kc->job_lock);
+	INIT_LIST_HEAD(&kc->callback_jobs);
 	INIT_LIST_HEAD(&kc->complete_jobs);
 	INIT_LIST_HEAD(&kc->io_jobs);
 	INIT_LIST_HEAD(&kc->pages_jobs);
@@ -939,6 +947,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
 	/* Wait for completion of all jobs submitted by this client. */
 	wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
 
+	BUG_ON(!list_empty(&kc->callback_jobs));
 	BUG_ON(!list_empty(&kc->complete_jobs));
 	BUG_ON(!list_empty(&kc->io_jobs));
 	BUG_ON(!list_empty(&kc->pages_jobs));
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 8d7ddee6ac4d..ad980a38fb1e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -45,7 +45,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ret = -EINVAL;
-	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) {
+	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) {
 		ti->error = "Invalid device sector";
 		goto bad;
 	}
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d6a66921daf4..2ee5e357a0a7 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1211,14 +1211,16 @@ static void flush_multipath_work(struct multipath *m)
 		set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
 		smp_mb__after_atomic();
 
-		flush_workqueue(kmpath_handlerd);
+		if (atomic_read(&m->pg_init_in_progress))
+			flush_workqueue(kmpath_handlerd);
 		multipath_wait_for_pg_init_completion(m);
 
 		clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
 		smp_mb__after_atomic();
 	}
 
-	flush_workqueue(kmultipathd);
+	if (m->queue_mode == DM_TYPE_BIO_BASED)
+		flush_work(&m->process_queued_bios);
 	flush_work(&m->trigger_event);
 }
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index e1dd1622a290..adcfe8ae10aa 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3690,8 +3690,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			md_reap_sync_thread(mddev);
 		}
-	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
-		   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+	} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
 		return -EBUSY;
 	else if (!strcasecmp(argv[0], "resync"))
 		; /* MD_RECOVERY_NEEDED set below */
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 79eab1071ec2..5a51151f680d 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -943,7 +943,8 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 	char dummy;
 	int ret;
 
-	if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) {
+	if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 ||
+	    offset != (sector_t)offset) {
 		ti->error = "Invalid offset";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 4e06be4f0a62..4eb5f8c56535 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -128,7 +128,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
  * the md may be freed in dm_put() at the end of this function.
  * Or do dm_get() before calling this function and dm_put() later.
  */
-static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
+static void rq_completed(struct mapped_device *md)
 {
 	/* nudge anyone waiting on suspend queue */
 	if (unlikely(waitqueue_active(&md->wait)))
@@ -147,7 +147,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  */
 static void dm_end_request(struct request *clone, blk_status_t error)
 {
-	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
@@ -157,7 +156,7 @@ static void dm_end_request(struct request *clone, blk_status_t error)
 
 	rq_end_stats(md, rq);
 	blk_mq_end_request(rq, error);
-	rq_completed(md, rw, true);
+	rq_completed(md);
 }
 
 static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
@@ -181,7 +180,6 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
 {
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
-	int rw = rq_data_dir(rq);
 	unsigned long delay_ms = delay_requeue ? 100 : 0;
 
 	rq_end_stats(md, rq);
@@ -191,7 +189,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
 	}
 
 	dm_mq_delay_requeue_request(rq, delay_ms);
-	rq_completed(md, rw, false);
+	rq_completed(md);
 }
 
 static void dm_done(struct request *clone, blk_status_t error, bool mapped)
@@ -246,15 +244,13 @@ static void dm_softirq_done(struct request *rq)
 	bool mapped = true;
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 	struct request *clone = tio->clone;
-	int rw;
 
 	if (!clone) {
 		struct mapped_device *md = tio->md;
 
 		rq_end_stats(md, rq);
-		rw = rq_data_dir(rq);
 		blk_mq_end_request(rq, tio->error);
-		rq_completed(md, rw, false);
+		rq_completed(md);
 		return;
 	}
 
@@ -376,7 +372,6 @@ static int map_request(struct dm_rq_target_io *tio)
 	blk_status_t ret;
 
 	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
-check_again:
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
@@ -396,8 +391,7 @@ check_again:
 			blk_rq_unprep_clone(clone);
 			tio->ti->type->release_clone_rq(clone);
 			tio->clone = NULL;
-			r = DM_MAPIO_REQUEUE;
-			goto check_again;
+			return DM_MAPIO_REQUEUE;
 		}
 		break;
 	case DM_MAPIO_REQUEUE:
@@ -507,7 +501,7 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (map_request(tio) == DM_MAPIO_REQUEUE) {
 		/* Undo dm_start_request() before requeuing */
 		rq_end_stats(md, rq);
-		rq_completed(md, rq_data_dir(rq), false);
+		rq_completed(md);
 		return BLK_STS_RESOURCE;
 	}
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ae4b33d10924..36805b12661e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -19,6 +19,7 @@
 #include <linux/vmalloc.h>
 #include <linux/log2.h>
 #include <linux/dm-kcopyd.h>
+#include <linux/semaphore.h>
 
 #include "dm.h"
 
@@ -105,6 +106,9 @@ struct dm_snapshot {
 	/* The on disk metadata handler */
 	struct dm_exception_store *store;
 
+	/* Maximum number of in-flight COW jobs. */
+	struct semaphore cow_count;
+
 	struct dm_kcopyd_client *kcopyd_client;
 
 	/* Wait for events based on state_bits */
@@ -145,6 +149,19 @@ struct dm_snapshot {
 #define RUNNING_MERGE          0
 #define SHUTDOWN_MERGE         1
 
+/*
+ * Maximum number of chunks being copied on write.
+ *
+ * The value was decided experimentally as a trade-off between memory
+ * consumption, stalling the kernel's workqueues and maintaining a high enough
+ * throughput.
+ */
+#define DEFAULT_COW_THRESHOLD 2048
+
+static int cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
+
 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
 		"A percentage of time allocated for copy on write");
 
@@ -1190,6 +1207,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_hash_tables;
 	}
 
+	sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+
 	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(s->kcopyd_client)) {
 		r = PTR_ERR(s->kcopyd_client);
@@ -1575,6 +1594,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
 		rb_link_node(&pe->out_of_order_node, parent, p);
 		rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
 	}
+	up(&s->cow_count);
 }
 
 /*
@@ -1598,6 +1618,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
 	dest.count = src.count;
 
 	/* Hand over to kcopyd */
+	down(&s->cow_count);
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
 }
 
@@ -1617,6 +1638,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
 	pe->full_bio = bio;
 	pe->full_bio_end_io = bio->bi_end_io;
 
+	down(&s->cow_count);
 	callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
 						   copy_callback, pe);
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 844f7d0f2ef8..4b1be754cc41 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1927,6 +1927,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	 */
 	if (blk_queue_is_zoned(q))
 		blk_revalidate_disk_zones(t->md->disk);
+
+	/* Allow reads to exceed readahead limits */
+	q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 954b7ab4e684..e673dacf6418 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -78,7 +78,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto err;
 	}
 
-	if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1) {
+	if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
 		ti->error = "Invalid striped device offset";
 		goto err;
 	}
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index fc65f0dedf7f..f4c31ffaa88e 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1040,6 +1040,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		v->tfm = NULL;
 		goto bad;
 	}
+
+	/*
+	 * dm-verity performance can vary greatly depending on which hash
+	 * algorithm implementation is used.  Help people debug performance
+	 * problems by logging the ->cra_driver_name.
+	 */
+	DMINFO("%s using implementation \"%s\"", v->alg_name,
+	       crypto_hash_alg_common(v->tfm)->base.cra_driver_name);
+
 	v->digest_size = crypto_ahash_digestsize(v->tfm);
 	if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
 		ti->error = "Digest size too big";
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d50eec94cd7..2b8cee35e4d5 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -2061,7 +2061,7 @@ invalid_optional:
 		if (IS_ERR(wc->flush_thread)) {
 			r = PTR_ERR(wc->flush_thread);
 			wc->flush_thread = NULL;
-			ti->error = "Couldn't spawn endio thread";
+			ti->error = "Couldn't spawn flush thread";
 			goto bad;
 		}
 		wake_up_process(wc->flush_thread);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a4a06982ed91..d67c95ef8d7e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1486,11 +1486,9 @@ static bool is_split_required_for_discard(struct dm_target *ti)
 }
 
 static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
-				       get_num_bios_fn get_num_bios,
-				       is_split_required_fn is_split_required)
+				       unsigned num_bios, bool is_split_required)
 {
 	unsigned len;
-	unsigned num_bios;
 
 	/*
 	 * Even though the device advertised support for this type of
@@ -1498,11 +1496,10 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
 	 * reconfiguration might also have changed that since the
 	 * check was performed.
 	 */
-	num_bios = get_num_bios ? get_num_bios(ti) : 0;
 	if (!num_bios)
 		return -EOPNOTSUPP;
 
-	if (is_split_required && !is_split_required(ti))
+	if (!is_split_required)
 		len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
 	else
 		len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
@@ -1517,23 +1514,23 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
 
 static int __send_discard(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_discard_bios,
-					   is_split_required_for_discard);
+	return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
+					   is_split_required_for_discard(ti));
 }
 
 static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL);
+	return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
 }
 
 static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL);
+	return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
 }
 
 static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL);
+	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
 }
 
 static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
@@ -1716,10 +1713,7 @@ out:
 	return ret;
 }
 
-typedef blk_qc_t (process_bio_fn)(struct mapped_device *, struct dm_table *, struct bio *);
-
-static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
-				  process_bio_fn process_bio)
+static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct mapped_device *md = q->queuedata;
 	blk_qc_t ret = BLK_QC_T_NONE;
@@ -1739,26 +1733,15 @@ static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
 		return ret;
 	}
 
-	ret = process_bio(md, map, bio);
+	if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
+		ret = __process_bio(md, map, bio);
+	else
+		ret = __split_and_process_bio(md, map, bio);
 
 	dm_put_live_table(md, srcu_idx);
 	return ret;
 }
 
-/*
- * The request function that remaps the bio to one target and
- * splits off any remainder.
- */
-static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
-{
-	return __dm_make_request(q, bio, __split_and_process_bio);
-}
-
-static blk_qc_t dm_make_request_nvme(struct request_queue *q, struct bio *bio)
-{
-	return __dm_make_request(q, bio, __process_bio);
-}
-
 static int dm_any_congested(void *congested_data, int bdi_bits)
 {
 	int r = bdi_bits;
@@ -2246,12 +2229,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 		break;
 	case DM_TYPE_BIO_BASED:
 	case DM_TYPE_DAX_BIO_BASED:
-		dm_init_normal_md_queue(md);
-		blk_queue_make_request(md->queue, dm_make_request);
-		break;
 	case DM_TYPE_NVME_BIO_BASED:
 		dm_init_normal_md_queue(md);
-		blk_queue_make_request(md->queue, dm_make_request_nvme);
+		blk_queue_make_request(md->queue, dm_make_request);
 		break;
 	case DM_TYPE_NONE:
 		WARN_ON_ONCE(true);
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-28 15:02:26 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-28 15:02:26 -0800
commit	4ed7bdc1eb4c82cf4bfdf6a94dd36fd695f6f387 (patch)
tree	c3a74a9825876fd3ffd78eb04b4035c29d6028f9
parent	5d24ae67a961c51beb255a28c9c417d9710247c2 (diff)
parent	c6d6e9b0f6b4201c77f2cea3964dd122697e3543 (diff)
download	talos-op-linux-4ed7bdc1eb4c82cf4bfdf6a94dd36fd695f6f387.tar.gz talos-op-linux-4ed7bdc1eb4c82cf4bfdf6a94dd36fd695f6f387.zip