Merge branch 'kvm-ppc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

This fix was intended for 4.13, but didn't get in because both maintainers were on vacation. Paul Mackerras: "It adds mutual exclusion between list_add_rcu and list_del_rcu calls on the kvm->arch.spapr_tce_tables list. Without this, userspace could potentially trigger corruption of the list and cause a host crash or worse."
author: Radim Krčmář <rkrcmar@redhat.com> 2017-09-08 14:40:43 +0200
committer: Radim Krčmář <rkrcmar@redhat.com> 2017-09-08 14:40:43 +0200
commit: 5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760 (patch)
tree: daca83ea5f9af1bd158504bd0b5af89c5a99b7fa /block
parent: 78809a68490d84eb632a215be2121d4b44c86954 (diff)
parent: edd03602d97236e8fea13cd76886c576186aa307 (diff)
download: blackbird-op-linux-5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760.tar.gz
blackbird-op-linux-5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760.zip
5 files changed, 119 insertions, 79 deletions
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 63e771ab56d8..859f0a8c97c8 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -71,17 +71,29 @@ struct bfq_service_tree {
  *
  * bfq_sched_data is the basic scheduler queue.  It supports three
  * ioprio_classes, and can be used either as a toplevel queue or as an
- * intermediate queue on a hierarchical setup.  @next_in_service
- * points to the active entity of the sched_data service trees that
- * will be scheduled next. It is used to reduce the number of steps
- * needed for each hierarchical-schedule update.
+ * intermediate queue in a hierarchical setup.
  *
  * The supported ioprio_classes are the same as in CFQ, in descending
  * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
  * Requests from higher priority queues are served before all the
  * requests from lower priority queues; among requests of the same
  * queue requests are served according to B-WF2Q+.
- * All the fields are protected by the queue lock of the containing bfqd.
+ *
+ * The schedule is implemented by the service trees, plus the field
+ * @next_in_service, which points to the entity on the active trees
+ * that will be served next, if 1) no changes in the schedule occurs
+ * before the current in-service entity is expired, 2) the in-service
+ * queue becomes idle when it expires, and 3) if the entity pointed by
+ * in_service_entity is not a queue, then the in-service child entity
+ * of the entity pointed by in_service_entity becomes idle on
+ * expiration. This peculiar definition allows for the following
+ * optimization, not yet exploited: while a given entity is still in
+ * service, we already know which is the best candidate for next
+ * service among the other active entitities in the same parent
+ * entity. We can then quickly compare the timestamps of the
+ * in-service entity with those of such best candidate.
+ *
+ * All fields are protected by the lock of the containing bfqd.
  */
 struct bfq_sched_data {
 	/* entity in service */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 979f8f21b7e2..911aa7431dbe 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
 
 /*
  * This function tells whether entity stops being a candidate for next
- * service, according to the following logic.
+ * service, according to the restrictive definition of the field
+ * next_in_service. In particular, this function is invoked for an
+ * entity that is about to be set in service.
  *
- * This function is invoked for an entity that is about to be set in
- * service. If such an entity is a queue, then the entity is no longer
- * a candidate for next service (i.e, a candidate entity to serve
- * after the in-service entity is expired). The function then returns
- * true.
+ * If entity is a queue, then the entity is no longer a candidate for
+ * next service according to the that definition, because entity is
+ * about to become the in-service queue. This function then returns
+ * true if entity is a queue.
  *
- * In contrast, the entity could stil be a candidate for next service
- * if it is not a queue, and has more than one child. In fact, even if
- * one of its children is about to be set in service, other children
- * may still be the next to serve. As a consequence, a non-queue
- * entity is not a candidate for next-service only if it has only one
- * child. And only if this condition holds, then the function returns
- * true for a non-queue entity.
+ * In contrast, entity could still be a candidate for next service if
+ * it is not a queue, and has more than one active child. In fact,
+ * even if one of its children is about to be set in service, other
+ * active children may still be the next to serve, for the parent
+ * entity, even according to the above definition. As a consequence, a
+ * non-queue entity is not a candidate for next-service only if it has
+ * only one active child. And only if this condition holds, then this
+ * function returns true for a non-queue entity.
  */
 static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 {
@@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 
 	bfqg = container_of(entity, struct bfq_group, entity);
 
+	/*
+	 * The field active_entities does not always contain the
+	 * actual number of active children entities: it happens to
+	 * not account for the in-service entity in case the latter is
+	 * removed from its active tree (which may get done after
+	 * invoking the function bfq_no_longer_next_in_service in
+	 * bfq_get_next_queue). Fortunately, here, i.e., while
+	 * bfq_no_longer_next_in_service is not yet completed in
+	 * bfq_get_next_queue, bfq_active_extract has not yet been
+	 * invoked, and thus active_entities still coincides with the
+	 * actual number of active entities.
+	 */
 	if (bfqg->active_entities == 1)
 		return true;
 
@@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
  * one of its children receives a new request.
  *
  * Basically, this function updates the timestamps of entity and
- * inserts entity into its active tree, ater possible extracting it
+ * inserts entity into its active tree, ater possibly extracting it
  * from its idle tree.
  */
 static void __bfq_activate_entity(struct bfq_entity *entity,
@@ -1048,7 +1062,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
 		entity->start = entity->finish;
 		/*
 		 * In addition, if the entity had more than one child
-		 * when set in service, then was not extracted from
+		 * when set in service, then it was not extracted from
 		 * the active tree. This implies that the position of
 		 * the entity in the active tree may need to be
 		 * changed now, because we have just updated the start
@@ -1056,9 +1070,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
 		 * time in a moment (the requeueing is then, more
 		 * precisely, a repositioning in this case). To
 		 * implement this repositioning, we: 1) dequeue the
-		 * entity here, 2) update the finish time and
-		 * requeue the entity according to the new
-		 * timestamps below.
+		 * entity here, 2) update the finish time and requeue
+		 * the entity according to the new timestamps below.
 		 */
 		if (entity->tree)
 			bfq_active_extract(st, entity);
@@ -1105,9 +1118,10 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
 
 
 /**
- * bfq_activate_entity - activate or requeue an entity representing a bfq_queue,
- *			 and activate, requeue or reposition all ancestors
- *			 for which such an update becomes necessary.
+ * bfq_activate_requeue_entity - activate or requeue an entity representing a
+ *				 bfq_queue, and activate, requeue or reposition
+ *				 all ancestors for which such an update becomes
+ *				 necessary.
  * @entity: the entity to activate.
  * @non_blocking_wait_rq: true if this entity was waiting for a request
  * @requeue: true if this is a requeue, which implies that bfqq is
@@ -1135,9 +1149,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
  * @ins_into_idle_tree: if false, the entity will not be put into the
  *			idle tree.
  *
- * Deactivates an entity, independently from its previous state.  Must
+ * Deactivates an entity, independently of its previous state.  Must
  * be invoked only if entity is on a service tree. Extracts the entity
- * from that tree, and if necessary and allowed, puts it on the idle
+ * from that tree, and if necessary and allowed, puts it into the idle
  * tree.
  */
 bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
@@ -1158,8 +1172,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 	st = bfq_entity_service_tree(entity);
 	is_in_service = entity == sd->in_service_entity;
 
-	if (is_in_service)
+	if (is_in_service) {
 		bfq_calc_finish(entity, entity->service);
+		sd->in_service_entity = NULL;
+	}
 
 	if (entity->tree == &st->active)
 		bfq_active_extract(st, entity);
@@ -1177,7 +1193,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 /**
  * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
  * @entity: the entity to deactivate.
- * @ins_into_idle_tree: true if the entity can be put on the idle tree
+ * @ins_into_idle_tree: true if the entity can be put into the idle tree
  */
 static void bfq_deactivate_entity(struct bfq_entity *entity,
 				  bool ins_into_idle_tree,
@@ -1208,16 +1224,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
 			 */
 			bfq_update_next_in_service(sd, NULL);
 
-		if (sd->next_in_service)
+		if (sd->next_in_service || sd->in_service_entity) {
 			/*
-			 * The parent entity is still backlogged,
-			 * because next_in_service is not NULL. So, no
-			 * further upwards deactivation must be
-			 * performed.  Yet, next_in_service has
-			 * changed.  Then the schedule does need to be
-			 * updated upwards.
+			 * The parent entity is still active, because
+			 * either next_in_service or in_service_entity
+			 * is not NULL. So, no further upwards
+			 * deactivation must be performed.  Yet,
+			 * next_in_service has changed.	Then the
+			 * schedule does need to be updated upwards.
+			 *
+			 * NOTE If in_service_entity is not NULL, then
+			 * next_in_service may happen to be NULL,
+			 * although the parent entity is evidently
+			 * active. This happens if 1) the entity
+			 * pointed by in_service_entity is the only
+			 * active entity in the parent entity, and 2)
+			 * according to the definition of
+			 * next_in_service, the in_service_entity
+			 * cannot be considered as
+			 * next_in_service. See the comments on the
+			 * definition of next_in_service for details.
 			 */
 			break;
+		}
 
 		/*
 		 * If we get here, then the parent is no more
@@ -1494,47 +1523,34 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 
 		/*
 		 * If entity is no longer a candidate for next
-		 * service, then we extract it from its active tree,
-		 * for the following reason. To further boost the
-		 * throughput in some special case, BFQ needs to know
-		 * which is the next candidate entity to serve, while
-		 * there is already an entity in service. In this
-		 * respect, to make it easy to compute/update the next
-		 * candidate entity to serve after the current
-		 * candidate has been set in service, there is a case
-		 * where it is necessary to extract the current
-		 * candidate from its service tree. Such a case is
-		 * when the entity just set in service cannot be also
-		 * a candidate for next service. Details about when
-		 * this conditions holds are reported in the comments
-		 * on the function bfq_no_longer_next_in_service()
-		 * invoked below.
+		 * service, then it must be extracted from its active
+		 * tree, so as to make sure that it won't be
+		 * considered when computing next_in_service. See the
+		 * comments on the function
+		 * bfq_no_longer_next_in_service() for details.
 		 */
 		if (bfq_no_longer_next_in_service(entity))
 			bfq_active_extract(bfq_entity_service_tree(entity),
 					   entity);
 
 		/*
-		 * For the same reason why we may have just extracted
-		 * entity from its active tree, we may need to update
-		 * next_in_service for the sched_data of entity too,
-		 * regardless of whether entity has been extracted.
-		 * In fact, even if entity has not been extracted, a
-		 * descendant entity may get extracted. Such an event
-		 * would cause a change in next_in_service for the
-		 * level of the descendant entity, and thus possibly
-		 * back to upper levels.
+		 * Even if entity is not to be extracted according to
+		 * the above check, a descendant entity may get
+		 * extracted in one of the next iterations of this
+		 * loop. Such an event could cause a change in
+		 * next_in_service for the level of the descendant
+		 * entity, and thus possibly back to this level.
 		 *
-		 * We cannot perform the resulting needed update
-		 * before the end of this loop, because, to know which
-		 * is the correct next-to-serve candidate entity for
-		 * each level, we need first to find the leaf entity
-		 * to set in service. In fact, only after we know
-		 * which is the next-to-serve leaf entity, we can
-		 * discover whether the parent entity of the leaf
-		 * entity becomes the next-to-serve, and so on.
+		 * However, we cannot perform the resulting needed
+		 * update of next_in_service for this level before the
+		 * end of the whole loop, because, to know which is
+		 * the correct next-to-serve candidate entity for each
+		 * level, we need first to find the leaf entity to set
+		 * in service. In fact, only after we know which is
+		 * the next-to-serve leaf entity, we can discover
+		 * whether the parent entity of the leaf entity
+		 * becomes the next-to-serve, and so on.
 		 */
-
 	}
 
 	bfqq = bfq_entity_to_bfqq(entity);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 83e92beb3c9f..9b1ea478577b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -387,9 +387,11 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  */
 bool __bio_integrity_endio(struct bio *bio)
 {
-	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
+	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+	    (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
 		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
 		queue_work(kintegrityd_wq, &bip->bip_work);
 		return false;
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index 0c3354cf3552..76944e3271bf 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -36,12 +36,18 @@ int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
 	for (queue = 0; queue < set->nr_hw_queues; queue++) {
 		mask = pci_irq_get_affinity(pdev, queue);
 		if (!mask)
-			return -EINVAL;
+			goto fallback;
 
 		for_each_cpu(cpu, mask)
 			set->mq_map[cpu] = queue;
 	}
 
 	return 0;
+
+fallback:
+	WARN_ON_ONCE(set->nr_hw_queues > 1);
+	for_each_possible_cpu(cpu)
+		set->mq_map[cpu] = 0;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 041f7b7fa0d6..4603b115e234 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -301,11 +301,12 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 	struct elevator_queue *e = q->elevator;
 	struct request *rq;
 	unsigned int tag;
+	struct blk_mq_ctx *local_ctx = NULL;
 
 	blk_queue_enter_live(q);
 	data->q = q;
 	if (likely(!data->ctx))
-		data->ctx = blk_mq_get_ctx(q);
+		data->ctx = local_ctx = blk_mq_get_ctx(q);
 	if (likely(!data->hctx))
 		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 	if (op & REQ_NOWAIT)
@@ -324,6 +325,10 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 
 	tag = blk_mq_get_tag(data);
 	if (tag == BLK_MQ_TAG_FAIL) {
+		if (local_ctx) {
+			blk_mq_put_ctx(local_ctx);
+			data->ctx = NULL;
+		}
 		blk_queue_exit(q);
 		return NULL;
 	}
@@ -355,13 +360,13 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		return ERR_PTR(ret);
 
 	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
-
-	blk_mq_put_ctx(alloc_data.ctx);
 	blk_queue_exit(q);
 
 	if (!rq)
 		return ERR_PTR(-EWOULDBLOCK);
 
+	blk_mq_put_ctx(alloc_data.ctx);
+
 	rq->__data_len = 0;
 	rq->__sector = (sector_t) -1;
 	rq->bio = rq->biotail = NULL;
@@ -406,7 +411,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
 	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
-
 	blk_queue_exit(q);
 
 	if (!rq)
@@ -679,8 +683,8 @@ EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 				    unsigned long msecs)
 {
-	kblockd_schedule_delayed_work(&q->requeue_work,
-				      msecs_to_jiffies(msecs));
+	kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
+				    msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
author	Radim Krčmář <rkrcmar@redhat.com>	2017-09-08 14:40:43 +0200
committer	Radim Krčmář <rkrcmar@redhat.com>	2017-09-08 14:40:43 +0200
commit	5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760 (patch)
tree	daca83ea5f9af1bd158504bd0b5af89c5a99b7fa /block
parent	78809a68490d84eb632a215be2121d4b44c86954 (diff)
parent	edd03602d97236e8fea13cd76886c576186aa307 (diff)
download	blackbird-op-linux-5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760.tar.gz blackbird-op-linux-5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760.zip