summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorRadim Krčmář <rkrcmar@redhat.com>2017-09-08 14:40:43 +0200
committerRadim Krčmář <rkrcmar@redhat.com>2017-09-08 14:40:43 +0200
commit5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760 (patch)
treedaca83ea5f9af1bd158504bd0b5af89c5a99b7fa /block
parent78809a68490d84eb632a215be2121d4b44c86954 (diff)
parentedd03602d97236e8fea13cd76886c576186aa307 (diff)
downloadblackbird-op-linux-5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760.tar.gz
blackbird-op-linux-5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760.zip
Merge branch 'kvm-ppc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
This fix was intended for 4.13, but didn't get in because both maintainers were on vacation. Paul Mackerras: "It adds mutual exclusion between list_add_rcu and list_del_rcu calls on the kvm->arch.spapr_tce_tables list. Without this, userspace could potentially trigger corruption of the list and cause a host crash or worse."
Diffstat (limited to 'block')
-rw-r--r--block/bfq-iosched.h22
-rw-r--r--block/bfq-wf2q.c146
-rw-r--r--block/bio-integrity.c6
-rw-r--r--block/blk-mq-pci.c8
-rw-r--r--block/blk-mq.c16
5 files changed, 119 insertions, 79 deletions
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 63e771ab56d8..859f0a8c97c8 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -71,17 +71,29 @@ struct bfq_service_tree {
*
* bfq_sched_data is the basic scheduler queue. It supports three
* ioprio_classes, and can be used either as a toplevel queue or as an
- * intermediate queue on a hierarchical setup. @next_in_service
- * points to the active entity of the sched_data service trees that
- * will be scheduled next. It is used to reduce the number of steps
- * needed for each hierarchical-schedule update.
+ * intermediate queue in a hierarchical setup.
*
* The supported ioprio_classes are the same as in CFQ, in descending
* priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
* Requests from higher priority queues are served before all the
* requests from lower priority queues; among requests of the same
* queue requests are served according to B-WF2Q+.
- * All the fields are protected by the queue lock of the containing bfqd.
+ *
+ * The schedule is implemented by the service trees, plus the field
+ * @next_in_service, which points to the entity on the active trees
+ * that will be served next, if 1) no changes in the schedule occurs
+ * before the current in-service entity is expired, 2) the in-service
+ * queue becomes idle when it expires, and 3) if the entity pointed by
+ * in_service_entity is not a queue, then the in-service child entity
+ * of the entity pointed by in_service_entity becomes idle on
+ * expiration. This peculiar definition allows for the following
+ * optimization, not yet exploited: while a given entity is still in
+ * service, we already know which is the best candidate for next
+ * service among the other active entitities in the same parent
+ * entity. We can then quickly compare the timestamps of the
+ * in-service entity with those of such best candidate.
+ *
+ * All fields are protected by the lock of the containing bfqd.
*/
struct bfq_sched_data {
/* entity in service */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 979f8f21b7e2..911aa7431dbe 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
/*
* This function tells whether entity stops being a candidate for next
- * service, according to the following logic.
+ * service, according to the restrictive definition of the field
+ * next_in_service. In particular, this function is invoked for an
+ * entity that is about to be set in service.
*
- * This function is invoked for an entity that is about to be set in
- * service. If such an entity is a queue, then the entity is no longer
- * a candidate for next service (i.e, a candidate entity to serve
- * after the in-service entity is expired). The function then returns
- * true.
+ * If entity is a queue, then the entity is no longer a candidate for
+ * next service according to the that definition, because entity is
+ * about to become the in-service queue. This function then returns
+ * true if entity is a queue.
*
- * In contrast, the entity could stil be a candidate for next service
- * if it is not a queue, and has more than one child. In fact, even if
- * one of its children is about to be set in service, other children
- * may still be the next to serve. As a consequence, a non-queue
- * entity is not a candidate for next-service only if it has only one
- * child. And only if this condition holds, then the function returns
- * true for a non-queue entity.
+ * In contrast, entity could still be a candidate for next service if
+ * it is not a queue, and has more than one active child. In fact,
+ * even if one of its children is about to be set in service, other
+ * active children may still be the next to serve, for the parent
+ * entity, even according to the above definition. As a consequence, a
+ * non-queue entity is not a candidate for next-service only if it has
+ * only one active child. And only if this condition holds, then this
+ * function returns true for a non-queue entity.
*/
static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
{
@@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
bfqg = container_of(entity, struct bfq_group, entity);
+ /*
+ * The field active_entities does not always contain the
+ * actual number of active children entities: it happens to
+ * not account for the in-service entity in case the latter is
+ * removed from its active tree (which may get done after
+ * invoking the function bfq_no_longer_next_in_service in
+ * bfq_get_next_queue). Fortunately, here, i.e., while
+ * bfq_no_longer_next_in_service is not yet completed in
+ * bfq_get_next_queue, bfq_active_extract has not yet been
+ * invoked, and thus active_entities still coincides with the
+ * actual number of active entities.
+ */
if (bfqg->active_entities == 1)
return true;
@@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
* one of its children receives a new request.
*
* Basically, this function updates the timestamps of entity and
- * inserts entity into its active tree, ater possible extracting it
+ * inserts entity into its active tree, ater possibly extracting it
* from its idle tree.
*/
static void __bfq_activate_entity(struct bfq_entity *entity,
@@ -1048,7 +1062,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
entity->start = entity->finish;
/*
* In addition, if the entity had more than one child
- * when set in service, then was not extracted from
+ * when set in service, then it was not extracted from
* the active tree. This implies that the position of
* the entity in the active tree may need to be
* changed now, because we have just updated the start
@@ -1056,9 +1070,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
* time in a moment (the requeueing is then, more
* precisely, a repositioning in this case). To
* implement this repositioning, we: 1) dequeue the
- * entity here, 2) update the finish time and
- * requeue the entity according to the new
- * timestamps below.
+ * entity here, 2) update the finish time and requeue
+ * the entity according to the new timestamps below.
*/
if (entity->tree)
bfq_active_extract(st, entity);
@@ -1105,9 +1118,10 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
/**
- * bfq_activate_entity - activate or requeue an entity representing a bfq_queue,
- * and activate, requeue or reposition all ancestors
- * for which such an update becomes necessary.
+ * bfq_activate_requeue_entity - activate or requeue an entity representing a
+ * bfq_queue, and activate, requeue or reposition
+ * all ancestors for which such an update becomes
+ * necessary.
* @entity: the entity to activate.
* @non_blocking_wait_rq: true if this entity was waiting for a request
* @requeue: true if this is a requeue, which implies that bfqq is
@@ -1135,9 +1149,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
* @ins_into_idle_tree: if false, the entity will not be put into the
* idle tree.
*
- * Deactivates an entity, independently from its previous state. Must
+ * Deactivates an entity, independently of its previous state. Must
* be invoked only if entity is on a service tree. Extracts the entity
- * from that tree, and if necessary and allowed, puts it on the idle
+ * from that tree, and if necessary and allowed, puts it into the idle
* tree.
*/
bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
@@ -1158,8 +1172,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
st = bfq_entity_service_tree(entity);
is_in_service = entity == sd->in_service_entity;
- if (is_in_service)
+ if (is_in_service) {
bfq_calc_finish(entity, entity->service);
+ sd->in_service_entity = NULL;
+ }
if (entity->tree == &st->active)
bfq_active_extract(st, entity);
@@ -1177,7 +1193,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
/**
* bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
* @entity: the entity to deactivate.
- * @ins_into_idle_tree: true if the entity can be put on the idle tree
+ * @ins_into_idle_tree: true if the entity can be put into the idle tree
*/
static void bfq_deactivate_entity(struct bfq_entity *entity,
bool ins_into_idle_tree,
@@ -1208,16 +1224,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
*/
bfq_update_next_in_service(sd, NULL);
- if (sd->next_in_service)
+ if (sd->next_in_service || sd->in_service_entity) {
/*
- * The parent entity is still backlogged,
- * because next_in_service is not NULL. So, no
- * further upwards deactivation must be
- * performed. Yet, next_in_service has
- * changed. Then the schedule does need to be
- * updated upwards.
+ * The parent entity is still active, because
+ * either next_in_service or in_service_entity
+ * is not NULL. So, no further upwards
+ * deactivation must be performed. Yet,
+ * next_in_service has changed. Then the
+ * schedule does need to be updated upwards.
+ *
+ * NOTE If in_service_entity is not NULL, then
+ * next_in_service may happen to be NULL,
+ * although the parent entity is evidently
+ * active. This happens if 1) the entity
+ * pointed by in_service_entity is the only
+ * active entity in the parent entity, and 2)
+ * according to the definition of
+ * next_in_service, the in_service_entity
+ * cannot be considered as
+ * next_in_service. See the comments on the
+ * definition of next_in_service for details.
*/
break;
+ }
/*
* If we get here, then the parent is no more
@@ -1494,47 +1523,34 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
/*
* If entity is no longer a candidate for next
- * service, then we extract it from its active tree,
- * for the following reason. To further boost the
- * throughput in some special case, BFQ needs to know
- * which is the next candidate entity to serve, while
- * there is already an entity in service. In this
- * respect, to make it easy to compute/update the next
- * candidate entity to serve after the current
- * candidate has been set in service, there is a case
- * where it is necessary to extract the current
- * candidate from its service tree. Such a case is
- * when the entity just set in service cannot be also
- * a candidate for next service. Details about when
- * this conditions holds are reported in the comments
- * on the function bfq_no_longer_next_in_service()
- * invoked below.
+ * service, then it must be extracted from its active
+ * tree, so as to make sure that it won't be
+ * considered when computing next_in_service. See the
+ * comments on the function
+ * bfq_no_longer_next_in_service() for details.
*/
if (bfq_no_longer_next_in_service(entity))
bfq_active_extract(bfq_entity_service_tree(entity),
entity);
/*
- * For the same reason why we may have just extracted
- * entity from its active tree, we may need to update
- * next_in_service for the sched_data of entity too,
- * regardless of whether entity has been extracted.
- * In fact, even if entity has not been extracted, a
- * descendant entity may get extracted. Such an event
- * would cause a change in next_in_service for the
- * level of the descendant entity, and thus possibly
- * back to upper levels.
+ * Even if entity is not to be extracted according to
+ * the above check, a descendant entity may get
+ * extracted in one of the next iterations of this
+ * loop. Such an event could cause a change in
+ * next_in_service for the level of the descendant
+ * entity, and thus possibly back to this level.
*
- * We cannot perform the resulting needed update
- * before the end of this loop, because, to know which
- * is the correct next-to-serve candidate entity for
- * each level, we need first to find the leaf entity
- * to set in service. In fact, only after we know
- * which is the next-to-serve leaf entity, we can
- * discover whether the parent entity of the leaf
- * entity becomes the next-to-serve, and so on.
+ * However, we cannot perform the resulting needed
+ * update of next_in_service for this level before the
+ * end of the whole loop, because, to know which is
+ * the correct next-to-serve candidate entity for each
+ * level, we need first to find the leaf entity to set
+ * in service. In fact, only after we know which is
+ * the next-to-serve leaf entity, we can discover
+ * whether the parent entity of the leaf entity
+ * becomes the next-to-serve, and so on.
*/
-
}
bfqq = bfq_entity_to_bfqq(entity);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 83e92beb3c9f..9b1ea478577b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -387,9 +387,11 @@ static void bio_integrity_verify_fn(struct work_struct *work)
*/
bool __bio_integrity_endio(struct bio *bio)
{
- if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
- struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+ (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
queue_work(kintegrityd_wq, &bip->bip_work);
return false;
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index 0c3354cf3552..76944e3271bf 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -36,12 +36,18 @@ int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
for (queue = 0; queue < set->nr_hw_queues; queue++) {
mask = pci_irq_get_affinity(pdev, queue);
if (!mask)
- return -EINVAL;
+ goto fallback;
for_each_cpu(cpu, mask)
set->mq_map[cpu] = queue;
}
return 0;
+
+fallback:
+ WARN_ON_ONCE(set->nr_hw_queues > 1);
+ for_each_possible_cpu(cpu)
+ set->mq_map[cpu] = 0;
+ return 0;
}
EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 041f7b7fa0d6..4603b115e234 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -301,11 +301,12 @@ static struct request *blk_mq_get_request(struct request_queue *q,
struct elevator_queue *e = q->elevator;
struct request *rq;
unsigned int tag;
+ struct blk_mq_ctx *local_ctx = NULL;
blk_queue_enter_live(q);
data->q = q;
if (likely(!data->ctx))
- data->ctx = blk_mq_get_ctx(q);
+ data->ctx = local_ctx = blk_mq_get_ctx(q);
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
if (op & REQ_NOWAIT)
@@ -324,6 +325,10 @@ static struct request *blk_mq_get_request(struct request_queue *q,
tag = blk_mq_get_tag(data);
if (tag == BLK_MQ_TAG_FAIL) {
+ if (local_ctx) {
+ blk_mq_put_ctx(local_ctx);
+ data->ctx = NULL;
+ }
blk_queue_exit(q);
return NULL;
}
@@ -355,13 +360,13 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
return ERR_PTR(ret);
rq = blk_mq_get_request(q, NULL, op, &alloc_data);
-
- blk_mq_put_ctx(alloc_data.ctx);
blk_queue_exit(q);
if (!rq)
return ERR_PTR(-EWOULDBLOCK);
+ blk_mq_put_ctx(alloc_data.ctx);
+
rq->__data_len = 0;
rq->__sector = (sector_t) -1;
rq->bio = rq->biotail = NULL;
@@ -406,7 +411,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
rq = blk_mq_get_request(q, NULL, op, &alloc_data);
-
blk_queue_exit(q);
if (!rq)
@@ -679,8 +683,8 @@ EXPORT_SYMBOL(blk_mq_kick_requeue_list);
void blk_mq_delay_kick_requeue_list(struct request_queue *q,
unsigned long msecs)
{
- kblockd_schedule_delayed_work(&q->requeue_work,
- msecs_to_jiffies(msecs));
+ kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
+ msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
OpenPOWER on IntegriCloud