diff options
Diffstat (limited to 'include')
31 files changed, 664 insertions, 604 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h index 056fb627edb3..7380b094dcca 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -491,35 +491,40 @@ do { \ bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ + bio_associate_blkg(bio); \ } while (0) #define bio_copy_dev(dst, src) \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ + bio_clone_blkg_association(dst, src); \ } while (0) #define bio_dev(bio) \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); +void bio_associate_blkg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkcg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline void bio_associate_blkg_from_page(struct bio *bio, + struct page *page) { } #endif #ifdef CONFIG_BLK_CGROUP -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); -void bio_disassociate_task(struct bio *bio); -void bio_clone_blkcg_association(struct bio *dst, struct bio *src); +void bio_disassociate_blkg(struct bio *bio); +void bio_associate_blkg(struct bio *bio); +void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css); +void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkcg(struct bio *bio, - struct cgroup_subsys_state *blkcg_css) { return 0; } -static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkcg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_disassociate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ } +static inline void bio_clone_blkg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6d766a19f2bb..f025fd1e22e6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -21,6 +21,7 @@ #include <linux/blkdev.h> #include <linux/atomic.h> #include <linux/kthread.h> +#include <linux/fs.h> /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -122,11 +123,8 @@ struct blkcg_gq { /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; - /* request allocation list for this blkcg-q pair */ - struct request_list rl; - /* reference count */ - atomic_t refcnt; + struct percpu_ref refcnt; /* is this blkg online? protected by both blkcg and q locks */ bool online; @@ -184,6 +182,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -230,22 +230,62 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *bio_blkcg(struct bio *bio) +/** + * __bio_blkcg - internal, inconsistent version to get blkcg + * + * DO NOT USE. + * This function is inconsistent and consequently is dangerous to use. The + * first part of the function returns a blkcg where a reference is owned by the + * bio. This means it does not need to be rcu protected as it cannot go away + * with the bio owning a reference to it. However, the latter potentially gets + * it from task_css(). This can race against task migration and the cgroup + * dying. It is also semantically different as it must be called rcu protected + * and is susceptible to failure when trying to get a reference to it. + * Therefore, it is not ok to assume that *_get() will always succeed on the + * blkcg returned here. + */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) { - struct cgroup_subsys_state *css; + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; + return css_to_blkcg(blkcg_css()); +} - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); - css = kthread_blkcg(); - if (css) - return css_to_blkcg(css); - return css_to_blkcg(task_css(current, io_cgrp_id)); +/** + * bio_blkcg - grab the blkcg associated with a bio + * @bio: target bio + * + * This returns the blkcg associated with a bio, %NULL if not associated. + * Callers are expected to either handle %NULL or know association has been + * done prior to calling this. + */ +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; + return NULL; } static inline bool blk_cgroup_congested(void) @@ -328,16 +368,12 @@ static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. This function should be called - * under RCU read lock and is guaranteed to return %NULL if @q is bypassing - * - see blk_queue_bypass_start() for details. + * under RCU read loc. */ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { WARN_ON_ONCE(!rcu_read_lock_held()); - - if (unlikely(blk_queue_bypass(q))) - return NULL; return __blkg_lookup(blkcg, q, false); } @@ -451,26 +487,35 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - atomic_inc(&blkg->refcnt); + percpu_ref_get(&blkg->refcnt); } /** - * blkg_try_get - try and get a blkg reference + * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ -static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +static inline bool blkg_tryget(struct blkcg_gq *blkg) { - if (atomic_inc_not_zero(&blkg->refcnt)) - return blkg; - return NULL; + return percpu_ref_tryget(&blkg->refcnt); } +/** + * blkg_tryget_closest - try and get a blkg ref on the closet blkg + * @blkg: blkg to get + * + * This walks up the blkg tree to find the closest non-dying blkg and returns + * the blkg that it did association with as it may not be the passed in blkg. + */ +static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) +{ + while (blkg && !percpu_ref_tryget(&blkg->refcnt)) + blkg = blkg->parent; -void __blkg_release_rcu(struct rcu_head *rcu); + return blkg; +} /** * blkg_put - put a blkg reference @@ -478,9 +523,7 @@ void __blkg_release_rcu(struct rcu_head *rcu); */ static inline void blkg_put(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - if (atomic_dec_and_test(&blkg->refcnt)) - call_rcu(&blkg->rcu_head, __blkg_release_rcu); + percpu_ref_put(&blkg->refcnt); } /** @@ -515,94 +558,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -/** - * blk_get_rl - get request_list to use - * @q: request_queue of interest - * @bio: bio which will be attached to the allocated request (may be %NULL) - * - * The caller wants to allocate a request from @q to use for @bio. Find - * the request_list to use and obtain a reference on it. Should be called - * under queue_lock. This function is guaranteed to return non-%NULL - * request_list. - */ -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) -{ - struct blkcg *blkcg; - struct blkcg_gq *blkg; - - rcu_read_lock(); - - blkcg = bio_blkcg(bio); - - /* bypass blkg lookup and use @q->root_rl directly for root */ - if (blkcg == &blkcg_root) - goto root_rl; - - /* - * Try to use blkg->rl. blkg lookup may fail under memory pressure - * or if either the blkcg or queue is going away. Fall back to - * root_rl in such cases. - */ - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) - goto root_rl; - - blkg_get(blkg); - rcu_read_unlock(); - return &blkg->rl; -root_rl: - rcu_read_unlock(); - return &q->root_rl; -} - -/** - * blk_put_rl - put request_list - * @rl: request_list to put - * - * Put the reference acquired by blk_get_rl(). Should be called under - * queue_lock. - */ -static inline void blk_put_rl(struct request_list *rl) -{ - if (rl->blkg->blkcg != &blkcg_root) - blkg_put(rl->blkg); -} - -/** - * blk_rq_set_rl - associate a request with a request_list - * @rq: request of interest - * @rl: target request_list - * - * Associate @rq with @rl so that accounting and freeing can know the - * request_list @rq came from. - */ -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) -{ - rq->rl = rl; -} - -/** - * blk_rq_rl - return the request_list a request came from - * @rq: request of interest - * - * Return the request_list @rq is allocated from. - */ -static inline struct request_list *blk_rq_rl(struct request *rq) -{ - return rq->rl; -} - -struct request_list *__blk_queue_next_rl(struct request_list *rl, - struct request_queue *q); -/** - * blk_queue_for_each_rl - iterate through all request_lists of a request_queue - * - * Should be used under queue_lock. - */ -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) - static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) { int ret; @@ -797,32 +752,34 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif + +static inline void blkcg_bio_issue_init(struct bio *bio) +{ + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +} + static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { - struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; rcu_read_lock(); - blkcg = bio_blkcg(bio); - - /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, &blkcg->css); - - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - spin_unlock_irq(q->queue_lock); + + if (!bio->bi_blkg) { + char b[BDEVNAME_SIZE]; + + WARN_ONCE(1, + "no blkg associated for bio on block-device: %s\n", + bio_devname(bio, b)); + bio_associate_blkg(bio); } + blkg = bio->bi_blkg; + throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { - blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the @@ -834,6 +791,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } + blkcg_bio_issue_init(bio); + rcu_read_unlock(); return !throtl; } @@ -930,6 +889,7 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, @@ -939,12 +899,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) { return &q->root_rl; } -static inline void blk_put_rl(struct request_list *rl) { } -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } -static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } - +static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 9f4c17f0d2d8..0b1f45c62623 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_PCI_H #define _LINUX_BLK_MQ_PCI_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, +int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h index b4ade198007d..7b6ecf9ac4c3 100644 --- a/include/linux/blk-mq-rdma.h +++ b/include/linux/blk-mq-rdma.h @@ -4,7 +4,7 @@ struct blk_mq_tag_set; struct ib_device; -int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, +int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map, struct ib_device *dev, int first_vec); #endif /* _LINUX_BLK_MQ_RDMA_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h index 69b4da262c45..687ae287e1dc 100644 --- a/include/linux/blk-mq-virtio.h +++ b/include/linux/blk-mq-virtio.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_VIRTIO_H #define _LINUX_BLK_MQ_VIRTIO_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct virtio_device; -int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, +int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, struct virtio_device *vdev, int first_vec); #endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2286dc12c6bc..0e030f5f76b6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -37,7 +37,8 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx *dispatch_from; unsigned int dispatch_busy; - unsigned int nr_ctx; + unsigned short type; + unsigned short nr_ctx; struct blk_mq_ctx **ctxs; spinlock_t dispatch_wait_lock; @@ -74,10 +75,31 @@ struct blk_mq_hw_ctx { struct srcu_struct srcu[0]; }; +struct blk_mq_queue_map { + unsigned int *mq_map; + unsigned int nr_queues; + unsigned int queue_offset; +}; + +enum hctx_type { + HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */ + HCTX_TYPE_READ, /* just for READ I/O */ + HCTX_TYPE_POLL, /* polled I/O of any kind */ + + HCTX_MAX_TYPES, +}; + struct blk_mq_tag_set { - unsigned int *mq_map; + /* + * map[] holds ctx -> hctx mappings, one map exists for each type + * that the driver wishes to support. There are no restrictions + * on maps being of the same size, and it's perfectly legal to + * share maps between types. + */ + struct blk_mq_queue_map map[HCTX_MAX_TYPES]; + unsigned int nr_maps; /* nr entries in map[] */ const struct blk_mq_ops *ops; - unsigned int nr_hw_queues; + unsigned int nr_hw_queues; /* nr hw queues across maps */ unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; unsigned int cmd_size; /* per-request extra data */ @@ -99,6 +121,7 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); @@ -109,11 +132,13 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); -typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, +typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); -typedef void (busy_tag_iter_fn)(struct request *, void *, bool); -typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); +typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); +typedef int (poll_fn)(struct blk_mq_hw_ctx *); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); +typedef bool (busy_fn)(struct request_queue *); +typedef void (complete_fn)(struct request *); struct blk_mq_ops { @@ -123,6 +148,15 @@ struct blk_mq_ops { queue_rq_fn *queue_rq; /* + * If a driver uses bd->last to judge when to submit requests to + * hardware, it must define this function. In case of errors that + * make us stop issuing further requests, this hook serves the + * purpose of kicking the hardware (which the last request otherwise + * would have done). + */ + commit_rqs_fn *commit_rqs; + + /* * Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case @@ -141,7 +175,7 @@ struct blk_mq_ops { */ poll_fn *poll; - softirq_done_fn *complete; + complete_fn *complete; /* * Called when the block layer side of a hardware queue has been @@ -165,6 +199,11 @@ struct blk_mq_ops { /* Called from inside blk_get_request() */ void (*initialize_rq_fn)(struct request *rq); + /* + * If set, returns whether or not this queue currently is busy + */ + busy_fn *busy; + map_queues_fn *map_queues; #ifdef CONFIG_BLK_DEBUG_FS @@ -218,6 +257,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); +bool blk_mq_queue_inflight(struct request_queue *q); + enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), @@ -264,7 +305,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_complete_request(struct request *rq); +bool blk_mq_complete_request(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q); @@ -288,24 +329,12 @@ void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -int blk_mq_map_queues(struct blk_mq_tag_set *set); +int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); -/** - * blk_mq_mark_complete() - Set request state to complete - * @rq: request to set to complete state - * - * Returns true if request state was successfully set to complete. If - * successful, the caller is responsibile for seeing this request is ended, as - * blk_mq_complete_request will not work again. - */ -static inline bool blk_mq_mark_complete(struct request *rq) -{ - return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) == - MQ_RQ_IN_FLIGHT; -} +unsigned int blk_mq_rq_cpu(struct request *rq); /* * Driver command data is immediately after the request. So subtract request @@ -328,4 +357,14 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) +static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag != -1) + return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); + + return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | + BLK_QC_T_INTERNAL; +} + #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1dcf652ba0aa..5c7e7f859a24 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -174,11 +174,11 @@ struct bio { void *bi_private; #ifdef CONFIG_BLK_CGROUP /* - * Optional ioc and css associated with this bio. Put on bio - * release. Read comment on top of bio_associate_current(). + * Represents the association of the css and request_queue for the bio. + * If a bio goes direct to device, it will not have a blkg as it will + * not have a request_queue associated with it. The reference is put + * on release of the bio. */ - struct io_context *bi_ioc; - struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif @@ -228,6 +228,7 @@ struct bio { #define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion * of this bio. */ #define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */ +#define BIO_TRACKED 12 /* set if bio goes through the rq_qos path */ /* See BVEC_POOL_OFFSET below before adding new flags */ @@ -323,6 +324,8 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_HIPRI, + /* for driver use */ __REQ_DRV, __REQ_SWAP, /* swapping request. */ @@ -343,8 +346,8 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) - #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +#define REQ_HIPRI (1ULL << __REQ_HIPRI) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) @@ -422,17 +425,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie) return cookie != BLK_QC_T_NONE; } -static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num, - bool internal) -{ - blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT); - - if (internal) - ret |= BLK_QC_T_INTERNAL; - - return ret; -} - static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4293dc1cd160..45552e6eae1e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -58,25 +58,6 @@ struct blk_stat_callback; typedef void (rq_end_io_fn)(struct request *, blk_status_t); -#define BLK_RL_SYNCFULL (1U << 0) -#define BLK_RL_ASYNCFULL (1U << 1) - -struct request_list { - struct request_queue *q; /* the queue this rl belongs to */ -#ifdef CONFIG_BLK_CGROUP - struct blkcg_gq *blkg; /* blkg this request pool belongs to */ -#endif - /* - * count[], starved[], and wait[] are indexed by - * BLK_RW_SYNC/BLK_RW_ASYNC - */ - int count[2]; - int starved[2]; - mempool_t *rq_pool; - wait_queue_head_t wait[2]; - unsigned int flags; -}; - /* * request flags */ typedef __u32 __bitwise req_flags_t; @@ -85,8 +66,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_SORTED ((__force req_flags_t)(1 << 0)) /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* uses tagged queueing */ -#define RQF_QUEUED ((__force req_flags_t)(1 << 2)) /* may not be passed by ioscheduler */ #define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) /* request for flush sequence */ @@ -150,8 +129,8 @@ enum mq_rq_state { struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; + struct blk_mq_hw_ctx *mq_hctx; - int cpu; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; @@ -245,11 +224,7 @@ struct request { refcount_t ref; unsigned int timeout; - - /* access through blk_rq_set_deadline, blk_rq_deadline */ - unsigned long __deadline; - - struct list_head timeout_list; + unsigned long deadline; union { struct __call_single_data csd; @@ -264,10 +239,6 @@ struct request { /* for bidi */ struct request *next_rq; - -#ifdef CONFIG_BLK_CGROUP - struct request_list *rl; /* rl this rq is alloced from */ -#endif }; static inline bool blk_op_is_scsi(unsigned int op) @@ -311,41 +282,21 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; -typedef void (request_fn_proc) (struct request_queue *q); typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); -typedef int (prep_rq_fn) (struct request_queue *, struct request *); -typedef void (unprep_rq_fn) (struct request_queue *, struct request *); struct bio_vec; -typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (lld_busy_fn) (struct request_queue *q); -typedef int (bsg_job_fn) (struct bsg_job *); -typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); -typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ BLK_EH_RESET_TIMER, /* reset timer and try again */ }; -typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); - enum blk_queue_state { Queue_down, Queue_up, }; -struct blk_queue_tag { - struct request **tag_index; /* map of busy tags */ - unsigned long *tag_map; /* bit map of free/busy tags */ - int max_depth; /* what we will send to device */ - int real_max_depth; /* what the array can hold */ - atomic_t refcnt; /* map can be shared */ - int alloc_policy; /* tag allocation policy */ - int next_tag; /* next tag */ -}; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ @@ -444,40 +395,15 @@ struct request_queue { struct list_head queue_head; struct request *last_merge; struct elevator_queue *elevator; - int nr_rqs[2]; /* # allocated [a]sync rqs */ - int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ struct blk_queue_stats *stats; struct rq_qos *rq_qos; - /* - * If blkcg is not used, @q->root_rl serves all requests. If blkcg - * is used, root blkg allocates from @q->root_rl and all other - * blkgs from their own blkg->rl. Which one to use should be - * determined using bio_request_list(). - */ - struct request_list root_rl; - - request_fn_proc *request_fn; make_request_fn *make_request_fn; - poll_q_fn *poll_fn; - prep_rq_fn *prep_rq_fn; - unprep_rq_fn *unprep_rq_fn; - softirq_done_fn *softirq_done_fn; - rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; - lld_busy_fn *lld_busy_fn; - /* Called just after a request is allocated */ - init_rq_fn *init_rq_fn; - /* Called just before a request is freed */ - exit_rq_fn *exit_rq_fn; - /* Called from inside blk_get_request() */ - void (*initialize_rq_fn)(struct request *rq); const struct blk_mq_ops *mq_ops; - unsigned int *mq_map; - /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; unsigned int nr_queues; @@ -488,17 +414,6 @@ struct request_queue { struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; - /* - * Dispatch queue sorting - */ - sector_t end_sector; - struct request *boundary_rq; - - /* - * Delayed queue handling - */ - struct delayed_work delay_work; - struct backing_dev_info *backing_dev_info; /* @@ -529,13 +444,7 @@ struct request_queue { */ gfp_t bounce_gfp; - /* - * protects queue structures from reentrancy. ->__queue_lock should - * _never_ be used directly, it is queue private. always use - * ->queue_lock. - */ - spinlock_t __queue_lock; - spinlock_t *queue_lock; + spinlock_t queue_lock; /* * queue kobject @@ -545,7 +454,7 @@ struct request_queue { /* * mq queue kobject */ - struct kobject mq_kobj; + struct kobject *mq_kobj; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; @@ -561,27 +470,12 @@ struct request_queue { * queue settings */ unsigned long nr_requests; /* Max # of requests */ - unsigned int nr_congestion_on; - unsigned int nr_congestion_off; - unsigned int nr_batching; unsigned int dma_drain_size; void *dma_drain_buffer; unsigned int dma_pad_mask; unsigned int dma_alignment; - struct blk_queue_tag *queue_tags; - - unsigned int nr_sorted; - unsigned int in_flight[2]; - - /* - * Number of active block driver functions for which blk_drain_queue() - * must wait. Must be incremented around functions that unlock the - * queue_lock internally, e.g. scsi_request_fn(). - */ - unsigned int request_fn_active; - unsigned int rq_timeout; int poll_nsec; @@ -590,7 +484,6 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - struct list_head timeout_list; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP @@ -645,11 +538,9 @@ struct request_queue { struct mutex sysfs_lock; - int bypass_depth; atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) - bsg_job_fn *bsg_job_fn; struct bsg_class_device bsg_dev; #endif @@ -669,12 +560,12 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; struct dentry *sched_debugfs_dir; + struct dentry *rqos_debugfs_dir; #endif bool mq_sysfs_init_done; size_t cmd_size; - void *rq_alloc_data; struct work_struct release_work; @@ -682,10 +573,8 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ -#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ @@ -718,19 +607,15 @@ struct request_queue { (1 << QUEUE_FLAG_ADD_RANDOM)) #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_POLL)) + (1 << QUEUE_FLAG_SAME_COMP)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); -bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); -#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) -#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ @@ -757,32 +642,20 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -static inline int queue_in_flight(struct request_queue *q) -{ - return q->in_flight[0] + q->in_flight[1]; -} - static inline bool blk_account_rq(struct request *rq) { return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); } -#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) -/* rq->queuelist of dequeued request must be list_empty() */ -#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -/* - * Driver can handle struct request, if it either has an old style - * request_fn defined, or is blk-mq based. - */ -static inline bool queue_is_rq_based(struct request_queue *q) +static inline bool queue_is_mq(struct request_queue *q) { - return q->request_fn || q->mq_ops; + return q->mq_ops; } static inline unsigned int blk_queue_cluster(struct request_queue *q) @@ -845,27 +718,6 @@ static inline bool rq_is_sync(struct request *rq) return op_is_sync(rq->cmd_flags); } -static inline bool blk_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - return rl->flags & flag; -} - -static inline void blk_set_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags |= flag; -} - -static inline void blk_clear_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags &= ~flag; -} - static inline bool rq_mergeable(struct request *rq) { if (blk_rq_is_passthrough(rq)) @@ -902,16 +754,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) return q->nr_requests; } -/* - * q->prep_rq_fn return values - */ -enum { - BLKPREP_OK, /* serve it */ - BLKPREP_KILL, /* fatal error, kill, return -EIO */ - BLKPREP_DEFER, /* leave on queue */ - BLKPREP_INVALID, /* invalid command, kill, return -EREMOTEIO */ -}; - extern unsigned long blk_max_low_pfn, blk_max_pfn; /* @@ -983,10 +825,8 @@ extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); -extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); -extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, @@ -996,7 +836,6 @@ extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); -extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); @@ -1009,15 +848,7 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); -extern void blk_start_queue(struct request_queue *q); -extern void blk_start_queue_async(struct request_queue *q); -extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q); -extern void __blk_run_queue_uncond(struct request_queue *q); -extern void blk_run_queue(struct request_queue *); -extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); @@ -1034,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -bool blk_poll(struct request_queue *q, blk_qc_t cookie); +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { @@ -1172,13 +1003,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq) return nr_bios; } -/* - * Request issue related functions. - */ -extern struct request *blk_peek_request(struct request_queue *q); -extern void blk_start_request(struct request *rq); -extern struct request *blk_fetch_request(struct request_queue *q); - void blk_steal_bios(struct bio_list *list, struct request *rq); /* @@ -1196,27 +1020,18 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); */ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_finish_request(struct request *rq, blk_status_t error); -extern bool blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); -extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -extern void blk_unprep_request(struct request *); /* * Access functions for manipulating queue properties */ -extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, - spinlock_t *lock, int node_id); -extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); -extern int blk_init_allocated_queue(struct request_queue *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); @@ -1255,15 +1070,10 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); -extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); -extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); -extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); @@ -1299,8 +1109,7 @@ extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, - spinlock_t *lock); +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); @@ -1317,9 +1126,10 @@ extern void blk_set_queue_dying(struct request_queue *); * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { - struct list_head list; /* requests */ struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ + unsigned short rq_count; + bool multiple_queues; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) @@ -1358,31 +1168,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) struct blk_plug *plug = tsk->plug; return plug && - (!list_empty(&plug->list) || - !list_empty(&plug->mq_list) || + (!list_empty(&plug->mq_list) || !list_empty(&plug->cb_list)); } -/* - * tag stuff - */ -extern int blk_queue_start_tag(struct request_queue *, struct request *); -extern struct request *blk_queue_find_tag(struct request_queue *, int); -extern void blk_queue_end_tag(struct request_queue *, struct request *); -extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); -extern void blk_queue_free_tags(struct request_queue *); -extern int blk_queue_resize_tags(struct request_queue *, int); -extern struct blk_queue_tag *blk_init_tags(int, int); -extern void blk_free_tags(struct blk_queue_tag *); - -static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, - int tag) -{ - if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) - return NULL; - return bqt->tag_index[tag]; -} - extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); @@ -1982,4 +1771,17 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, #endif /* CONFIG_BLOCK */ +static inline void blk_wake_io_task(struct task_struct *waiter) +{ + /* + * If we're polling, the task itself is doing the completions. For + * that case, we don't need to signal a wakeup, it's enough to just + * mark us as RUNNING. + */ + if (waiter == current) + __set_current_state(TASK_RUNNING); + else + wake_up_process(waiter); +} + #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 6aeaf6472665..b356e0006731 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -31,6 +31,9 @@ struct device; struct scatterlist; struct request_queue; +typedef int (bsg_job_fn) (struct bsg_job *); +typedef enum blk_eh_timer_return (bsg_timeout_fn)(struct request *); + struct bsg_buffer { unsigned int payload_len; int sg_cnt; @@ -72,7 +75,8 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, int dd_job_size); + bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size); +void bsg_remove_queue(struct request_queue *q); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9d12757a65b0..9968332cceed 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,6 +93,8 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 015bb59c0331..2e9e2763bf47 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -23,74 +23,6 @@ enum elv_merge { ELEVATOR_DISCARD_MERGE = 3, }; -typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **, - struct bio *); - -typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *); - -typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge); - -typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_allow_rq_merge_fn) (struct request_queue *, - struct request *, struct request *); - -typedef void (elevator_bio_merged_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_dispatch_fn) (struct request_queue *, int); - -typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); -typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); -typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int); - -typedef void (elevator_init_icq_fn) (struct io_cq *); -typedef void (elevator_exit_icq_fn) (struct io_cq *); -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, - struct bio *, gfp_t); -typedef void (elevator_put_req_fn) (struct request *); -typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); -typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); - -typedef int (elevator_init_fn) (struct request_queue *, - struct elevator_type *e); -typedef void (elevator_exit_fn) (struct elevator_queue *); -typedef void (elevator_registered_fn) (struct request_queue *); - -struct elevator_ops -{ - elevator_merge_fn *elevator_merge_fn; - elevator_merged_fn *elevator_merged_fn; - elevator_merge_req_fn *elevator_merge_req_fn; - elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn; - elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn; - elevator_bio_merged_fn *elevator_bio_merged_fn; - - elevator_dispatch_fn *elevator_dispatch_fn; - elevator_add_req_fn *elevator_add_req_fn; - elevator_activate_req_fn *elevator_activate_req_fn; - elevator_deactivate_req_fn *elevator_deactivate_req_fn; - - elevator_completed_req_fn *elevator_completed_req_fn; - - elevator_request_list_fn *elevator_former_req_fn; - elevator_request_list_fn *elevator_latter_req_fn; - - elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */ - elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */ - - elevator_set_req_fn *elevator_set_req_fn; - elevator_put_req_fn *elevator_put_req_fn; - - elevator_may_queue_fn *elevator_may_queue_fn; - - elevator_init_fn *elevator_init_fn; - elevator_exit_fn *elevator_exit_fn; - elevator_registered_fn *elevator_registered_fn; -}; - struct blk_mq_alloc_data; struct blk_mq_hw_ctx; @@ -137,17 +69,14 @@ struct elevator_type struct kmem_cache *icq_cache; /* fields provided by elevator implementation */ - union { - struct elevator_ops sq; - struct elevator_mq_ops mq; - } ops; + struct elevator_mq_ops ops; + size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; const char *elevator_alias; struct module *elevator_owner; - bool uses_mq; #ifdef CONFIG_BLK_DEBUG_FS const struct blk_mq_debugfs_attr *queue_debugfs_attrs; const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; @@ -175,40 +104,25 @@ struct elevator_queue struct kobject kobj; struct mutex sysfs_lock; unsigned int registered:1; - unsigned int uses_mq:1; DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* * block elevator interface */ -extern void elv_dispatch_sort(struct request_queue *, struct request *); -extern void elv_dispatch_add_tail(struct request_queue *, struct request *); -extern void elv_add_request(struct request_queue *, struct request *, int); -extern void __elv_add_request(struct request_queue *, struct request *, int); extern enum elv_merge elv_merge(struct request_queue *, struct request **, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, enum elv_merge); -extern void elv_bio_merged(struct request_queue *q, struct request *, - struct bio *); extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); -extern void elv_requeue_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); -extern int elv_may_queue(struct request_queue *, unsigned int); -extern void elv_completed_request(struct request_queue *, struct request *); -extern int elv_set_request(struct request_queue *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask); -extern void elv_put_request(struct request_queue *, struct request *); -extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration */ -extern void __init load_default_elevator_module(void); extern int elv_register(struct elevator_type *); extern void elv_unregister(struct elevator_type *); @@ -260,9 +174,5 @@ enum { #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) -#else /* CONFIG_BLOCK */ - -static inline void load_default_elevator_module(void) { } - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 26a8607b3c3c..6d52ce6af4ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2026,7 +2026,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) .ki_filp = filp, .ki_flags = iocb_flags(filp), .ki_hint = ki_hint_validate(file_write_hint(filp)), - .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0), + .ki_ioprio = get_current_ioprio(), }; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 70fc838e6773..06c0fd594097 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -17,6 +17,7 @@ #include <linux/percpu-refcount.h> #include <linux/uuid.h> #include <linux/blk_types.h> +#include <asm/local.h> #ifdef CONFIG_BLOCK @@ -89,6 +90,7 @@ struct disk_stats { unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; unsigned long time_in_queue; + local_t in_flight[2]; }; #define PARTITION_META_INFO_VOLNAMELTH 64 @@ -122,14 +124,13 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - atomic_t in_flight[2]; #ifdef CONFIG_SMP struct disk_stats __percpu *dkstats; #else struct disk_stats dkstats; #endif struct percpu_ref ref; - struct rcu_head rcu_head; + struct rcu_work rcu_work; }; #define GENHD_FL_REMOVABLE 1 @@ -295,8 +296,11 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) #define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define __part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) +#define part_stat_get_cpu(part, field, cpu) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field) + +#define part_stat_get(part, field) \ + part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ @@ -333,10 +337,9 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_lock() ({ rcu_read_lock(); 0; }) #define part_stat_unlock() rcu_read_unlock() -#define __part_stat_add(cpu, part, field, addnd) \ - ((part)->dkstats.field += addnd) - -#define part_stat_read(part, field) ((part)->dkstats.field) +#define part_stat_get(part, field) ((part)->dkstats.field) +#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) +#define part_stat_read(part, field) part_stat_get(part, field) static inline void part_stat_set_all(struct hd_struct *part, int value) { @@ -362,22 +365,33 @@ static inline void free_part_stats(struct hd_struct *part) part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) -#define part_stat_add(cpu, part, field, addnd) do { \ - __part_stat_add((cpu), (part), field, addnd); \ +#define __part_stat_add(part, field, addnd) \ + (part_stat_get(part, field) += (addnd)) + +#define part_stat_add(part, field, addnd) do { \ + __part_stat_add((part), field, addnd); \ if ((part)->partno) \ - __part_stat_add((cpu), &part_to_disk((part))->part0, \ + __part_stat_add(&part_to_disk((part))->part0, \ field, addnd); \ } while (0) -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) - -void part_in_flight(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); +#define part_stat_dec(gendiskp, field) \ + part_stat_add(gendiskp, field, -1) +#define part_stat_inc(gendiskp, field) \ + part_stat_add(gendiskp, field, 1) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) + +#define part_stat_local_dec(gendiskp, field) \ + local_dec(&(part_stat_get(gendiskp, field))) +#define part_stat_local_inc(gendiskp, field) \ + local_inc(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read(gendiskp, field) \ + local_read(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read_cpu(gendiskp, field, cpu) \ + local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) + +unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part); void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, @@ -398,8 +412,7 @@ static inline void free_part_info(struct hd_struct *part) kfree(part->info); } -/* block/blk-core.c */ -extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); +void update_io_ticks(struct hd_struct *part, unsigned long now); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, diff --git a/include/linux/ide.h b/include/linux/ide.h index c74b0321922a..e7d29ae633cd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -10,7 +10,7 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/ata.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/proc_fs.h> #include <linux/interrupt.h> #include <linux/bitops.h> @@ -50,6 +50,7 @@ struct ide_request { struct scsi_request sreq; u8 sense[SCSI_SENSE_BUFFERSIZE]; u8 type; + void *special; }; static inline struct ide_request *ide_req(struct request *rq) @@ -529,6 +530,10 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ + bool (*prep_rq)(struct ide_drive_s *, struct request *); + + struct blk_mq_tag_set tag_set; + struct request *rq; /* current request */ void *driver_data; /* extra driver data */ u16 *id; /* identification info */ @@ -612,6 +617,10 @@ struct ide_drive_s { bool sense_rq_armed; struct request *sense_rq; struct request_sense sense_data; + + /* async sense insertion */ + struct work_struct rq_work; + struct list_head rq_list; }; typedef struct ide_drive_s ide_drive_t; @@ -1089,6 +1098,7 @@ extern int ide_pci_clk; int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int); void ide_kill_rq(ide_drive_t *, struct request *); +void ide_insert_request_head(ide_drive_t *, struct request *); void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); @@ -1208,7 +1218,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); -extern void do_ide_request(struct request_queue *); +extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); diff --git a/include/linux/init.h b/include/linux/init.h index 9c2aba1dbabf..5255069f5a9f 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -146,7 +146,6 @@ extern unsigned int reset_devices; /* used by init/main.c */ void setup_arch(char **); void prepare_namespace(void); -void __init load_default_modules(void); int __init init_rootfs(void); #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 9e30ed6443db..e9bfe6972aed 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -71,6 +71,19 @@ static inline int task_nice_ioclass(struct task_struct *task) } /* + * If the calling process has set an I/O priority, use that. Otherwise, return + * the default I/O priority. + */ +static inline int get_current_ioprio(void) +{ + struct io_context *ioc = current->io_context; + + if (ioc) + return ioc->ioprio; + return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); +} + +/* * For inheritance, return the highest of the two given priorities */ extern int ioprio_best(unsigned short aprio, unsigned short bprio); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2fdeac1a420d..5d865a5d5cdc 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int, struct nvm_chk_meta *); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int); typedef void (nvm_destroy_dma_pool_fn)(void *); typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, dma_addr_t *); @@ -357,6 +357,7 @@ struct nvm_geo { u32 clba; /* sectors per chunk */ u16 csecs; /* sector size */ u16 sos; /* out-of-band area size */ + bool ext; /* metadata in extended data buffer */ /* device write constrains */ u32 ws_min; /* minimum write size */ diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 496ff759f84c..91745cc3704c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -403,7 +403,6 @@ struct nvme_fc_port_template { void **handle); void (*delete_queue)(struct nvme_fc_local_port *, unsigned int qidx, void *handle); - void (*poll_queue)(struct nvme_fc_local_port *, void *handle); int (*ls_req)(struct nvme_fc_local_port *, struct nvme_fc_remote_port *, struct nvmefc_ls_req *); @@ -649,22 +648,6 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), - /* Bit 2: When 0, the LLDD is calling the cmd rcv handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the cmd rcv handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. - */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), - /* Bit 3: When 0, the LLDD is calling the op done handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the op done handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. - */ }; diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h new file mode 100644 index 000000000000..03d87c0550a9 --- /dev/null +++ b/include/linux/nvme-tcp.h @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe over Fabrics TCP protocol header. + * Copyright (c) 2018 Lightbits Labs. All rights reserved. + */ + +#ifndef _LINUX_NVME_TCP_H +#define _LINUX_NVME_TCP_H + +#include <linux/nvme.h> + +#define NVME_TCP_DISC_PORT 8009 +#define NVME_TCP_ADMIN_CCSZ SZ_8K +#define NVME_TCP_DIGEST_LENGTH 4 + +enum nvme_tcp_pfv { + NVME_TCP_PFV_1_0 = 0x0, +}; + +enum nvme_tcp_fatal_error_status { + NVME_TCP_FES_INVALID_PDU_HDR = 0x01, + NVME_TCP_FES_PDU_SEQ_ERR = 0x02, + NVME_TCP_FES_HDR_DIGEST_ERR = 0x03, + NVME_TCP_FES_DATA_OUT_OF_RANGE = 0x04, + NVME_TCP_FES_R2T_LIMIT_EXCEEDED = 0x05, + NVME_TCP_FES_DATA_LIMIT_EXCEEDED = 0x05, + NVME_TCP_FES_UNSUPPORTED_PARAM = 0x06, +}; + +enum nvme_tcp_digest_option { + NVME_TCP_HDR_DIGEST_ENABLE = (1 << 0), + NVME_TCP_DATA_DIGEST_ENABLE = (1 << 1), +}; + +enum nvme_tcp_pdu_type { + nvme_tcp_icreq = 0x0, + nvme_tcp_icresp = 0x1, + nvme_tcp_h2c_term = 0x2, + nvme_tcp_c2h_term = 0x3, + nvme_tcp_cmd = 0x4, + nvme_tcp_rsp = 0x5, + nvme_tcp_h2c_data = 0x6, + nvme_tcp_c2h_data = 0x7, + nvme_tcp_r2t = 0x9, +}; + +enum nvme_tcp_pdu_flags { + NVME_TCP_F_HDGST = (1 << 0), + NVME_TCP_F_DDGST = (1 << 1), + NVME_TCP_F_DATA_LAST = (1 << 2), + NVME_TCP_F_DATA_SUCCESS = (1 << 3), +}; + +/** + * struct nvme_tcp_hdr - nvme tcp pdu common header + * + * @type: pdu type + * @flags: pdu specific flags + * @hlen: pdu header length + * @pdo: pdu data offset + * @plen: pdu wire byte length + */ +struct nvme_tcp_hdr { + __u8 type; + __u8 flags; + __u8 hlen; + __u8 pdo; + __le32 plen; +}; + +/** + * struct nvme_tcp_icreq_pdu - nvme tcp initialize connection request pdu + * + * @hdr: pdu generic header + * @pfv: pdu version format + * @hpda: host pdu data alignment (dwords, 0's based) + * @digest: digest types enabled + * @maxr2t: maximum r2ts per request supported + */ +struct nvme_tcp_icreq_pdu { + struct nvme_tcp_hdr hdr; + __le16 pfv; + __u8 hpda; + __u8 digest; + __le32 maxr2t; + __u8 rsvd2[112]; +}; + +/** + * struct nvme_tcp_icresp_pdu - nvme tcp initialize connection response pdu + * + * @hdr: pdu common header + * @pfv: pdu version format + * @cpda: controller pdu data alignment (dowrds, 0's based) + * @digest: digest types enabled + * @maxdata: maximum data capsules per r2t supported + */ +struct nvme_tcp_icresp_pdu { + struct nvme_tcp_hdr hdr; + __le16 pfv; + __u8 cpda; + __u8 digest; + __le32 maxdata; + __u8 rsvd[112]; +}; + +/** + * struct nvme_tcp_term_pdu - nvme tcp terminate connection pdu + * + * @hdr: pdu common header + * @fes: fatal error status + * @fei: fatal error information + */ +struct nvme_tcp_term_pdu { + struct nvme_tcp_hdr hdr; + __le16 fes; + __le32 fei; + __u8 rsvd[8]; +}; + +/** + * struct nvme_tcp_cmd_pdu - nvme tcp command capsule pdu + * + * @hdr: pdu common header + * @cmd: nvme command + */ +struct nvme_tcp_cmd_pdu { + struct nvme_tcp_hdr hdr; + struct nvme_command cmd; +}; + +/** + * struct nvme_tcp_rsp_pdu - nvme tcp response capsule pdu + * + * @hdr: pdu common header + * @hdr: nvme-tcp generic header + * @cqe: nvme completion queue entry + */ +struct nvme_tcp_rsp_pdu { + struct nvme_tcp_hdr hdr; + struct nvme_completion cqe; +}; + +/** + * struct nvme_tcp_r2t_pdu - nvme tcp ready-to-transfer pdu + * + * @hdr: pdu common header + * @command_id: nvme command identifier which this relates to + * @ttag: transfer tag (controller generated) + * @r2t_offset: offset from the start of the command data + * @r2t_length: length the host is allowed to send + */ +struct nvme_tcp_r2t_pdu { + struct nvme_tcp_hdr hdr; + __u16 command_id; + __u16 ttag; + __le32 r2t_offset; + __le32 r2t_length; + __u8 rsvd[4]; +}; + +/** + * struct nvme_tcp_data_pdu - nvme tcp data pdu + * + * @hdr: pdu common header + * @command_id: nvme command identifier which this relates to + * @ttag: transfer tag (controller generated) + * @data_offset: offset from the start of the command data + * @data_length: length of the data stream + */ +struct nvme_tcp_data_pdu { + struct nvme_tcp_hdr hdr; + __u16 command_id; + __u16 ttag; + __le32 data_offset; + __le32 data_length; + __u8 rsvd[4]; +}; + +union nvme_tcp_pdu { + struct nvme_tcp_icreq_pdu icreq; + struct nvme_tcp_icresp_pdu icresp; + struct nvme_tcp_cmd_pdu cmd; + struct nvme_tcp_rsp_pdu rsp; + struct nvme_tcp_r2t_pdu r2t; + struct nvme_tcp_data_pdu data; +}; + +#endif /* _LINUX_NVME_TCP_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 818dbe9331be..bbcc83886899 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -52,15 +52,20 @@ enum { enum { NVMF_TRTYPE_RDMA = 1, /* RDMA */ NVMF_TRTYPE_FC = 2, /* Fibre Channel */ + NVMF_TRTYPE_TCP = 3, /* TCP/IP */ NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */ NVMF_TRTYPE_MAX, }; /* Transport Requirements codes for Discovery Log Page entry TREQ field */ enum { - NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ - NVMF_TREQ_REQUIRED = 1, /* Required */ - NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ + NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ + NVMF_TREQ_REQUIRED = 1, /* Required */ + NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ +#define NVME_TREQ_SECURE_CHANNEL_MASK \ + (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED) + + NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* Supports SQ flow control disable */ }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS @@ -198,6 +203,11 @@ enum { NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, }; +enum nvme_ctrl_attr { + NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), + NVME_CTRL_ATTR_TBKAS = (1 << 6), +}; + struct nvme_id_ctrl { __le16 vid; __le16 ssvid; @@ -214,7 +224,11 @@ struct nvme_id_ctrl { __le32 rtd3e; __le32 oaes; __le32 ctratt; - __u8 rsvd100[156]; + __u8 rsvd100[28]; + __le16 crdt1; + __le16 crdt2; + __le16 crdt3; + __u8 rsvd134[122]; __le16 oacs; __u8 acl; __u8 aerl; @@ -481,12 +495,21 @@ enum { NVME_AER_NOTICE_NS_CHANGED = 0x00, NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, NVME_AER_NOTICE_ANA = 0x03, + NVME_AER_NOTICE_DISC_CHANGED = 0xf0, }; enum { - NVME_AEN_CFG_NS_ATTR = 1 << 8, - NVME_AEN_CFG_FW_ACT = 1 << 9, - NVME_AEN_CFG_ANA_CHANGE = 1 << 11, + NVME_AEN_BIT_NS_ATTR = 8, + NVME_AEN_BIT_FW_ACT = 9, + NVME_AEN_BIT_ANA_CHANGE = 11, + NVME_AEN_BIT_DISC_CHANGE = 31, +}; + +enum { + NVME_AEN_CFG_NS_ATTR = 1 << NVME_AEN_BIT_NS_ATTR, + NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT, + NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE, + NVME_AEN_CFG_DISC_CHANGE = 1 << NVME_AEN_BIT_DISC_CHANGE, }; struct nvme_lba_range_type { @@ -639,7 +662,12 @@ struct nvme_common_command { __le32 cdw2[2]; __le64 metadata; union nvme_data_ptr dptr; - __le32 cdw10[6]; + __le32 cdw10; + __le32 cdw11; + __le32 cdw12; + __le32 cdw13; + __le32 cdw14; + __le32 cdw15; }; struct nvme_rw_command { @@ -738,6 +766,15 @@ enum { NVME_HOST_MEM_RETURN = (1 << 1), }; +struct nvme_feat_host_behavior { + __u8 acre; + __u8 resv1[511]; +}; + +enum { + NVME_ENABLE_ACRE = 1, +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -792,6 +829,7 @@ enum { NVME_FEAT_RRL = 0x12, NVME_FEAT_PLM_CONFIG = 0x13, NVME_FEAT_PLM_WINDOW = 0x14, + NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1030,6 +1068,10 @@ struct nvmf_disc_rsp_page_hdr { struct nvmf_disc_rsp_page_entry entries[0]; }; +enum { + NVME_CONNECT_DISABLE_SQFLOW = (1 << 2), +}; + struct nvmf_connect_command { __u8 opcode; __u8 resv1; @@ -1126,6 +1168,20 @@ struct nvme_command { }; }; +struct nvme_error_slot { + __le64 error_count; + __le16 sqid; + __le16 cmdid; + __le16 status_field; + __le16 param_error_location; + __le64 lba; + __le32 nsid; + __u8 vs; + __u8 resv[3]; + __le64 cs; + __u8 resv2[24]; +}; + static inline bool nvme_is_write(struct nvme_command *cmd) { /* @@ -1243,6 +1299,7 @@ enum { NVME_SC_ANA_TRANSITION = 0x303, NVME_SC_HOST_PATH_ERROR = 0x370, + NVME_SC_CRD = 0x1800, NVME_SC_DNR = 0x4000, }; diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 804a50983ec5..14d558146aea 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -30,14 +30,24 @@ struct seq_file; */ struct sbitmap_word { /** - * @word: The bitmap word itself. + * @depth: Number of bits being used in @word/@cleared */ - unsigned long word; + unsigned long depth; /** - * @depth: Number of bits being used in @word. + * @word: word holding free bits */ - unsigned long depth; + unsigned long word ____cacheline_aligned_in_smp; + + /** + * @cleared: word holding cleared bits + */ + unsigned long cleared ____cacheline_aligned_in_smp; + + /** + * @swap_lock: Held while swapping word <-> cleared + */ + spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** @@ -125,6 +135,11 @@ struct sbitmap_queue { */ struct sbq_wait_state *ws; + /* + * @ws_active: count of currently active ws waitqueues + */ + atomic_t ws_active; + /** * @round_robin: Allocate bits in strict round-robin order. */ @@ -250,12 +265,14 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { - struct sbitmap_word *word = &sb->map[index]; - unsigned int depth = min_t(unsigned int, word->depth - nr, + unsigned long word; + unsigned int depth = min_t(unsigned int, + sb->map[index].depth - nr, sb->depth - scanned); scanned += depth; - if (!word->word) + word = sb->map[index].word & ~sb->map[index].cleared; + if (!word) goto next; /* @@ -265,7 +282,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, */ depth += nr; while (1) { - nr = find_next_bit(&word->word, depth, nr); + nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) @@ -310,6 +327,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } +/* + * This one is special, since it doesn't actually clear the bit, rather it + * sets the corresponding bit in the ->cleared mask instead. Paired with + * the caller doing sbitmap_batch_clear() if a given index is full, which + * will clear the previously freed entries in the corresponding ->word. + */ +static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) +{ + unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; + + set_bit(SB_NR_TO_BIT(sb, bitnr), addr); +} + static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, unsigned int bitnr) { @@ -321,8 +351,6 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } -unsigned int sbitmap_weight(const struct sbitmap *sb); - /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. @@ -531,4 +559,45 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); +struct sbq_wait { + struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */ + struct wait_queue_entry wait; +}; + +#define DEFINE_SBQ_WAIT(name) \ + struct sbq_wait name = { \ + .sbq = NULL, \ + .wait = { \ + .private = current, \ + .func = autoremove_wake_function, \ + .entry = LIST_HEAD_INIT((name).wait.entry), \ + } \ + } + +/* + * Wrapper around prepare_to_wait_exclusive(), which maintains some extra + * internal state. + */ +void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait, int state); + +/* + * Must be paired with sbitmap_prepare_to_wait(). + */ +void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait); + +/* + * Wrapper around add_wait_queue(), which maintains some extra internal state + */ +void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait); + +/* + * Must be paired with sbitmap_add_wait_queue() + */ +void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait); + #endif /* __LINUX_SCALE_BITMAP_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2a57a365c711..93f56fddd92a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3339,6 +3339,9 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); +int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, + struct ahash_request *hash); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); diff --git a/include/linux/uio.h b/include/linux/uio.h index 55ce99ddb912..ecf584f6b82d 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/thread_info.h> +#include <crypto/hash.h> #include <uapi/linux/uio.h> struct page; @@ -266,9 +267,11 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, + struct iov_iter *i); int import_iovec(int type, const struct iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fdfd04e348f6..738a0c24874f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. + * writeback context. Must be called after the bio has been associated with + * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. */ if (wbc->wb) - bio_associate_blkcg(bio, wbc->wb->blkcg_css); + bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index c891ada3c5c2..d85e6befa26b 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -61,6 +61,9 @@ struct scsi_pointer { /* flags preserved across unprep / reprep */ #define SCMD_PRESERVED_FLAGS (SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED) +/* for scmd->state */ +#define SCMD_STATE_COMPLETE 0 + struct scsi_cmnd { struct scsi_request req; struct scsi_device *device; @@ -145,6 +148,7 @@ struct scsi_cmnd { int result; /* Status code from lower level driver */ int flags; /* Command flags */ + unsigned long state; /* Command completion state */ unsigned char tag; /* SCSI-II queued command tag */ }; @@ -171,7 +175,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t *offset, size_t *len); extern void scsi_kunmap_atomic_sg(void *virt); -extern int scsi_init_io(struct scsi_cmnd *cmd); +extern blk_status_t scsi_init_io(struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_DMA extern int scsi_dma_map(struct scsi_cmnd *cmd); diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index c7bba2b24849..a862dc23c68d 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -69,7 +69,7 @@ struct scsi_device_handler { int (*attach)(struct scsi_device *); void (*detach)(struct scsi_device *); int (*activate)(struct scsi_device *, activate_complete, void *); - int (*prep_fn)(struct scsi_device *, struct request *); + blk_status_t (*prep_fn)(struct scsi_device *, struct request *); int (*set_params)(struct scsi_device *, const char *); void (*rescan)(struct scsi_device *); }; diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index fae8b465233e..6dffa8555a39 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -2,6 +2,7 @@ #ifndef _SCSI_SCSI_DRIVER_H #define _SCSI_SCSI_DRIVER_H +#include <linux/blk_types.h> #include <linux/device.h> struct module; @@ -13,7 +14,7 @@ struct scsi_driver { struct device_driver gendrv; void (*rescan)(struct device *); - int (*init_command)(struct scsi_cmnd *); + blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); int (*done)(struct scsi_cmnd *); int (*eh_action)(struct scsi_cmnd *, int); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 5ea06d310a25..aa760df8c6b3 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -11,7 +11,6 @@ #include <linux/blk-mq.h> #include <scsi/scsi.h> -struct request_queue; struct block_device; struct completion; struct module; @@ -22,7 +21,6 @@ struct scsi_target; struct Scsi_Host; struct scsi_host_cmd_pool; struct scsi_transport_template; -struct blk_queue_tags; /* @@ -547,14 +545,8 @@ struct Scsi_Host { struct scsi_host_template *hostt; struct scsi_transport_template *transportt; - /* - * Area to keep a shared tag map (if needed, will be - * NULL if not). - */ - union { - struct blk_queue_tag *bqt; - struct blk_mq_tag_set tag_set; - }; + /* Area to keep a shared tag map */ + struct blk_mq_tag_set tag_set; atomic_t host_busy; /* commands actually active on low-level */ atomic_t host_blocked; @@ -648,7 +640,6 @@ struct Scsi_Host { /* The controller does not support WRITE SAME */ unsigned no_write_same:1; - unsigned use_blk_mq:1; unsigned use_cmd_list:1; /* Host responded with short (<36 bytes) INQUIRY result */ @@ -742,11 +733,6 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost) shost->tmf_in_progress; } -static inline bool shost_use_blk_mq(struct Scsi_Host *shost) -{ - return shost->use_blk_mq; -} - extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); extern void scsi_flush_work(struct Scsi_Host *); diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index e192a0caa850..6053d46e794e 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -23,19 +23,15 @@ static inline struct scsi_cmnd *scsi_host_find_tag(struct Scsi_Host *shost, int tag) { struct request *req = NULL; + u16 hwq; if (tag == SCSI_NO_TAG) return NULL; - if (shost_use_blk_mq(shost)) { - u16 hwq = blk_mq_unique_tag_to_hwq(tag); - - if (hwq < shost->tag_set.nr_hw_queues) { - req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq], - blk_mq_unique_tag_to_tag(tag)); - } - } else { - req = blk_map_queue_find_tag(shost->bqt, tag); + hwq = blk_mq_unique_tag_to_hwq(tag); + if (hwq < shost->tag_set.nr_hw_queues) { + req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq], + blk_mq_unique_tag_to_tag(tag)); } if (!req) diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 2cbd6e42ad83..e4526f85c19d 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -221,9 +221,30 @@ DEFINE_EVENT(cache_set, bcache_journal_entry_full, TP_ARGS(c) ); -DEFINE_EVENT(bcache_bio, bcache_journal_write, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) +TRACE_EVENT(bcache_journal_write, + TP_PROTO(struct bio *bio, u32 keys), + TP_ARGS(bio, keys), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(u32, nr_keys ) + ), + + TP_fast_assign( + __entry->dev = bio_dev(bio); + __entry->sector = bio->bi_iter.bi_sector; + __entry->nr_sector = bio->bi_iter.bi_size >> 9; + __entry->nr_keys = keys; + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); + ), + + TP_printk("%d,%d %s %llu + %u keys %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, __entry->nr_sector, + __entry->nr_keys) ); /* Btree */ diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index ce43d340f010..8387e0af0f76 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -50,6 +50,8 @@ enum { * * IOCB_FLAG_RESFD - Set if the "aio_resfd" member of the "struct iocb" * is valid. + * IOCB_FLAG_IOPRIO - Set if the "aio_reqprio" member of the "struct iocb" + * is valid. */ #define IOCB_FLAG_RESFD (1 << 0) #define IOCB_FLAG_IOPRIO (1 << 1) |