diff options
Diffstat (limited to 'include/linux/iocontext.h')
-rw-r--r-- | include/linux/iocontext.h | 136 |
1 files changed, 95 insertions, 41 deletions
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 5037a0ad2312..7e1371c4bccf 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -3,32 +3,92 @@ #include <linux/radix-tree.h> #include <linux/rcupdate.h> +#include <linux/workqueue.h> -struct cfq_queue; -struct cfq_ttime { - unsigned long last_end_request; - - unsigned long ttime_total; - unsigned long ttime_samples; - unsigned long ttime_mean; +enum { + ICQ_IOPRIO_CHANGED, + ICQ_CGROUP_CHANGED, }; -struct cfq_io_context { - void *key; - - struct cfq_queue *cfqq[2]; - - struct io_context *ioc; - - struct cfq_ttime ttime; - - struct list_head queue_list; - struct hlist_node cic_list; - - void (*dtor)(struct io_context *); /* destructor */ - void (*exit)(struct io_context *); /* called on task exit */ +/* + * An io_cq (icq) is association between an io_context (ioc) and a + * request_queue (q). This is used by elevators which need to track + * information per ioc - q pair. + * + * Elevator can request use of icq by setting elevator_type->icq_size and + * ->icq_align. Both size and align must be larger than that of struct + * io_cq and elevator can use the tail area for private information. The + * recommended way to do this is defining a struct which contains io_cq as + * the first member followed by private members and using its size and + * align. For example, + * + * struct snail_io_cq { + * struct io_cq icq; + * int poke_snail; + * int feed_snail; + * }; + * + * struct elevator_type snail_elv_type { + * .ops = { ... }, + * .icq_size = sizeof(struct snail_io_cq), + * .icq_align = __alignof__(struct snail_io_cq), + * ... + * }; + * + * If icq_size is set, block core will manage icq's. All requests will + * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn() + * is called and be holding a reference to the associated io_context. + * + * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is + * called and, on destruction, ->elevator_exit_icq_fn(). Both functions + * are called with both the associated io_context and queue locks held. + * + * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding + * queue lock but the returned icq is valid only until the queue lock is + * released. Elevators can not and should not try to create or destroy + * icq's. + * + * As icq's are linked from both ioc and q, the locking rules are a bit + * complex. + * + * - ioc lock nests inside q lock. + * + * - ioc->icq_list and icq->ioc_node are protected by ioc lock. + * q->icq_list and icq->q_node by q lock. + * + * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq + * itself is protected by q lock. However, both the indexes and icq + * itself are also RCU managed and lookup can be performed holding only + * the q lock. + * + * - icq's are not reference counted. They are destroyed when either the + * ioc or q goes away. Each request with icq set holds an extra + * reference to ioc to ensure it stays until the request is completed. + * + * - Linking and unlinking icq's are performed while holding both ioc and q + * locks. Due to the lock ordering, q exit is simple but ioc exit + * requires reverse-order double lock dance. + */ +struct io_cq { + struct request_queue *q; + struct io_context *ioc; - struct rcu_head rcu_head; + /* + * q_node and ioc_node link io_cq through icq_list of q and ioc + * respectively. Both fields are unused once ioc_exit_icq() is + * called and shared with __rcu_icq_cache and __rcu_head which are + * used for RCU free of io_cq. + */ + union { + struct list_head q_node; + struct kmem_cache *__rcu_icq_cache; + }; + union { + struct hlist_node ioc_node; + struct rcu_head __rcu_head; + }; + + unsigned long changed; }; /* @@ -43,11 +103,6 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - unsigned short ioprio_changed; - -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) - unsigned short cgroup_changed; -#endif /* * For request batching @@ -55,9 +110,11 @@ struct io_context { int nr_batch_requests; /* Number of requests left in the batch */ unsigned long last_waited; /* Time last woken after wait for request */ - struct radix_tree_root radix_root; - struct hlist_head cic_list; - void __rcu *ioc_data; + struct radix_tree_root icq_tree; + struct io_cq __rcu *icq_hint; + struct hlist_head icq_list; + + struct work_struct release_work; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) @@ -76,20 +133,17 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) struct task_struct; #ifdef CONFIG_BLOCK -int put_io_context(struct io_context *ioc); +void put_io_context(struct io_context *ioc, struct request_queue *locked_q); void exit_io_context(struct task_struct *task); -struct io_context *get_io_context(gfp_t gfp_flags, int node); -struct io_context *alloc_io_context(gfp_t gfp_flags, int node); +struct io_context *get_task_io_context(struct task_struct *task, + gfp_t gfp_flags, int node); +void ioc_ioprio_changed(struct io_context *ioc, int ioprio); +void ioc_cgroup_changed(struct io_context *ioc); #else -static inline void exit_io_context(struct task_struct *task) -{ -} - struct io_context; -static inline int put_io_context(struct io_context *ioc) -{ - return 1; -} +static inline void put_io_context(struct io_context *ioc, + struct request_queue *locked_q) { } +static inline void exit_io_context(struct task_struct *task) { } #endif #endif |