Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig         |   2
-rw-r--r-- | block/Kconfig.iosched |   2
-rw-r--r-- | block/blk-cgroup.c    |  63
-rw-r--r-- | block/blk-cgroup.h    |  13
-rw-r--r-- | block/blk-core.c      |  34
-rw-r--r-- | block/blk-integrity.c |   2
-rw-r--r-- | block/blk-ioc.c       |   2
-rw-r--r-- | block/blk-merge.c     |   8
-rw-r--r-- | block/blk-settings.c  | 131
-rw-r--r-- | block/blk-sysfs.c     |  13
-rw-r--r-- | block/bsg.c           |   2
-rw-r--r-- | block/cfq-iosched.c   | 147
-rw-r--r-- | block/elevator.c      |  13
13 files changed, 198 insertions(+), 234 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig index e20fbde0875c..62a5921321cd 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -78,7 +78,7 @@ config BLK_DEV_INTEGRITY Protection. If in doubt, say N. config BLK_CGROUP - bool + tristate depends on CGROUPS default n ---help--- diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index b71abfb0d726..fc71cf071fb2 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -23,6 +23,7 @@ config IOSCHED_DEADLINE config IOSCHED_CFQ tristate "CFQ I/O scheduler" + select BLK_CGROUP if CFQ_GROUP_IOSCHED default y ---help--- The CFQ I/O scheduler tries to distribute bandwidth equally @@ -35,7 +36,6 @@ config IOSCHED_CFQ config CFQ_GROUP_IOSCHED bool "CFQ Group Scheduling support" depends on IOSCHED_CFQ && CGROUPS - select BLK_CGROUP default n ---help--- Enable group IO scheduling in CFQ. diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e7dbbaf5fb3e..4b686ad08eaa 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -23,19 +23,30 @@ static LIST_HEAD(blkio_list); struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); -bool blkiocg_css_tryget(struct blkio_cgroup *blkcg) -{ - if (!css_tryget(&blkcg->css)) - return false; - return true; -} -EXPORT_SYMBOL_GPL(blkiocg_css_tryget); +static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, + struct cgroup *); +static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, + struct task_struct *, bool); +static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, + struct cgroup *, struct task_struct *, bool); +static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); +static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); -void blkiocg_css_put(struct blkio_cgroup *blkcg) -{ - css_put(&blkcg->css); -} -EXPORT_SYMBOL_GPL(blkiocg_css_put); +struct cgroup_subsys blkio_subsys = { + .name = "blkio", + .create = blkiocg_create, + .can_attach = blkiocg_can_attach, + .attach = blkiocg_attach, + .destroy = blkiocg_destroy, + .populate = blkiocg_populate, +#ifdef CONFIG_BLK_CGROUP + /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */ + .subsys_id = blkio_subsys_id, +#endif + .use_id = 1, + .module = THIS_MODULE, +}; +EXPORT_SYMBOL_GPL(blkio_subsys); struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { @@ -267,7 +278,8 @@ remove_entry: done: free_css_id(&blkio_subsys, &blkcg->css); rcu_read_unlock(); - kfree(blkcg); + if (blkcg != &blkio_root_cgroup) + kfree(blkcg); } static struct cgroup_subsys_state * @@ -333,17 +345,6 @@ static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, task_unlock(tsk); } -struct cgroup_subsys blkio_subsys = { - .name = "blkio", - .create = blkiocg_create, - .can_attach = blkiocg_can_attach, - .attach = blkiocg_attach, - .destroy = blkiocg_destroy, - .populate = blkiocg_populate, - .subsys_id = blkio_subsys_id, - .use_id = 1, -}; - void blkio_policy_register(struct blkio_policy_type *blkiop) { spin_lock(&blkio_list_lock); @@ -359,3 +360,17 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop) spin_unlock(&blkio_list_lock); } EXPORT_SYMBOL_GPL(blkio_policy_unregister); + +static int __init init_cgroup_blkio(void) +{ + return cgroup_load_subsys(&blkio_subsys); +} + +static void __exit exit_cgroup_blkio(void) +{ + cgroup_unload_subsys(&blkio_subsys); +} + +module_init(init_cgroup_blkio); +module_exit(exit_cgroup_blkio); +MODULE_LICENSE("GPL"); diff --git a/block/blk-cgroup.h 
b/block/blk-cgroup.h index 4d316df863b4..8ccc20464dae 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -15,7 +15,13 @@ #include <linux/cgroup.h> -#ifdef CONFIG_BLK_CGROUP +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) + +#ifndef CONFIG_BLK_CGROUP +/* When blk-cgroup is a module, its subsys_id isn't a compile-time constant */ +extern struct cgroup_subsys blkio_subsys; +#define blkio_subsys_id blkio_subsys.subsys_id +#endif struct blkio_cgroup { struct cgroup_subsys_state css; @@ -43,9 +49,6 @@ struct blkio_group { unsigned long sectors; }; -extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg); -extern void blkiocg_css_put(struct blkio_cgroup *blkcg); - typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, unsigned int weight); @@ -94,7 +97,7 @@ static inline void blkiocg_update_blkio_group_dequeue_stats( struct blkio_group *blkg, unsigned long dequeue) {} #endif -#ifdef CONFIG_BLK_CGROUP +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, diff --git a/block/blk-core.c b/block/blk-core.c index d1a9a0a64f95..9fe174dc74d1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1490,9 +1490,9 @@ end_io: /* * We only want one ->make_request_fn to be active at a time, * else stack usage with stacked devices could be a problem. - * So use current->bio_{list,tail} to keep a list of requests + * So use current->bio_list to keep a list of requests * submited by a make_request_fn function. - * current->bio_tail is also used as a flag to say if + * current->bio_list is also used as a flag to say if * generic_make_request is currently active in this task or not. * If it is NULL, then no make_request is active. If it is non-NULL, * then a make_request is active, and new requests should be added @@ -1500,11 +1500,11 @@ end_io: */ void generic_make_request(struct bio *bio) { - if (current->bio_tail) { + struct bio_list bio_list_on_stack; + + if (current->bio_list) { /* make_request is active */ - *(current->bio_tail) = bio; - bio->bi_next = NULL; - current->bio_tail = &bio->bi_next; + bio_list_add(current->bio_list, bio); return; } /* following loop may be a bit non-obvious, and so deserves some @@ -1512,30 +1512,27 @@ void generic_make_request(struct bio *bio) * Before entering the loop, bio->bi_next is NULL (as all callers * ensure that) so we have a list with a single bio. * We pretend that we have just taken it off a longer list, so - * we assign bio_list to the next (which is NULL) and bio_tail - * to &bio_list, thus initialising the bio_list of new bios to be + * we assign bio_list to a pointer to the bio_list_on_stack, + * thus initialising the bio_list of new bios to be * added. __generic_make_request may indeed add some more bios * through a recursive call to generic_make_request. If it * did, we find a non-NULL value in bio_list and re-enter the loop * from the top. In this case we really did just take the bio - * of the top of the list (no pretending) and so fixup bio_list and - * bio_tail or bi_next, and call into __generic_make_request again. + * of the top of the list (no pretending) and so remove it from + * bio_list, and call into __generic_make_request again. 
* * The loop was structured like this to make only one call to * __generic_make_request (which is important as it is large and * inlined) and to keep the structure simple. */ BUG_ON(bio->bi_next); + bio_list_init(&bio_list_on_stack); + current->bio_list = &bio_list_on_stack; do { - current->bio_list = bio->bi_next; - if (bio->bi_next == NULL) - current->bio_tail = ¤t->bio_list; - else - bio->bi_next = NULL; __generic_make_request(bio); - bio = current->bio_list; + bio = bio_list_pop(current->bio_list); } while (bio); - current->bio_tail = NULL; /* deactivate */ + current->bio_list = NULL; /* deactivate */ } EXPORT_SYMBOL(generic_make_request); @@ -1617,8 +1614,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq) * limitation. */ blk_recalc_rq_segments(rq); - if (rq->nr_phys_segments > queue_max_phys_segments(q) || - rq->nr_phys_segments > queue_max_hw_segments(q)) { + if (rq->nr_phys_segments > queue_max_segments(q)) { printk(KERN_ERR "%s: over max segments limit.\n", __func__); return -EIO; } diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 15c630813b1c..96e83c2bdb94 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -278,7 +278,7 @@ static struct attribute *integrity_attrs[] = { NULL, }; -static struct sysfs_ops integrity_ops = { +static const struct sysfs_ops integrity_ops = { .show = &integrity_attr_show, .store = &integrity_attr_store, }; diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 98e6bf61b0ac..3f65c8aadb2f 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -91,7 +91,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) spin_lock_init(&ret->lock); ret->ioprio_changed = 0; ret->ioprio = 0; - ret->last_waited = jiffies; /* doesn't matter... */ + ret->last_waited = 0; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ret->cic_list); diff --git a/block/blk-merge.c b/block/blk-merge.c index 99cb5cf1f447..5e7dc9973458 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -206,8 +206,7 @@ static inline int ll_new_hw_segment(struct request_queue *q, { int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) || - req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) { + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -300,10 +299,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, total_phys_segments--; } - if (total_phys_segments > queue_max_phys_segments(q)) - return 0; - - if (total_phys_segments > queue_max_hw_segments(q)) + if (total_phys_segments > queue_max_segments(q)) return 0; /* Merge is OK... 
*/ diff --git a/block/blk-settings.c b/block/blk-settings.c index 5eeb9e0d256e..31e7a9375c13 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -91,10 +91,9 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); */ void blk_set_default_limits(struct queue_limits *lim) { - lim->max_phys_segments = MAX_PHYS_SEGMENTS; - lim->max_hw_segments = MAX_HW_SEGMENTS; + lim->max_segments = BLK_MAX_SEGMENTS; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; - lim->max_segment_size = MAX_SEGMENT_SIZE; + lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; lim->max_hw_sectors = INT_MAX; lim->max_discard_sectors = 0; @@ -154,7 +153,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->unplug_timer.data = (unsigned long)q; blk_set_default_limits(&q->limits); - blk_queue_max_sectors(q, SAFE_MAX_SECTORS); + blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); /* * If the caller didn't supply a lock, fall back to our embedded @@ -210,37 +209,32 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) EXPORT_SYMBOL(blk_queue_bounce_limit); /** - * blk_queue_max_sectors - set max sectors for a request for this queue + * blk_queue_max_hw_sectors - set max sectors for a request for this queue * @q: the request queue for the device - * @max_sectors: max sectors in the usual 512b unit + * @max_hw_sectors: max hardware sectors in the usual 512b unit * * Description: - * Enables a low level driver to set an upper limit on the size of - * received requests. + * Enables a low level driver to set a hard upper limit, + * max_hw_sectors, on the size of requests. max_hw_sectors is set by + * the device driver based upon the combined capabilities of I/O + * controller and storage device. + * + * max_sectors is a soft limit imposed by the block layer for + * filesystem type requests. This value can be overridden on a + * per-device basis in /sys/block/<device>/queue/max_sectors_kb. + * The soft limit can not exceed max_hw_sectors. 
**/ -void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) +void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) { - if ((max_sectors << 9) < PAGE_CACHE_SIZE) { - max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); + if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { + max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_sectors); + __func__, max_hw_sectors); } - if (BLK_DEF_MAX_SECTORS > max_sectors) - q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors; - else { - q->limits.max_sectors = BLK_DEF_MAX_SECTORS; - q->limits.max_hw_sectors = max_sectors; - } -} -EXPORT_SYMBOL(blk_queue_max_sectors); - -void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) -{ - if (BLK_DEF_MAX_SECTORS > max_sectors) - q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS; - else - q->limits.max_hw_sectors = max_sectors; + q->limits.max_hw_sectors = max_hw_sectors; + q->limits.max_sectors = min_t(unsigned int, max_hw_sectors, + BLK_DEF_MAX_SECTORS); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -257,17 +251,15 @@ void blk_queue_max_discard_sectors(struct request_queue *q, EXPORT_SYMBOL(blk_queue_max_discard_sectors); /** - * blk_queue_max_phys_segments - set max phys segments for a request for this queue + * blk_queue_max_segments - set max hw segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments * * Description: * Enables a low level driver to set an upper limit on the number of - * physical data segments in a request. This would be the largest sized - * scatter list the driver could handle. + * hw data segments in a request. **/ -void blk_queue_max_phys_segments(struct request_queue *q, - unsigned short max_segments) +void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) { if (!max_segments) { max_segments = 1; @@ -275,33 +267,9 @@ void blk_queue_max_phys_segments(struct request_queue *q, __func__, max_segments); } - q->limits.max_phys_segments = max_segments; + q->limits.max_segments = max_segments; } -EXPORT_SYMBOL(blk_queue_max_phys_segments); - -/** - * blk_queue_max_hw_segments - set max hw segments for a request for this queue - * @q: the request queue for the device - * @max_segments: max number of segments - * - * Description: - * Enables a low level driver to set an upper limit on the number of - * hw data segments in a request. This would be the largest number of - * address/length pairs the host adapter can actually give at once - * to the device. 
- **/ -void blk_queue_max_hw_segments(struct request_queue *q, - unsigned short max_segments) -{ - if (!max_segments) { - max_segments = 1; - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_segments); - } - - q->limits.max_hw_segments = max_segments; -} -EXPORT_SYMBOL(blk_queue_max_hw_segments); +EXPORT_SYMBOL(blk_queue_max_segments); /** * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg @@ -507,7 +475,7 @@ static unsigned int lcm(unsigned int a, unsigned int b) * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) * @b: the underlying queue limits (bottom, component device) - * @offset: offset to beginning of data within component device + * @start: first data sector within component device * * Description: * This function is used by stacking drivers like MD and DM to ensure @@ -525,10 +493,9 @@ static unsigned int lcm(unsigned int a, unsigned int b) * the alignment_offset is undefined. */ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, - sector_t offset) + sector_t start) { - sector_t alignment; - unsigned int top, bottom, ret = 0; + unsigned int top, bottom, alignment, ret = 0; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); @@ -537,18 +504,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); - t->max_phys_segments = min_not_zero(t->max_phys_segments, - b->max_phys_segments); - - t->max_hw_segments = min_not_zero(t->max_hw_segments, - b->max_hw_segments); + t->max_segments = min_not_zero(t->max_segments, b->max_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); t->misaligned |= b->misaligned; - alignment = queue_limit_alignment_offset(b, offset); + alignment = queue_limit_alignment_offset(b, start); /* Bottom device has different alignment. Check that it is * compatible with the current top alignment. @@ -611,11 +574,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, /* Discard alignment and granularity */ if (b->discard_granularity) { - unsigned int granularity = b->discard_granularity; - offset &= granularity - 1; - - alignment = (granularity + b->discard_alignment - offset) - & (granularity - 1); + alignment = queue_limit_discard_alignment(b, start); if (t->discard_granularity != 0 && t->discard_alignment != alignment) { @@ -657,7 +616,7 @@ int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, start += get_start_sect(bdev); - return blk_stack_limits(t, &bq->limits, start << 9); + return blk_stack_limits(t, &bq->limits, start); } EXPORT_SYMBOL(bdev_stack_limits); @@ -668,9 +627,8 @@ EXPORT_SYMBOL(bdev_stack_limits); * @offset: offset to beginning of data within component device * * Description: - * Merges the limits for two queues. Returns 0 if alignment - * didn't change. Returns -1 if adding the bottom device caused - * misalignment. + * Merges the limits for a top level gendisk and a bottom level + * block_device. 
*/ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset) @@ -678,9 +636,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, struct request_queue *t = disk->queue; struct request_queue *b = bdev_get_queue(bdev); - offset += get_start_sect(bdev) << 9; - - if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) { + if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; disk_name(disk, 0, top); @@ -752,22 +708,19 @@ EXPORT_SYMBOL(blk_queue_update_dma_pad); * does is adjust the queue so that the buf is always appended * silently to the scatterlist. * - * Note: This routine adjusts max_hw_segments to make room for - * appending the drain buffer. If you call - * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after - * calling this routine, you must set the limit to one fewer than your - * device can support otherwise there won't be room for the drain - * buffer. + * Note: This routine adjusts max_hw_segments to make room for appending + * the drain buffer. If you call blk_queue_max_segments() after calling + * this routine, you must set the limit to one fewer than your device + * can support otherwise there won't be room for the drain buffer. */ int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size) { - if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2) + if (queue_max_segments(q) < 2) return -EINVAL; /* make room for appending the drain */ - blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1); - blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1); + blk_queue_max_segments(q, queue_max_segments(q) - 1); q->dma_drain_needed = dma_drain_needed; q->dma_drain_buffer = buf; q->dma_drain_size = size; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8606c9543fdd..2ae2cb3f362f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { - return queue_var_show(blk_queue_nomerges(q), page); + return queue_var_show((blk_queue_nomerges(q) << 1) | + blk_queue_noxmerges(q), page); } static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, @@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&nm, page, count); spin_lock_irq(q->queue_lock); - if (nm) + queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); + if (nm == 2) queue_flag_set(QUEUE_FLAG_NOMERGES, q); - else - queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + else if (nm) + queue_flag_set(QUEUE_FLAG_NOXMERGES, q); spin_unlock_irq(q->queue_lock); return ret; @@ -447,7 +450,7 @@ static void blk_release_queue(struct kobject *kobj) kmem_cache_free(blk_requestq_cachep, q); } -static struct sysfs_ops queue_sysfs_ops = { +static const struct sysfs_ops queue_sysfs_ops = { .show = queue_attr_show, .store = queue_attr_store, }; diff --git a/block/bsg.c b/block/bsg.c index a9fd2d84b53a..46597a6bd112 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -260,7 +260,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, return ERR_PTR(ret); /* - * map scatter-gather elements seperately and string them to request + * map scatter-gather elements separately and string them to request */ rq = blk_get_request(q, rw, GFP_KERNEL); if 
(!rq) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 023f4e69a337..dee9d9378fee 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -19,7 +19,7 @@ * tunables */ /* max queue in one round of service */ -static const int cfq_quantum = 4; +static const int cfq_quantum = 8; static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; /* maximum backwards seek, in KiB */ static const int cfq_back_max = 16 * 1024; @@ -46,8 +46,9 @@ static const int cfq_hist_divisor = 4; #define CFQ_HW_QUEUE_MIN (5) #define CFQ_SERVICE_SHIFT 12 -#define CFQQ_SEEK_THR 8 * 1024 -#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR) +#define CFQQ_SEEK_THR (sector_t)(8 * 100) +#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) +#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) #define RQ_CIC(rq) \ ((struct cfq_io_context *) (rq)->elevator_private) @@ -77,11 +78,12 @@ struct cfq_rb_root { struct rb_root rb; struct rb_node *left; unsigned count; + unsigned total_weight; u64 min_vdisktime; struct rb_node *active; - unsigned total_weight; }; -#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, } +#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ + .count = 0, .min_vdisktime = 0, } /* * Per process-grouping structure @@ -115,11 +117,11 @@ struct cfq_queue { /* time when queue got scheduled in to dispatch first request. */ unsigned long dispatch_start; unsigned int allocated_slice; + unsigned int slice_dispatch; /* time when first request from queue completed and slice started. */ unsigned long slice_start; unsigned long slice_end; long slice_resid; - unsigned int slice_dispatch; /* pending metadata requests */ int meta_pending; @@ -130,13 +132,11 @@ struct cfq_queue { unsigned short ioprio, org_ioprio; unsigned short ioprio_class, org_ioprio_class; - unsigned int seek_samples; - u64 seek_total; - sector_t seek_mean; - sector_t last_request_pos; - pid_t pid; + u32 seek_history; + sector_t last_request_pos; + struct cfq_rb_root *service_tree; struct cfq_queue *new_cfqq; struct cfq_group *cfqg; @@ -223,8 +223,8 @@ struct cfq_data { unsigned int busy_queues; - int rq_in_driver[2]; - int sync_flight; + int rq_in_driver; + int rq_in_flight[2]; /* * queue-depth detection @@ -417,11 +417,6 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, struct io_context *); -static inline int rq_in_driver(struct cfq_data *cfqd) -{ - return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1]; -} - static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, bool is_sync) { @@ -951,10 +946,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; unsigned int major, minor; - /* Do we need to take this reference */ - if (!blkiocg_css_tryget(blkcg)) - return NULL;; - cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); if (cfqg || !create) goto done; @@ -985,7 +976,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); done: - blkiocg_css_put(blkcg); return cfqg; } @@ -1420,9 +1410,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - cfqd->rq_in_driver[rq_is_sync(rq)]++; + cfqd->rq_in_driver++; cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", - rq_in_driver(cfqd)); + cfqd->rq_in_driver); cfqd->last_position = blk_rq_pos(rq) + 
blk_rq_sectors(rq); } @@ -1430,12 +1420,11 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) static void cfq_deactivate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - const int sync = rq_is_sync(rq); - WARN_ON(!cfqd->rq_in_driver[sync]); - cfqd->rq_in_driver[sync]--; + WARN_ON(!cfqd->rq_in_driver); + cfqd->rq_in_driver--; cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", - rq_in_driver(cfqd)); + cfqd->rq_in_driver); } static void cfq_remove_request(struct request *rq) @@ -1673,16 +1662,7 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct request *rq, bool for_preempt) { - sector_t sdist = cfqq->seek_mean; - - if (!sample_valid(cfqq->seek_samples)) - sdist = CFQQ_SEEK_THR; - - /* if seek_mean is big, using it as close criteria is meaningless */ - if (sdist > CFQQ_SEEK_THR && !for_preempt) - sdist = CFQQ_SEEK_THR; - - return cfq_dist_from_last(cfqd, rq) <= sdist; + return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR; } static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, @@ -1878,8 +1858,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) cfqq->dispatched++; elv_dispatch_sort(q, rq); - if (cfq_cfqq_sync(cfqq)) - cfqd->sync_flight++; + cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; cfqq->nr_sectors += blk_rq_sectors(rq); } @@ -2219,6 +2198,19 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) return dispatched; } +static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, + struct cfq_queue *cfqq) +{ + /* the queue hasn't finished any request, can't estimate */ + if (cfq_cfqq_slice_new(cfqq)) + return 1; + if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, + cfqq->slice_end)) + return 1; + + return 0; +} + static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) { unsigned int max_dispatch; @@ -2226,16 +2218,16 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* * Drain async requests before we start sync IO */ - if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) + if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC]) return false; /* * If this is an async queue and we have sync IO in flight, let it wait */ - if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) + if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq)) return false; - max_dispatch = cfqd->cfq_quantum; + max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1); if (cfq_class_idle(cfqq)) max_dispatch = 1; @@ -2252,13 +2244,22 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* * We have other queues, don't allow more IO from this one */ - if (cfqd->busy_queues > 1) + if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq)) return false; /* * Sole queue user, no limit */ - max_dispatch = -1; + if (cfqd->busy_queues == 1) + max_dispatch = -1; + else + /* + * Normally we start throttling cfqq when cfq_quantum/2 + * requests have been dispatched. But we can drive + * deeper queue depths at the beginning of slice + * subjected to upper limit of cfq_quantum. 
+ * */ + max_dispatch = cfqd->cfq_quantum; } /* @@ -2980,30 +2981,20 @@ static void cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct request *rq) { - sector_t sdist; - u64 total; + sector_t sdist = 0; + sector_t n_sec = blk_rq_sectors(rq); + if (cfqq->last_request_pos) { + if (cfqq->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - cfqq->last_request_pos; + else + sdist = cfqq->last_request_pos - blk_rq_pos(rq); + } - if (!cfqq->last_request_pos) - sdist = 0; - else if (cfqq->last_request_pos < blk_rq_pos(rq)) - sdist = blk_rq_pos(rq) - cfqq->last_request_pos; + cfqq->seek_history <<= 1; + if (blk_queue_nonrot(cfqd->queue)) + cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT); else - sdist = cfqq->last_request_pos - blk_rq_pos(rq); - - /* - * Don't allow the seek distance to get too large from the - * odd fragment, pagein, etc - */ - if (cfqq->seek_samples <= 60) /* second&third seek */ - sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024); - else - sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64); - - cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8; - cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8; - total = cfqq->seek_total + (cfqq->seek_samples/2); - do_div(total, cfqq->seek_samples); - cfqq->seek_mean = (sector_t)total; + cfqq->seek_history |= (sdist > CFQQ_SEEK_THR); } /* @@ -3028,8 +3019,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_mark_cfqq_deep(cfqq); if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || - (!cfq_cfqq_deep(cfqq) && sample_valid(cfqq->seek_samples) - && CFQQ_SEEKY(cfqq))) + (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { if (cic->ttime_mean > cfqd->cfq_slice_idle) @@ -3215,14 +3205,14 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd) { struct cfq_queue *cfqq = cfqd->active_queue; - if (rq_in_driver(cfqd) > cfqd->hw_tag_est_depth) - cfqd->hw_tag_est_depth = rq_in_driver(cfqd); + if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth) + cfqd->hw_tag_est_depth = cfqd->rq_in_driver; if (cfqd->hw_tag == 1) return; if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && - rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN) + cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) return; /* @@ -3232,7 +3222,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd) */ if (cfqq && cfq_cfqq_idle_window(cfqq) && cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] < - CFQ_HW_QUEUE_MIN && rq_in_driver(cfqd) < CFQ_HW_QUEUE_MIN) + CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN) return; if (cfqd->hw_tag_samples++ < 50) @@ -3285,13 +3275,12 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_update_hw_tag(cfqd); - WARN_ON(!cfqd->rq_in_driver[sync]); + WARN_ON(!cfqd->rq_in_driver); WARN_ON(!cfqq->dispatched); - cfqd->rq_in_driver[sync]--; + cfqd->rq_in_driver--; cfqq->dispatched--; - if (cfq_cfqq_sync(cfqq)) - cfqd->sync_flight--; + cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; if (sync) { RQ_CIC(rq)->last_end_request = now; @@ -3345,7 +3334,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } } - if (!rq_in_driver(cfqd)) + if (!cfqd->rq_in_driver) cfq_schedule_dispatch(cfqd); } diff --git a/block/elevator.c b/block/elevator.c index 9ad5ccc4c5ee..df75676f6671 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -474,6 +474,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) int ret; /* + * Levels of merges: + * nomerges: No merges at all 
attempted + * noxmerges: Only simple one-hit cache try + * merges: All merge tries attempted + */ + if (blk_queue_nomerges(q)) + return ELEVATOR_NO_MERGE; + + /* * First try one-hit cache. */ if (q->last_merge) { @@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) } } - if (blk_queue_nomerges(q)) + if (blk_queue_noxmerges(q)) return ELEVATOR_NO_MERGE; /* @@ -883,7 +892,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr, return error; } -static struct sysfs_ops elv_sysfs_ops = { +static const struct sysfs_ops elv_sysfs_ops = { .show = elv_attr_show, .store = elv_attr_store, }; |
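
The blk-settings.c hunks above fold the old max_phys_segments/max_hw_segments pair into a single max_segments limit, and blk_queue_max_hw_sectors() now derives the soft max_sectors cap (the smaller of the hardware limit and BLK_DEF_MAX_SECTORS) by itself. A minimal sketch of a driver describing its limits with the consolidated API; the function name and the numeric values are illustrative assumptions, only the blk_queue_*() calls come from the patch:

#include <linux/blkdev.h>

/* Hypothetical driver setup; the limit values below are made-up examples. */
static void examplblk_set_limits(struct request_queue *q)
{
	/*
	 * Hard per-request limit from the controller/device. The block
	 * layer now sets max_sectors = min(max_hw_sectors,
	 * BLK_DEF_MAX_SECTORS) inside this call.
	 */
	blk_queue_max_hw_sectors(q, 1024);

	/* One consolidated segment count replaces the old phys/hw pair. */
	blk_queue_max_segments(q, 128);
	blk_queue_max_segment_size(q, 64 * 1024);
}

As the updated kernel-doc notes, the soft limit can still be lowered per device through /sys/block/<device>/queue/max_sectors_kb, but it can never exceed max_hw_sectors.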
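
The blk-core.c hunk replaces the current->bio_{list,tail} pointer pair with an on-stack struct bio_list, so bios submitted recursively from a ->make_request_fn are queued and drained iteratively instead of growing the stack. Condensed from the patched generic_make_request() for readability (a sketch, not a drop-in copy; __generic_make_request() is the static helper inside blk-core.c):

#include <linux/bio.h>
#include <linux/sched.h>

void generic_make_request(struct bio *bio)
{
	struct bio_list bio_list_on_stack;

	if (current->bio_list) {
		/* Already active in this task: just queue the new bio. */
		bio_list_add(current->bio_list, bio);
		return;
	}

	bio_list_init(&bio_list_on_stack);
	current->bio_list = &bio_list_on_stack;
	do {
		__generic_make_request(bio);	/* may queue more bios */
		bio = bio_list_pop(current->bio_list);
	} while (bio);
	current->bio_list = NULL;		/* deactivate */
}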
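
In cfq-iosched.c, the per-queue seek statistics (seek_samples/seek_total/seek_mean) give way to a 32-bit seek_history shift register, and CFQQ_SEEKY() now asks whether more than 32/8 = 4 of the last 32 requests looked seeky. The same heuristic lifted out of the patch into a self-contained form; the helper names here are invented for illustration:

#include <linux/bitops.h>	/* hweight32() */
#include <linux/types.h>	/* u32, bool, sector_t */

#define CFQQ_SEEK_THR		(sector_t)(8 * 100)
#define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)

/* A queue counts as seeky once more than 4 of its last 32 samples were. */
static inline bool cfqq_seeky(u32 seek_history)
{
	return hweight32(seek_history) > 32 / 8;
}

/*
 * Shift one sample into the history. On rotational media a request is
 * marked when it lands far from the previous one; on non-rotational
 * media the patch marks small requests (below CFQQ_SECT_THR_NONROT)
 * instead of distant ones.
 */
static u32 cfqq_update_seek_history(u32 history, sector_t last_pos,
				    sector_t pos, sector_t nr_sects,
				    bool nonrot)
{
	sector_t sdist = 0;

	if (last_pos)
		sdist = last_pos < pos ? pos - last_pos : last_pos - pos;

	history <<= 1;
	if (nonrot)
		history |= (nr_sects < CFQQ_SECT_THR_NONROT);
	else
		history |= (sdist > CFQQ_SEEK_THR);

	return history;
}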