summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/extent_io.c44
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/volumes.c124
-rw-r--r--fs/btrfs/volumes.h13
5 files changed, 141 insertions, 46 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..fec18b43c2c3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2095,10 +2095,10 @@ static int write_dev_supers(struct btrfs_device *device,
device->barriers = 0;
get_bh(bh);
lock_buffer(bh);
- ret = submit_bh(WRITE, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
}
} else {
- ret = submit_bh(WRITE, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
}
if (!ret && wait) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..483b6727aaaf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -50,7 +50,10 @@ struct extent_page_data {
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
*/
- int extent_locked;
+ unsigned int extent_locked:1;
+
+ /* tells the submit_bio code to use a WRITE_SYNC */
+ unsigned int sync_io:1;
};
int __init extent_io_init(void)
@@ -2136,8 +2139,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 delalloc_end;
int page_started;
int compressed;
+ int write_flags;
unsigned long nr_written = 0;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ write_flags = WRITE_SYNC_PLUG;
+ else
+ write_flags = WRITE;
+
WARN_ON(!PageLocked(page));
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
@@ -2314,9 +2323,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
(unsigned long long)end);
}
- ret = submit_extent_page(WRITE, tree, page, sector,
- iosize, pg_offset, bdev,
- &epd->bio, max_nr,
+ ret = submit_extent_page(write_flags, tree, page,
+ sector, iosize, pg_offset,
+ bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
0, 0, 0);
if (ret)
@@ -2460,15 +2469,23 @@ retry:
return ret;
}
-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
{
- struct extent_page_data *epd = data;
if (epd->bio) {
- submit_one_bio(WRITE, epd->bio, 0, 0);
+ if (epd->sync_io)
+ submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+ else
+ submit_one_bio(WRITE, epd->bio, 0, 0);
epd->bio = NULL;
}
}
+static noinline void flush_write_bio(void *data)
+{
+ struct extent_page_data *epd = data;
+ flush_epd_write_bio(epd);
+}
+
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc)
@@ -2480,6 +2497,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = wbc->bdi,
@@ -2490,13 +2508,11 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.range_end = (loff_t)-1,
};
-
ret = __extent_writepage(page, wbc, &epd);
extent_write_cache_pages(tree, mapping, &wbc_writepages,
__extent_writepage, &epd, flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2515,6 +2531,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 1,
+ .sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2557,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
start += PAGE_CACHE_SIZE;
}
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2556,13 +2572,13 @@ int extent_writepages(struct extent_io_tree *tree,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
ret = extent_write_cache_pages(tree, mapping, wbc,
__extent_writepage, &epd,
flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
/* start IO across the range first to instantiate any delalloc
* extents
*/
- btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
+ btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
/* The compression code will leave pages locked but return from
* writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..e53835b88594 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
return NULL;
}
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+ struct bio *head, struct bio *tail)
+{
+
+ struct bio *old_head;
+
+ old_head = pending_bios->head;
+ pending_bios->head = head;
+ if (pending_bios->tail)
+ tail->bi_next = old_head;
+ else
+ pending_bios->tail = tail;
+}
+
/*
* we try to collect pending bios for a device so we don't get a large
* number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
struct bio *pending;
struct backing_dev_info *bdi;
struct btrfs_fs_info *fs_info;
+ struct btrfs_pending_bios *pending_bios;
struct bio *tail;
struct bio *cur;
int again = 0;
- unsigned long num_run = 0;
+ unsigned long num_run;
+ unsigned long num_sync_run;
unsigned long limit;
unsigned long last_waited = 0;
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
+ /* we want to make sure that every time we switch from the sync
+ * list to the normal list, we unplug
+ */
+ num_sync_run = 0;
+
loop:
spin_lock(&device->io_lock);
+ num_run = 0;
loop_lock:
+
/* take all the bios off the list at once and process them
* later on (without the lock held). But, remember the
* tail and other pointers so the bios can be properly reinserted
* into the list if we hit congestion
*/
- pending = device->pending_bios;
- tail = device->pending_bio_tail;
+ if (device->pending_sync_bios.head)
+ pending_bios = &device->pending_sync_bios;
+ else
+ pending_bios = &device->pending_bios;
+
+ pending = pending_bios->head;
+ tail = pending_bios->tail;
WARN_ON(pending && !tail);
- device->pending_bios = NULL;
- device->pending_bio_tail = NULL;
/*
* if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
* device->running_pending is used to synchronize with the
* schedule_bio code.
*/
- if (pending) {
- again = 1;
- device->running_pending = 1;
- } else {
+ if (device->pending_sync_bios.head == NULL &&
+ device->pending_bios.head == NULL) {
again = 0;
device->running_pending = 0;
+ } else {
+ again = 1;
+ device->running_pending = 1;
}
+
+ pending_bios->head = NULL;
+ pending_bios->tail = NULL;
+
spin_unlock(&device->io_lock);
+ /*
+ * if we're doing the regular priority list, make sure we unplug
+ * for any high prio bios we've sent down
+ */
+ if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
while (pending) {
+
+ rmb();
+ if (pending_bios != &device->pending_sync_bios &&
+ device->pending_sync_bios.head &&
+ num_run > 16) {
+ cond_resched();
+ spin_lock(&device->io_lock);
+ requeue_list(pending_bios, pending, tail);
+ goto loop_lock;
+ }
+
cur = pending;
pending = pending->bi_next;
cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
wake_up(&fs_info->async_submit_wait);
BUG_ON(atomic_read(&cur->bi_cnt) == 0);
- bio_get(cur);
submit_bio(cur->bi_rw, cur);
- bio_put(cur);
num_run++;
+ if (bio_sync(cur))
+ num_sync_run++;
+
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
/*
* we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
*/
if (pending && bdi_write_congested(bdi) && num_run > 16 &&
fs_info->fs_devices->open_devices > 1) {
- struct bio *old_head;
struct io_context *ioc;
ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
* against it before looping
*/
last_waited = ioc->last_waited;
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
continue;
}
spin_lock(&device->io_lock);
-
- old_head = device->pending_bios;
- device->pending_bios = pending;
- if (device->pending_bio_tail)
- tail->bi_next = old_head;
- else
- device->pending_bio_tail = tail;
-
+ requeue_list(pending_bios, pending, tail);
device->running_pending = 1;
spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
goto done;
}
}
+
+ if (num_sync_run) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
+ cond_resched();
if (again)
goto loop;
spin_lock(&device->io_lock);
- if (device->pending_bios)
+ if (device->pending_bios.head || device->pending_sync_bios.head)
goto loop_lock;
spin_unlock(&device->io_lock);
@@ -2497,7 +2562,7 @@ again:
max_errors = 1;
}
}
- if (multi_ret && rw == WRITE &&
+ if (multi_ret && (rw & (1 << BIO_RW)) &&
stripes_allocated < stripes_required) {
stripes_allocated = map->num_stripes;
free_extent_map(em);
@@ -2762,6 +2827,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
int rw, struct bio *bio)
{
int should_queue = 1;
+ struct btrfs_pending_bios *pending_bios;
/* don't bother with additional async steps for reads, right now */
if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2849,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
bio->bi_rw |= rw;
spin_lock(&device->io_lock);
+ if (bio_sync(bio))
+ pending_bios = &device->pending_sync_bios;
+ else
+ pending_bios = &device->pending_bios;
- if (device->pending_bio_tail)
- device->pending_bio_tail->bi_next = bio;
+ if (pending_bios->tail)
+ pending_bios->tail->bi_next = bio;
- device->pending_bio_tail = bio;
- if (!device->pending_bios)
- device->pending_bios = bio;
+ pending_bios->tail = bio;
+ if (!pending_bios->head)
+ pending_bios->head = bio;
if (device->running_pending)
should_queue = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5836327ba5dd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
#include "async-thread.h"
struct buffer_head;
+struct btrfs_pending_bios {
+ struct bio *head;
+ struct bio *tail;
+};
+
struct btrfs_device {
struct list_head dev_list;
struct list_head dev_alloc_list;
struct btrfs_fs_devices *fs_devices;
struct btrfs_root *dev_root;
- struct bio *pending_bios;
- struct bio *pending_bio_tail;
+
+ /* regular prio bios */
+ struct btrfs_pending_bios pending_bios;
+ /* WRITE_SYNC bios */
+ struct btrfs_pending_bios pending_sync_bios;
+
int running_pending;
u64 generation;
OpenPOWER on IntegriCloud