diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-09-18 16:03:16 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-09-18 16:08:46 -0400 |
commit | f85d7d6c8f2ad4a86a1f4f4e3791f36dede2fa76 (patch) | |
tree | 40538328090b5eb2f3f95a9bc5a290c83384ff43 /fs/btrfs/extent_io.c | |
parent | 11833d66be94b514652466802100378046c16b72 (diff) | |
download | talos-op-linux-f85d7d6c8f2ad4a86a1f4f4e3791f36dede2fa76.tar.gz talos-op-linux-f85d7d6c8f2ad4a86a1f4f4e3791f36dede2fa76.zip |
Btrfs: properly honor wbc->nr_to_write changes
When btrfs fills a delayed allocation, it tries to increase
the wbc nr_to_write to cover a big part of allocation. The
theory is that we're doing contiguous IO and writing a few
more blocks will save seeks overall at a very low cost.
The problem is that extent_write_cache_pages could ignore
the new higher nr_to_write if nr_to_write had already gone
down to zero. We fix that by rechecking the nr_to_write
for every page that is processed in the pagevec.
This updates the math around bumping the nr_to_write value
to make sure we don't leave a tiny amount of IO hanging
around for the very end of a new extent.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 38 |
1 files changed, 27 insertions, 11 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a102422cd92e..7e16c6d8153f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2182,7 +2182,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_end = 0; page_started = 0; if (!epd->extent_locked) { - u64 delalloc_to_write; + u64 delalloc_to_write = 0; /* * make sure the wbc mapping index is at least updated * to this page. @@ -2202,16 +2202,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, tree->ops->fill_delalloc(inode, page, delalloc_start, delalloc_end, &page_started, &nr_written); - delalloc_to_write = (delalloc_end - - max_t(u64, page_offset(page), - delalloc_start) + 1) >> - PAGE_CACHE_SHIFT; - if (wbc->nr_to_write < delalloc_to_write) { - wbc->nr_to_write = min_t(long, 8192, - delalloc_to_write); - } + /* + * delalloc_end is already one less than the total + * length, so we don't subtract one from + * PAGE_CACHE_SIZE + */ + delalloc_to_write += (delalloc_end - delalloc_start + + PAGE_CACHE_SIZE) >> + PAGE_CACHE_SHIFT; delalloc_start = delalloc_end + 1; } + if (wbc->nr_to_write < delalloc_to_write) { + int thresh = 8192; + + if (delalloc_to_write < thresh * 2) + thresh = delalloc_to_write; + wbc->nr_to_write = min_t(u64, delalloc_to_write, + thresh); + } /* did the fill delalloc function already unlock and start * the IO? @@ -2388,6 +2396,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, { int ret = 0; int done = 0; + int nr_to_write_done = 0; struct pagevec pvec; int nr_pages; pgoff_t index; @@ -2407,7 +2416,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, scanned = 1; } retry: - while (!done && (index <= end) && + while (!done && !nr_to_write_done && (index <= end) && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { @@ -2458,8 +2467,15 @@ retry: unlock_page(page); ret = 0; } - if (ret || wbc->nr_to_write <= 0) + if (ret) done = 1; + + /* + * the filesystem may choose to bump up nr_to_write. + * We have to make sure to honor the new nr_to_write + * at any time + */ + nr_to_write_done = wbc->nr_to_write <= 0; } pagevec_release(&pvec); cond_resched(); |