diff options
author | Eric Sandeen <sandeen@redhat.com> | 2010-10-27 21:30:13 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 21:30:13 -0400 |
commit | 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 (patch) | |
tree | ed7e1999a2349811e3cae0f0bdd53a93aea413d7 /fs/ext4/inode.c | |
parent | bbd08344e3df8c7c1d7aa04bc0c8c9367806e12d (diff) | |
download | talos-obmc-linux-5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0.tar.gz talos-obmc-linux-5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0.zip |
ext4: implement writeback livelock avoidance using page tagging
This is analogous to Jan Kara's commit,
f446daaea9d4a420d16c606f755f3689dcb2d0ce
mm: implement writeback livelock avoidance using page tagging
but since ext4 forked write_cache_pages (as write_cache_pages_da),
we need to reimplement it there (and in ext4_da_writepages, since
range_cyclic handling was moved there).
If you start a large buffered IO to a file, and then issue
an fsync after it, you'll find that the fsync does not complete
until the other IO stops.
If you continue re-dirtying the file (say, putting dd
with conv=notrunc in a loop), when fsync finally completes
(after all IO is done), it reports via tracing that
it has written many more pages than the file contains;
in other words it has synced and re-synced pages in
the file multiple times.
This then leads to problems with our writeback_index
update: the update advances the index by the number of
pages written, which can push writeback_index past the
end of the file.
With the following patch, we only sync the pages that
were dirty at the time the sync started.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6671fcbb5293..c9ea95ba5fde 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2809,16 +2809,21 @@ static int write_cache_pages_da(struct address_space *mapping,
 	pgoff_t index;
 	pgoff_t end; /* Inclusive */
 	long nr_to_write = wbc->nr_to_write;
+	int tag;
 
 	pagevec_init(&pvec, 0);
 	index = wbc->range_start >> PAGE_CACHE_SHIFT;
 	end = wbc->range_end >> PAGE_CACHE_SHIFT;
 
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
+
 	while (!done && (index <= end)) {
 		int i;
 
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			      PAGECACHE_TAG_DIRTY,
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
 			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
 			break;
@@ -2923,6 +2928,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 	long desired_nr_to_write, nr_to_writebump = 0;
 	loff_t range_start = wbc->range_start;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+	pgoff_t end;
 
 	trace_ext4_da_writepages(inode, wbc);
 
@@ -2958,8 +2964,11 @@ static int ext4_da_writepages(struct address_space *mapping,
 		wbc->range_start = index << PAGE_CACHE_SHIFT;
 		wbc->range_end = LLONG_MAX;
 		wbc->range_cyclic = 0;
-	} else
+		end = -1;
+	} else {
 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+	}
 
 	/*
 	 * This works around two forms of stupidity. The first is in
@@ -3000,6 +3009,9 @@ static int ext4_da_writepages(struct address_space *mapping,
 	pages_skipped = wbc->pages_skipped;
 
 retry:
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag_pages_for_writeback(mapping, index, end);
+
 	while (!ret && wbc->nr_to_write > 0) {
 
 		/*