diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-17 09:04:24 +0100 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2008-01-25 08:07:28 +0000 |
commit | b8e7cbb65bcc99630e123422c6829ce3c0fcdf14 (patch) | |
tree | a9f68259b90e9e65ea7f0369f448d580a8944f06 /fs/gfs2 | |
parent | 9ff8ec32e58875022447af619bec6e5aee7c77e4 (diff) | |
download | blackbird-op-linux-b8e7cbb65bcc99630e123422c6829ce3c0fcdf14.tar.gz blackbird-op-linux-b8e7cbb65bcc99630e123422c6829ce3c0fcdf14.zip |
[GFS2] Add writepages for GFS2 jdata
This patch resolves a lock ordering issue where we had been getting
a transaction lock in the wrong order with respect to the page lock.
By using writepages rather than just writepage, it is then possible
to start a transaction before locking the page, and thus matching the
locking order elsewhere in the code.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/log.c | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 213 |
2 files changed, 206 insertions, 9 deletions
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 70b404d2774b..1e1fe8def375 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -650,7 +650,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) get_bh(bh); gfs2_log_unlock(sdp); lock_buffer(bh); - if (test_clear_buffer_dirty(bh)) { + if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; submit_bh(WRITE, bh); } else { diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 4bf73ed945ae..48913e569907 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -21,6 +21,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> #include <linux/swap.h> +#include <linux/pagevec.h> #include "gfs2.h" #include "incore.h" @@ -189,6 +190,34 @@ static int gfs2_ordered_writepage(struct page *page, } /** + * __gfs2_jdata_writepage - The core of jdata writepage + * @page: The page to write + * @wbc: The writeback control + * + * This is shared between writepage and writepages and implements the + * core of the writepage operation. If a transaction is required then + * PageChecked will have been set and the transaction will have + * already been started before this is called. + */ + +static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (PageChecked(page)) { + ClearPageChecked(page); + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); + } + return block_write_full_page(page, gfs2_get_block_noalloc, wbc); +} + +/** * gfs2_jdata_writepage - Write complete page * @page: Page to write * @@ -199,7 +228,6 @@ static int gfs2_ordered_writepage(struct page *page, static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int done_trans = 0; @@ -209,18 +237,14 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc return error; if (PageChecked(page)) { + if (wbc->sync_mode != WB_SYNC_ALL) + goto out_ignore; error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); if (error) goto out_ignore; - ClearPageChecked(page); - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); done_trans = 1; } - error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); + error = __gfs2_jdata_writepage(page, wbc); if (done_trans) gfs2_trans_end(sdp); return error; @@ -247,6 +271,178 @@ static int gfs2_writeback_writepages(struct address_space *mapping, } /** + * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages + * @mapping: The mapping + * @wbc: The writeback control + * @writepage: The writepage function to call for each page + * @pvec: The vector of pages + * @nr_pages: The number of pages to write + * + * Returns: non-zero if loop should terminate, zero otherwise + */ + +static int gfs2_write_jdata_pagevec(struct address_space *mapping, + struct writeback_control *wbc, + struct pagevec *pvec, + int nr_pages, pgoff_t end) +{ + struct inode *inode = mapping->host; + struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset = i_size & (PAGE_CACHE_SIZE-1); + unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); + struct backing_dev_info *bdi = mapping->backing_dev_info; + int i; + int ret; + + ret = gfs2_trans_begin(sdp, nrblocks, 0); + if (ret < 0) + return ret; + + for(i = 0; i < nr_pages; i++) { + struct page *page = pvec->pages[i]; + + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + ret = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + /* Is the page fully outside i_size? (truncate in progress) */ + if (page->index > end_index || (page->index == end_index && !offset)) { + page->mapping->a_ops->invalidatepage(page, 0); + unlock_page(page); + continue; + } + + ret = __gfs2_jdata_writepage(page, wbc); + + if (ret || (--(wbc->nr_to_write) <= 0)) + ret = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + ret = 1; + } + + } + gfs2_trans_end(sdp); + return ret; +} + +/** + * gfs2_write_cache_jdata - Like write_cache_pages but different + * @mapping: The mapping to write + * @wbc: The writeback control + * @writepage: The writepage function to call + * @data: The data to pass to writepage + * + * The reason that we use our own function here is that we need to + * start transactions before we grab page locks. This allows us + * to get the ordering right. + */ + +static int gfs2_write_cache_jdata(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } + +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + scanned = 1; + ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); + if (ret) + done = 1; + if (ret > 0) + ret = 0; + + pagevec_release(&pvec); + cond_resched(); + } + + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + + +/** + * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk + * @mapping: The mapping to write + * @wbc: The writeback control + * + */ + +static int gfs2_jdata_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + int ret; + + ret = gfs2_write_cache_jdata(mapping, wbc); + if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { + gfs2_log_flush(sdp, ip->i_gl); + ret = gfs2_write_cache_jdata(mapping, wbc); + } + return ret; +} + +/** * stuffed_readpage - Fill in a Linux page with stuffed file data * @ip: the inode * @page: the page @@ -937,6 +1133,7 @@ static const struct address_space_operations gfs2_ordered_aops = { static const struct address_space_operations gfs2_jdata_aops = { .writepage = gfs2_jdata_writepage, + .writepages = gfs2_jdata_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, |