| author | Dmitry Torokhov <dtor@insightbb.com> | 2007-02-10 01:26:32 -0500 |
|---|---|---|
| committer | Dmitry Torokhov <dtor@insightbb.com> | 2007-02-10 01:26:32 -0500 |
| commit | b22364c8eec89e6b0c081a237f3b6348df87796f (patch) | |
| tree | 233a923281fb640106465d076997ff511efb6edf /mm/page-writeback.c | |
| parent | 2c8dc071517ec2843869024dc82be2e246f41064 (diff) | |
| parent | 66efc5a7e3061c3597ac43a8bb1026488d57e66b (diff) | |
| download | blackbird-op-linux-b22364c8eec89e6b0c081a237f3b6348df87796f.tar.gz, blackbird-op-linux-b22364c8eec89e6b0c081a237f3b6348df87796f.zip | |
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 147 |
1 file changed, 70 insertions, 77 deletions
```diff
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8d9b19f239c3..be0efbde4994 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -21,6 +21,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/backing-dev.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/blkdev.h>
 #include <linux/mpage.h>
 #include <linux/rmap.h>
@@ -132,11 +133,9 @@ get_dirty_limits(long *pbackground, long *pdirty,
 #ifdef CONFIG_HIGHMEM
         /*
-         * If this mapping can only allocate from low memory,
-         * we exclude high memory from our count.
+         * We always exclude high memory from our count.
          */
-        if (mapping && !(mapping_gfp_mask(mapping) & __GFP_HIGHMEM))
-                available_memory -= totalhigh_pages;
+        available_memory -= totalhigh_pages;
 #endif
@@ -525,28 +524,25 @@ static struct notifier_block __cpuinitdata ratelimit_nb = {
 };
 
 /*
- * If the machine has a large highmem:lowmem ratio then scale back the default
- * dirty memory thresholds: allowing too much dirty highmem pins an excessive
- * number of buffer_heads.
+ * Called early on to tune the page writeback dirty limits.
+ *
+ * We used to scale dirty pages according to how total memory
+ * related to pages that could be allocated for buffers (by
+ * comparing nr_free_buffer_pages() to vm_total_pages.
+ *
+ * However, that was when we used "dirty_ratio" to scale with
+ * all memory, and we don't do that any more. "dirty_ratio"
+ * is now applied to total non-HIGHPAGE memory (by subtracting
+ * totalhigh_pages from vm_total_pages), and as such we can't
+ * get into the old insane situation any more where we had
+ * large amounts of dirty pages compared to a small amount of
+ * non-HIGHMEM memory.
+ *
+ * But we might still want to scale the dirty_ratio by how
+ * much memory the box has..
 */
 void __init page_writeback_init(void)
 {
-        long buffer_pages = nr_free_buffer_pages();
-        long correction;
-
-        correction = (100 * 4 * buffer_pages) / vm_total_pages;
-
-        if (correction < 100) {
-                dirty_background_ratio *= correction;
-                dirty_background_ratio /= 100;
-                vm_dirty_ratio *= correction;
-                vm_dirty_ratio /= 100;
-
-                if (dirty_background_ratio <= 0)
-                        dirty_background_ratio = 1;
-                if (vm_dirty_ratio <= 0)
-                        vm_dirty_ratio = 1;
-        }
         mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
         writeback_set_ratelimit();
         register_cpu_notifier(&ratelimit_nb);
@@ -761,23 +757,24 @@ int __set_page_dirty_nobuffers(struct page *page)
         struct address_space *mapping = page_mapping(page);
         struct address_space *mapping2;
 
-        if (mapping) {
-                write_lock_irq(&mapping->tree_lock);
-                mapping2 = page_mapping(page);
-                if (mapping2) { /* Race with truncate? */
-                        BUG_ON(mapping2 != mapping);
-                        if (mapping_cap_account_dirty(mapping))
-                                __inc_zone_page_state(page,
-                                        NR_FILE_DIRTY);
-                        radix_tree_tag_set(&mapping->page_tree,
-                                page_index(page), PAGECACHE_TAG_DIRTY);
-                }
-                write_unlock_irq(&mapping->tree_lock);
-                if (mapping->host) {
-                        /* !PageAnon && !swapper_space */
-                        __mark_inode_dirty(mapping->host,
-                                I_DIRTY_PAGES);
+        if (!mapping)
+                return 1;
+
+        write_lock_irq(&mapping->tree_lock);
+        mapping2 = page_mapping(page);
+        if (mapping2) { /* Race with truncate? */
+                BUG_ON(mapping2 != mapping);
+                if (mapping_cap_account_dirty(mapping)) {
+                        __inc_zone_page_state(page, NR_FILE_DIRTY);
+                        task_io_account_write(PAGE_CACHE_SIZE);
                 }
+                radix_tree_tag_set(&mapping->page_tree,
+                        page_index(page), PAGECACHE_TAG_DIRTY);
+        }
+        write_unlock_irq(&mapping->tree_lock);
+        if (mapping->host) {
+                /* !PageAnon && !swapper_space */
+                __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
         }
         return 1;
 }
@@ -843,39 +840,6 @@ int set_page_dirty_lock(struct page *page)
 EXPORT_SYMBOL(set_page_dirty_lock);
 
 /*
- * Clear a page's dirty flag, while caring for dirty memory accounting.
- * Returns true if the page was previously dirty.
- */
-int test_clear_page_dirty(struct page *page)
-{
-        struct address_space *mapping = page_mapping(page);
-        unsigned long flags;
-
-        if (mapping) {
-                write_lock_irqsave(&mapping->tree_lock, flags);
-                if (TestClearPageDirty(page)) {
-                        radix_tree_tag_clear(&mapping->page_tree,
-                                                page_index(page),
-                                                PAGECACHE_TAG_DIRTY);
-                        write_unlock_irqrestore(&mapping->tree_lock, flags);
-                        /*
-                         * We can continue to use `mapping' here because the
-                         * page is locked, which pins the address_space
-                         */
-                        if (mapping_cap_account_dirty(mapping)) {
-                                page_mkclean(page);
-                                dec_zone_page_state(page, NR_FILE_DIRTY);
-                        }
-                        return 1;
-                }
-                write_unlock_irqrestore(&mapping->tree_lock, flags);
-                return 0;
-        }
-        return TestClearPageDirty(page);
-}
-EXPORT_SYMBOL(test_clear_page_dirty);
-
-/*
  * Clear a page's dirty flag, while caring for dirty memory accounting.
  * Returns true if the page was previously dirty.
  *
@@ -893,12 +857,41 @@ int clear_page_dirty_for_io(struct page *page)
 {
         struct address_space *mapping = page_mapping(page);
 
-        if (mapping) {
+        if (mapping && mapping_cap_account_dirty(mapping)) {
+                /*
+                 * Yes, Virginia, this is indeed insane.
+                 *
+                 * We use this sequence to make sure that
+                 *  (a) we account for dirty stats properly
+                 *  (b) we tell the low-level filesystem to
+                 *      mark the whole page dirty if it was
+                 *      dirty in a pagetable. Only to then
+                 *  (c) clean the page again and return 1 to
+                 *      cause the writeback.
+                 *
+                 * This way we avoid all nasty races with the
+                 * dirty bit in multiple places and clearing
+                 * them concurrently from different threads.
+                 *
+                 * Note! Normally the "set_page_dirty(page)"
+                 * has no effect on the actual dirty bit - since
+                 * that will already usually be set. But we
+                 * need the side effects, and it can help us
+                 * avoid races.
+                 *
+                 * We basically use the page "master dirty bit"
+                 * as a serialization point for all the different
+                 * threads doing their things.
+                 *
+                 * FIXME! We still have a race here: if somebody
+                 * adds the page back to the page tables in
+                 * between the "page_mkclean()" and the "TestClearPageDirty()",
+                 * we might have it mapped without the dirty bit set.
+                 */
+                if (page_mkclean(page))
+                        set_page_dirty(page);
                 if (TestClearPageDirty(page)) {
-                        if (mapping_cap_account_dirty(mapping)) {
-                                page_mkclean(page);
-                                dec_zone_page_state(page, NR_FILE_DIRTY);
-                        }
+                        dec_zone_page_state(page, NR_FILE_DIRTY);
                         return 1;
                 }
                 return 0;
```
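The get_dirty_limits() hunk above makes the highmem exclusion unconditional: the dirty thresholds are now always computed against non-highmem memory, no matter which mapping is being written. Below is a rough standalone model of the resulting arithmetic. This is a sketch only: the example page counts are invented, the 10/40 percentages are the 2.6.20-era sysctl defaults, and the real function additionally adjusts the ratios (e.g. against unmapped memory) before applying them.

```c
#include <stdio.h>

int main(void)
{
        /* Invented example box: 1 GiB of 4 KiB pages, 128 MiB of it highmem. */
        long vm_total_pages = 262144;
        long totalhigh_pages = 32768;

        int dirty_background_ratio = 10;        /* 2.6.20-era default */
        int vm_dirty_ratio = 40;                /* 2.6.20-era default */

        long available_memory = vm_total_pages;

        /* After this patch: highmem is always excluded from the base. */
        available_memory -= totalhigh_pages;

        long background = (dirty_background_ratio * available_memory) / 100;
        long dirty = (vm_dirty_ratio * available_memory) / 100;

        printf("background threshold: %ld pages\n", background);
        printf("dirty threshold:      %ld pages\n", dirty);
        return 0;
}
```

This is also why the correction logic could be deleted from page_writeback_init(): once the ratios apply to low memory only, a large highmem:lowmem ratio can no longer translate into dirty thresholds far beyond what buffer_heads, which live in low memory, can tolerate.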
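The new #include <linux/task_io_accounting_ops.h> plus the task_io_account_write(PAGE_CACHE_SIZE) call in __set_page_dirty_nobuffers() tie into the per-task I/O accounting merged in the same window: every page dirtied through this path is charged to the task that dirtied it. Roughly, and this is a simplified sketch of the 2.6.20-era helper (the real header compiles it away to a no-op when CONFIG_TASK_IO_ACCOUNTING is off), it amounts to:

```c
/*
 * Simplified sketch of task_io_account_write() from
 * <linux/task_io_accounting_ops.h>: credit `bytes` of
 * pagecache writes to the task doing the dirtying.
 */
static inline void task_io_account_write(size_t bytes)
{
        current->ioac.write_bytes += bytes;
}
```

Charging at dirtying time, right next to the NR_FILE_DIRTY bump and under the mapping's tree_lock, attributes the write to its real originator rather than to pdflush, which merely performs the eventual writeback.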
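The rewritten clear_page_dirty_for_io() is the heart of the change: rather than clearing the dirty flag and then running page_mkclean() (as the removed test_clear_page_dirty() did), it first folds any pte dirty bits into the page's own flag via page_mkclean() + set_page_dirty(), and only then atomically claims that flag, using it as the single serialization point the big comment describes. The core trick, test-and-clear on one master flag so that exactly one contender wins the writeback, can be shown in a runnable user-space analogue with C11 atomics; all names below are illustrative, nothing here is kernel code:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* One atomic "master dirty bit" per object, like PG_dirty on a page. */
static atomic_bool object_dirty = true;

/*
 * Analogue of TestClearPageDirty(): atomically read and clear the bit,
 * so exactly one caller observes `true` and owns the writeback.
 */
static bool test_clear_dirty(void)
{
        return atomic_exchange(&object_dirty, false);
}

int main(void)
{
        /* Two contenders (run sequentially here for determinism):
         * only the first claimant gets to do the writeback. */
        if (test_clear_dirty())
                puts("contender A: claimed dirty state, doing writeback");
        else
                puts("contender A: lost the race, nothing to do");

        if (test_clear_dirty())
                puts("contender B: claimed dirty state, doing writeback");
        else
                puts("contender B: lost the race, nothing to do");

        return 0;
}
```

In the kernel version, the page_mkclean()/set_page_dirty() pair plays the role of folding remote dirty state (the pte bits) into the master flag before the exchange; the FIXME in the comment records the remaining window where a page can be mapped back in between those two steps without its dirty bit set.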