diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 221 |
1 files changed, 107 insertions, 114 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index a8c69c8c0a90..00ae878b2a38 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -114,14 +114,11 @@ static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { struct radix_tree_node *node; - unsigned long index; - unsigned int offset; - unsigned int tag; - void **slot; VM_BUG_ON(!PageLocked(page)); - __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot); + node = radix_tree_replace_clear_tags(&mapping->page_tree, page->index, + shadow); if (shadow) { mapping->nrexceptional++; @@ -135,23 +132,9 @@ static void page_cache_tree_delete(struct address_space *mapping, } mapping->nrpages--; - if (!node) { - /* Clear direct pointer tags in root node */ - mapping->page_tree.gfp_mask &= __GFP_BITS_MASK; - radix_tree_replace_slot(slot, shadow); + if (!node) return; - } - /* Clear tree tags for the removed page */ - index = page->index; - offset = index & RADIX_TREE_MAP_MASK; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (test_bit(offset, node->tags[tag])) - radix_tree_tag_clear(&mapping->page_tree, index, tag); - } - - /* Delete page, swap shadow entry */ - radix_tree_replace_slot(slot, shadow); workingset_node_pages_dec(node); if (shadow) workingset_node_shadows_inc(node); @@ -160,13 +143,15 @@ static void page_cache_tree_delete(struct address_space *mapping, return; /* - * Track node that only contains shadow entries. + * Track node that only contains shadow entries. DAX mappings contain + * no shadow entries and may contain other exceptional entries so skip + * those. * * Avoid acquiring the list_lru lock if already tracked. The * list_empty() test is safe as node->private_list is * protected by mapping->tree_lock. */ - if (!workingset_node_pages(node) && + if (!dax_mapping(mapping) && !workingset_node_pages(node) && list_empty(&node->private_list)) { node->private_data = mapping; list_lru_add(&workingset_shadow_nodes, &node->private_list); @@ -213,7 +198,7 @@ void __delete_from_page_cache(struct page *page, void *shadow) * some other bad page check should catch it later. */ page_mapcount_reset(page); - atomic_sub(mapcount, &page->_count); + page_ref_sub(page, mapcount); } } @@ -265,7 +250,7 @@ void delete_from_page_cache(struct page *page) if (freepage) freepage(page); - page_cache_release(page); + put_page(page); } EXPORT_SYMBOL(delete_from_page_cache); @@ -352,8 +337,8 @@ EXPORT_SYMBOL(filemap_flush); static int __filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { - pgoff_t index = start_byte >> PAGE_CACHE_SHIFT; - pgoff_t end = end_byte >> PAGE_CACHE_SHIFT; + pgoff_t index = start_byte >> PAGE_SHIFT; + pgoff_t end = end_byte >> PAGE_SHIFT; struct pagevec pvec; int nr_pages; int ret = 0; @@ -550,7 +535,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) pgoff_t offset = old->index; freepage = mapping->a_ops->freepage; - page_cache_get(new); + get_page(new); new->mapping = mapping; new->index = offset; @@ -572,7 +557,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) radix_tree_preload_end(); if (freepage) freepage(old); - page_cache_release(old); + put_page(old); } return error; @@ -597,14 +582,24 @@ static int page_cache_tree_insert(struct address_space *mapping, if (!radix_tree_exceptional_entry(p)) return -EEXIST; - if (WARN_ON(dax_mapping(mapping))) - return -EINVAL; - - if (shadowp) - *shadowp = p; mapping->nrexceptional--; - if (node) - workingset_node_shadows_dec(node); + if (!dax_mapping(mapping)) { + if (shadowp) + *shadowp = p; + if (node) + workingset_node_shadows_dec(node); + } else { + /* DAX can replace empty locked entry with a hole */ + WARN_ON_ONCE(p != + (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | + RADIX_DAX_ENTRY_LOCK)); + /* DAX accounts exceptional entries as normal pages */ + if (node) + workingset_node_pages_dec(node); + /* Wakeup waiters for exceptional entry lock */ + dax_wake_mapping_entry_waiter(mapping, page->index, + false); + } } radix_tree_replace_slot(slot, page); mapping->nrpages++; @@ -651,7 +646,7 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - page_cache_get(page); + get_page(page); page->mapping = mapping; page->index = offset; @@ -675,7 +670,7 @@ err_insert: spin_unlock_irq(&mapping->tree_lock); if (!huge) mem_cgroup_cancel_charge(page, memcg, false); - page_cache_release(page); + put_page(page); return error; } @@ -713,8 +708,12 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, * The page might have been evicted from cache only * recently, in which case it should be activated like * any other repeatedly accessed page. + * The exception is pages getting rewritten; evicting other + * data from the working set, only to cache data that will + * get overwritten with something else, is a waste of memory. */ - if (shadow && workingset_refault(shadow)) { + if (!(gfp_mask & __GFP_WRITE) && + shadow && workingset_refault(shadow)) { SetPageActive(page); workingset_activation(page); } else @@ -1083,7 +1082,7 @@ repeat: * include/linux/pagemap.h for details. */ if (unlikely(page != *pagep)) { - page_cache_release(page); + put_page(page); goto repeat; } } @@ -1121,7 +1120,7 @@ repeat: /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } VM_BUG_ON_PAGE(page->index != offset, page); @@ -1168,7 +1167,7 @@ repeat: if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { if (!trylock_page(page)) { - page_cache_release(page); + put_page(page); return NULL; } } else { @@ -1178,7 +1177,7 @@ repeat: /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } VM_BUG_ON_PAGE(page->index != offset, page); @@ -1209,7 +1208,7 @@ no_page: err = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_RECLAIM_MASK); if (unlikely(err)) { - page_cache_release(page); + put_page(page); page = NULL; if (err == -EEXIST) goto repeat; @@ -1278,7 +1277,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } export: @@ -1343,7 +1342,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -1405,7 +1404,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -1415,7 +1414,7 @@ repeat: * negatives, which is just confusing to the caller. */ if (page->mapping == NULL || page->index != iter.index) { - page_cache_release(page); + put_page(page); break; } @@ -1482,7 +1481,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -1549,7 +1548,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } export: @@ -1610,11 +1609,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, unsigned int prev_offset; int error = 0; - index = *ppos >> PAGE_CACHE_SHIFT; - prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; - prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); - last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; - offset = *ppos & ~PAGE_CACHE_MASK; + index = *ppos >> PAGE_SHIFT; + prev_index = ra->prev_pos >> PAGE_SHIFT; + prev_offset = ra->prev_pos & (PAGE_SIZE-1); + last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; + offset = *ppos & ~PAGE_MASK; for (;;) { struct page *page; @@ -1648,7 +1647,7 @@ find_page: if (PageUptodate(page)) goto page_ok; - if (inode->i_blkbits == PAGE_CACHE_SHIFT || + if (inode->i_blkbits == PAGE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; if (!trylock_page(page)) @@ -1672,18 +1671,18 @@ page_ok: */ isize = i_size_read(inode); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + end_index = (isize - 1) >> PAGE_SHIFT; if (unlikely(!isize || index > end_index)) { - page_cache_release(page); + put_page(page); goto out; } /* nr is the maximum number of bytes to copy from this page */ - nr = PAGE_CACHE_SIZE; + nr = PAGE_SIZE; if (index == end_index) { - nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + nr = ((isize - 1) & ~PAGE_MASK) + 1; if (nr <= offset) { - page_cache_release(page); + put_page(page); goto out; } } @@ -1711,11 +1710,11 @@ page_ok: ret = copy_page_to_iter(page, offset, nr, iter); offset += ret; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; + index += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; prev_offset = offset; - page_cache_release(page); + put_page(page); written += ret; if (!iov_iter_count(iter)) goto out; @@ -1735,7 +1734,7 @@ page_not_up_to_date_locked: /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); continue; } @@ -1757,7 +1756,7 @@ readpage: if (unlikely(error)) { if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); + put_page(page); error = 0; goto find_page; } @@ -1774,7 +1773,7 @@ readpage: * invalidate_mapping_pages got it */ unlock_page(page); - page_cache_release(page); + put_page(page); goto find_page; } unlock_page(page); @@ -1789,7 +1788,7 @@ readpage: readpage_error: /* UHHUH! A synchronous read error occurred. Report it */ - page_cache_release(page); + put_page(page); goto out; no_cached_page: @@ -1805,7 +1804,7 @@ no_cached_page: error = add_to_page_cache_lru(page, mapping, index, mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error) { - page_cache_release(page); + put_page(page); if (error == -EEXIST) { error = 0; goto find_page; @@ -1817,10 +1816,10 @@ no_cached_page: out: ra->prev_pos = prev_index; - ra->prev_pos <<= PAGE_CACHE_SHIFT; + ra->prev_pos <<= PAGE_SHIFT; ra->prev_pos |= prev_offset; - *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; + *ppos = ((loff_t)index << PAGE_SHIFT) + offset; file_accessed(filp); return written ? written : error; } @@ -1838,8 +1837,6 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; ssize_t retval = 0; - loff_t *ppos = &iocb->ki_pos; - loff_t pos = *ppos; size_t count = iov_iter_count(iter); if (!count) @@ -1851,15 +1848,15 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) loff_t size; size = i_size_read(inode); - retval = filemap_write_and_wait_range(mapping, pos, - pos + count - 1); + retval = filemap_write_and_wait_range(mapping, iocb->ki_pos, + iocb->ki_pos + count - 1); if (!retval) { struct iov_iter data = *iter; - retval = mapping->a_ops->direct_IO(iocb, &data, pos); + retval = mapping->a_ops->direct_IO(iocb, &data); } if (retval > 0) { - *ppos = pos + retval; + iocb->ki_pos += retval; iov_iter_advance(iter, retval); } @@ -1872,14 +1869,14 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) * the rest of the read. Buffered reads will not work for * DAX files, so don't bother trying. */ - if (retval < 0 || !iov_iter_count(iter) || *ppos >= size || + if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size || IS_DAX(inode)) { file_accessed(file); goto out; } } - retval = do_generic_file_read(file, ppos, iter, retval); + retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval); out: return retval; } @@ -1912,7 +1909,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) else if (ret == -EEXIST) ret = 0; /* losing race to add is OK */ - page_cache_release(page); + put_page(page); } while (ret == AOP_TRUNCATED_PAGE); @@ -2022,8 +2019,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size; int ret = 0; - size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); - if (offset >= size >> PAGE_CACHE_SHIFT) + size = round_up(i_size_read(inode), PAGE_SIZE); + if (offset >= size >> PAGE_SHIFT) return VM_FAULT_SIGBUS; /* @@ -2049,7 +2046,7 @@ retry_find: } if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { - page_cache_release(page); + put_page(page); return ret | VM_FAULT_RETRY; } @@ -2072,10 +2069,10 @@ retry_find: * Found the page and have a reference on it. * We must recheck i_size under page lock. */ - size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); - if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) { + size = round_up(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= size >> PAGE_SHIFT)) { unlock_page(page); - page_cache_release(page); + put_page(page); return VM_FAULT_SIGBUS; } @@ -2120,7 +2117,7 @@ page_not_uptodate: if (!PageUptodate(page)) error = -EIO; } - page_cache_release(page); + put_page(page); if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; @@ -2164,7 +2161,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -2178,8 +2175,8 @@ repeat: if (page->mapping != mapping || !PageUptodate(page)) goto unlock; - size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE); - if (page->index >= size >> PAGE_CACHE_SHIFT) + size = round_up(i_size_read(mapping->host), PAGE_SIZE); + if (page->index >= size >> PAGE_SHIFT) goto unlock; pte = vmf->pte + page->index - vmf->pgoff; @@ -2189,13 +2186,13 @@ repeat: if (file->f_ra.mmap_miss > 0) file->f_ra.mmap_miss--; addr = address + (page->index - vmf->pgoff) * PAGE_SIZE; - do_set_pte(vma, addr, page, pte, false, false); + do_set_pte(vma, addr, page, pte, false, false, true); unlock_page(page); goto next; unlock: unlock_page(page); skip: - page_cache_release(page); + put_page(page); next: if (iter.index == vmf->max_pgoff) break; @@ -2278,7 +2275,7 @@ static struct page *wait_on_page_read(struct page *page) if (!IS_ERR(page)) { wait_on_page_locked(page); if (!PageUptodate(page)) { - page_cache_release(page); + put_page(page); page = ERR_PTR(-EIO); } } @@ -2301,7 +2298,7 @@ repeat: return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, gfp); if (unlikely(err)) { - page_cache_release(page); + put_page(page); if (err == -EEXIST) goto repeat; /* Presumably ENOMEM for radix tree node */ @@ -2311,7 +2308,7 @@ repeat: filler: err = filler(data, page); if (err < 0) { - page_cache_release(page); + put_page(page); return ERR_PTR(err); } @@ -2364,7 +2361,7 @@ filler: /* Case c or d, restart the operation */ if (!page->mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } @@ -2500,18 +2497,19 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(pagecache_write_end); ssize_t -generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + loff_t pos = iocb->ki_pos; ssize_t written; size_t write_len; pgoff_t end; struct iov_iter data; write_len = iov_iter_count(from); - end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; + end = (pos + write_len - 1) >> PAGE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); if (written) @@ -2525,7 +2523,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ if (mapping->nrpages) { written = invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); /* * If a page can not be invalidated, return 0 to fall back * to buffered write. @@ -2538,7 +2536,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) } data = *from; - written = mapping->a_ops->direct_IO(iocb, &data, pos); + written = mapping->a_ops->direct_IO(iocb, &data); /* * Finally, try again to invalidate clean pages which might have been @@ -2550,7 +2548,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); } if (written > 0) { @@ -2575,7 +2573,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index, unsigned flags) { struct page *page; - int fgp_flags = FGP_LOCK|FGP_ACCESSED|FGP_WRITE|FGP_CREAT; + int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT; if (flags & AOP_FLAG_NOFS) fgp_flags |= FGP_NOFS; @@ -2611,8 +2609,8 @@ ssize_t generic_perform_write(struct file *file, size_t copied; /* Bytes copied from user */ void *fsdata; - offset = (pos & (PAGE_CACHE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + offset = (pos & (PAGE_SIZE - 1)); + bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_count(i)); again: @@ -2665,7 +2663,7 @@ again: * because not all segments in the iov can be copied at * once without a pagefault. */ - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_single_seg_count(i)); goto again; } @@ -2718,7 +2716,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos, endbyte; - written = generic_file_direct_write(iocb, from, iocb->ki_pos); + written = generic_file_direct_write(iocb, from); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to @@ -2752,8 +2750,8 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) iocb->ki_pos = endbyte + 1; written += status; invalidate_mapping_pages(mapping, - pos >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); } else { /* * We don't know how much we wrote, so just return @@ -2792,13 +2790,8 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = __generic_file_write_iter(iocb, from); inode_unlock(inode); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; - } + if (ret > 0) + ret = generic_write_sync(iocb, ret); return ret; } EXPORT_SYMBOL(generic_file_write_iter); |