author    Linus Torvalds <torvalds@linux-foundation.org>  2014-06-09 19:11:44 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-06-09 19:11:44 -0700
commit    64b2d1fbbfda07765dae3f601862796a61b2c451 (patch)
tree      67947ede8fc007a9f0925e697a302a02bd087032  /fs/f2fs/node.c
parent    b1cce8032f6abe900b078d24f3c3938726528f97 (diff)
parent    9ab701349247368f9d57a993b95a5bb05bb37e10 (diff)
Merge tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "In this round, there is no special interesting feature, but we've
  investigated a couple of tuning points with respect to the I/O flow.
  Several major bug fixes and a bunch of clean-ups also have been made.

  This patch-set includes the following major enhancement patches:
   - enhance wait_on_page_writeback
   - support SEEK_DATA and SEEK_HOLE
   - enhance readahead flows
   - enhance IO flushes
   - support fiemap
   - add some tracepoints

  The other bug fixes are as follows:
   - fix to support a large volume > 2TB correctly
   - recovery bug fix wrt fallocated space
   - fix recursive lock on xattr operations
   - fix some cases on the remount flow

  And, there are a bunch of cleanups"

* tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (52 commits)
  f2fs: support f2fs_fiemap
  f2fs: avoid not to call remove_dirty_inode
  f2fs: recover fallocated space
  f2fs: fix to recover data written by dio
  f2fs: large volume support
  f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
  f2fs: avoid overflow when large directory feathure is enabled
  f2fs: fix recursive lock by f2fs_setxattr
  MAINTAINERS: add a co-maintainer from samsung for F2FS
  MAINTAINERS: change the email address for f2fs
  f2fs: use inode_init_owner() to simplify codes
  f2fs: avoid to use slab memory in f2fs_issue_flush for efficiency
  f2fs: add a tracepoint for f2fs_read_data_page
  f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages
  f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page
  f2fs: add a tracepoint for f2fs_write_end
  f2fs: add a tracepoint for f2fs_write_begin
  f2fs: fix checkpatch warning
  f2fs: deactivate inode page if the inode is evicted
  f2fs: decrease the lock granularity during write_begin
  ...
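One of the tuning points in this pull shows up in the first hunk of the diff below: available_free_memory() now splits the nm_i->ram_thresh budget roughly 25%/25%/50% between free nids, NAT entries, and dirty dentry pages instead of the old 50:50 split. What follows is a minimal user-space sketch of that budgeting rule only, not the kernel code; mem_budget_ok() and its parameters are illustrative stand-ins, with all sizes assumed to be in 4 KiB pages.

/*
 * Sketch of the 25%/25%/50% memory budget check introduced in
 * available_free_memory().  totalram and mem_size are in 4 KiB pages,
 * ram_thresh is a percentage of total RAM reserved for these caches.
 */
#include <stdbool.h>
#include <stdio.h>

enum mem_type { FREE_NIDS, NAT_ENTRIES, DIRTY_DENTS };

static bool mem_budget_ok(enum mem_type type, unsigned long totalram,
			  unsigned long ram_thresh, unsigned long mem_size)
{
	unsigned long budget = totalram * ram_thresh / 100;

	switch (type) {
	case FREE_NIDS:
	case NAT_ENTRIES:
		return mem_size < (budget >> 2);	/* 25% each */
	case DIRTY_DENTS:
		return mem_size < (budget >> 1);	/* 50% */
	}
	return false;
}

int main(void)
{
	/* e.g. 1 GiB of RAM (262144 pages), 10% threshold, 5000 pages in use */
	printf("NAT cache within budget: %d\n",
	       mem_budget_ok(NAT_ENTRIES, 262144, 10, 5000));
	return 0;
}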
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r--  fs/f2fs/node.c | 154
1 file changed, 84 insertions, 70 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 57caa6eaf47b..9dfb9a042fd2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -26,20 +26,26 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
-static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
+bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct sysinfo val;
unsigned long mem_size = 0;
+ bool res = false;
si_meminfo(&val);
- if (type == FREE_NIDS)
- mem_size = nm_i->fcnt * sizeof(struct free_nid);
- else if (type == NAT_ENTRIES)
- mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
- mem_size >>= 12;
-
- /* give 50:50 memory for free nids and nat caches respectively */
- return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
+ /* give 25%, 25%, 50% memory for each components respectively */
+ if (type == FREE_NIDS) {
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == NAT_ENTRIES) {
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == DIRTY_DENTS) {
+ mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+ }
+ return res;
}
static void clear_node_page_dirty(struct page *page)
@@ -147,6 +153,18 @@ bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
return fsync_done;
}
+void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+
+ write_lock(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e)
+ e->fsync_done = false;
+ write_unlock(&nm_i->nat_tree_lock);
+}
+
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
@@ -179,9 +197,7 @@ retry:
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
- nat_set_ino(e, le32_to_cpu(ne->ino));
- nat_set_version(e, ne->version);
+ node_info_from_raw_nat(&e->ni, ne);
}
write_unlock(&nm_i->nat_tree_lock);
}
@@ -243,7 +259,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (available_free_memory(nm_i, NAT_ENTRIES))
+ if (available_free_memory(sbi, NAT_ENTRIES))
return 0;
write_lock(&nm_i->nat_tree_lock);
@@ -849,8 +865,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
- page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
- dn->nid, AOP_FLAG_NOFS);
+ page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -867,6 +882,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
new_ni.ino = dn->inode->i_ino;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
+ f2fs_wait_on_page_writeback(page, NODE);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
@@ -946,8 +962,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
struct page *page;
int err;
repeat:
- page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
- nid, AOP_FLAG_NOFS);
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -1194,6 +1209,8 @@ static int f2fs_write_node_page(struct page *page,
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
};
+ trace_f2fs_writepage(page, NODE);
+
if (unlikely(sbi->por_doing))
goto redirty_out;
@@ -1225,10 +1242,7 @@ static int f2fs_write_node_page(struct page *page,
return 0;
redirty_out:
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- wbc->pages_skipped++;
- account_page_redirty(page);
- set_page_dirty(page);
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
}
@@ -1238,6 +1252,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
long diff;
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
/* balancing f2fs's metadata in background */
f2fs_balance_fs_bg(sbi);
@@ -1313,13 +1329,14 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
-static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
+static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct nat_entry *ne;
bool allocated = false;
- if (!available_free_memory(nm_i, FREE_NIDS))
+ if (!available_free_memory(sbi, FREE_NIDS))
return -1;
/* 0 nid should not be used */
@@ -1372,9 +1389,10 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_nm_info *nm_i,
+static void scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct f2fs_nat_block *nat_blk = page_address(nat_page);
block_t blk_addr;
int i;
@@ -1389,7 +1407,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
f2fs_bug_on(blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR) {
- if (add_free_nid(nm_i, start_nid, true) < 0)
+ if (add_free_nid(sbi, start_nid, true) < 0)
break;
}
}
@@ -1413,7 +1431,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(nm_i, page, nid);
+ scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -1433,7 +1451,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
nid = le32_to_cpu(nid_in_journal(sum, i));
if (addr == NULL_ADDR)
- add_free_nid(nm_i, nid, true);
+ add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
}
@@ -1450,7 +1468,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
- if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
+ if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
return false;
spin_lock(&nm_i->free_nid_list_lock);
@@ -1510,7 +1528,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(!i || i->state != NID_ALLOC);
- if (!available_free_memory(nm_i, FREE_NIDS)) {
+ if (!available_free_memory(sbi, FREE_NIDS)) {
__del_from_free_nid_list(nm_i, i);
need_free = true;
} else {
@@ -1532,7 +1550,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
clear_node_page_dirty(page);
}
-void recover_inline_xattr(struct inode *inode, struct page *page)
+static void recover_inline_xattr(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
void *src_addr, *dst_addr;
@@ -1557,6 +1575,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
src_addr = inline_xattr_addr(page);
inline_size = inline_xattr_size(inode);
+ f2fs_wait_on_page_writeback(ipage, NODE);
memcpy(dst_addr, src_addr, inline_size);
update_inode(inode, ipage);
@@ -1612,6 +1631,11 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
struct node_info old_ni, new_ni;
struct page *ipage;
+ get_node_info(sbi, ino, &old_ni);
+
+ if (unlikely(old_ni.blk_addr != NULL_ADDR))
+ return -EINVAL;
+
ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
if (!ipage)
return -ENOMEM;
@@ -1619,7 +1643,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
/* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
- get_node_info(sbi, ino, &old_ni);
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
@@ -1645,35 +1668,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
/*
* ra_sum_pages() merge contiguous pages into one bio and submit.
- * these pre-readed pages are linked in pages list.
+ * these pre-readed pages are alloced in bd_inode's mapping tree.
*/
-static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
int start, int nrpages)
{
- struct page *page;
- int page_idx = start;
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
+ struct address_space *mapping = inode->i_mapping;
+ int i, page_idx = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
- for (; page_idx < start + nrpages; page_idx++) {
- /* alloc temporal page for read node summary info*/
- page = alloc_page(GFP_F2FS_ZERO);
- if (!page)
+ for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
+ /* alloc page in bd_inode for reading node summary info */
+ pages[i] = grab_cache_page(mapping, page_idx);
+ if (!pages[i])
break;
-
- lock_page(page);
- page->index = page_idx;
- list_add_tail(&page->lru, pages);
+ f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
}
- list_for_each_entry(page, pages, lru)
- f2fs_submit_page_mbio(sbi, page, page->index, &fio);
-
f2fs_submit_merged_bio(sbi, META, READ);
-
- return page_idx - start;
+ return i;
}
int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1681,11 +1698,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
{
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
- struct page *page, *tmp;
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
block_t addr;
int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
- int i, last_offset, nrpages, err = 0;
- LIST_HEAD(page_list);
+ struct page *pages[bio_blocks];
+ int i, idx, last_offset, nrpages, err = 0;
/* scan the node segment */
last_offset = sbi->blocks_per_seg;
@@ -1696,29 +1713,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
nrpages = min(last_offset - i, bio_blocks);
/* read ahead node pages */
- nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
+ nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
if (!nrpages)
return -ENOMEM;
- list_for_each_entry_safe(page, tmp, &page_list, lru) {
+ for (idx = 0; idx < nrpages; idx++) {
if (err)
goto skip;
- lock_page(page);
- if (unlikely(!PageUptodate(page))) {
+ lock_page(pages[idx]);
+ if (unlikely(!PageUptodate(pages[idx]))) {
err = -EIO;
} else {
- rn = F2FS_NODE(page);
+ rn = F2FS_NODE(pages[idx]);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
sum_entry++;
}
- unlock_page(page);
+ unlock_page(pages[idx]);
skip:
- list_del(&page->lru);
- __free_pages(page, 0);
+ page_cache_release(pages[idx]);
}
+
+ invalidate_mapping_pages(inode->i_mapping, addr,
+ addr + nrpages);
}
return err;
}
@@ -1756,9 +1775,7 @@ retry:
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
- nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
- nat_set_version(ne, raw_ne.version);
+ node_info_from_raw_nat(&ne->ni, &raw_ne);
__set_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
@@ -1791,7 +1808,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
nid_t nid;
struct f2fs_nat_entry raw_ne;
int offset = -1;
- block_t new_blkaddr;
if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;
@@ -1827,11 +1843,7 @@ to_nat_page:
f2fs_bug_on(!nat_blk);
raw_ne = nat_blk->entries[nid - start_nid];
flush_now:
- new_blkaddr = nat_get_blkaddr(ne);
-
- raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
- raw_ne.block_addr = cpu_to_le32(new_blkaddr);
- raw_ne.version = nat_get_version(ne);
+ raw_nat_from_node_info(&raw_ne, &ne->ni);
if (offset < 0) {
nat_blk->entries[nid - start_nid] = raw_ne;
@@ -1841,7 +1853,7 @@ flush_now:
}
if (nat_get_blkaddr(ne) == NULL_ADDR &&
- add_free_nid(NM_I(sbi), nid, false) <= 0) {
+ add_free_nid(sbi, nid, false) <= 0) {
write_lock(&nm_i->nat_tree_lock);
__del_from_nat_cache(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
@@ -1869,8 +1881,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+
/* not used nids: 0, node, meta, (and root counted as valid node) */
- nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
+ nm_i->available_nids = nm_i->max_nid - 3;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;