From 6437d1b0adb46f29aafcbf10950a89211028ca09 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Feb 2014 18:23:32 +0900 Subject: f2fs: fix to do build_stat prior to the recovery procedure At the end of the recovery procedure, write_checkpoint is called and updates the cp count which is managed by f2fs stat. But previously, build_stat() was called after the recovery procedure, which resulted in: BUG: unable to handle kernel NULL pointer dereference at 000000000000012c IP: [] write_checkpoint+0x720/0xbc0 [f2fs] Call Trace: [] ? mark_held_locks+0x74/0x140 [] ? __init_waitqueue_head+0x60/0x60 [] recover_fsync_data+0x656/0xf20 [f2fs] [] ? security_d_instantiate+0x1b/0x30 [] f2fs_fill_super+0x94d/0xa00 [f2fs] [] mount_bdev+0x1a5/0x1f0 [] ? __get_free_pages+0xe/0x40 [] ? f2fs_remount+0x130/0x130 [f2fs] [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x43/0x1b0 [] vfs_kern_mount+0x74/0x160 [] ? __get_fs_type+0x51/0x60 [] do_mount+0x237/0xb50 [] ? copy_mount_options+0x3a/0x170 So, this patch changes the order of recover_fsync_data() and f2fs_build_stats(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1a85f83abd53..475560e5ee71 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -989,28 +989,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_root_inode; } - /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { - err = recover_fsync_data(sbi); - if (err) - f2fs_msg(sb, KERN_ERR, - "Cannot recover all fsync data errno=%ld", err); - } - - /* - * If filesystem is not mounted as read-only then - * do start the gc_thread. 
- */ - if (!(sb->s_flags & MS_RDONLY)) { - /* After POR, we can run background GC thread.*/ - err = start_gc_thread(sbi); - if (err) - goto free_gc; - } - err = f2fs_build_stats(sbi); if (err) - goto free_gc; + goto free_root_inode; if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -1032,17 +1013,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, "%s", sb->s_id); if (err) - goto fail; + goto free_proc; + /* recover fsynced data */ + if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + err = recover_fsync_data(sbi); + if (err) + f2fs_msg(sb, KERN_ERR, + "Cannot recover all fsync data errno=%ld", err); + } + + /* + * If filesystem is not mounted as read-only then + * do start the gc_thread. + */ + if (!(sb->s_flags & MS_RDONLY)) { + /* After POR, we can run background GC thread.*/ + err = start_gc_thread(sbi); + if (err) + goto free_kobj; + } return 0; -fail: + +free_kobj: + kobject_del(&sbi->s_kobj); +free_proc: if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } f2fs_destroy_stats(sbi); -free_gc: - stop_gc_thread(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; -- cgit v1.2.1 From ab9fa662e4867455f44f4de96d29a7f09cf292c6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Feb 2014 20:09:05 +0900 Subject: f2fs: add an sysfs entry to control the directory level This patch adds a sysfs entry to control the dir_level used for large directories. The description of this entry is: dir_level This parameter controls the directory level to support large directories. If a directory has a large number of files, it can reduce the file lookup latency by increasing this dir_level value. Otherwise, it needs to decrease this value to reduce the space overhead. The default value is 0. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 475560e5ee71..1bd915362154 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -184,6 +184,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -196,6 +197,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(max_victim_search), + ATTR_LIST(dir_level), NULL, }; @@ -359,6 +361,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) if (test_opt(F2FS_SB(sb), INLINE_XATTR)) set_inode_flag(fi, FI_INLINE_XATTR); + /* Will be used by directory only */ + fi->i_dir_level = F2FS_SB(sb)->dir_level; + return &fi->vfs_inode; } @@ -785,6 +790,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); + + sbi->dir_level = DEF_DIR_LEVEL; } /* -- cgit v1.2.1 From e8512d2e0c4eb38cd78b1499bb08d7d8eea6c723 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 7 Mar 2014 18:43:28 +0800 Subject: f2fs: remove the unused ctor argument of f2fs_kmem_cache_create() Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1bd915362154..72df734764e7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1089,7 +1089,7 @@ MODULE_ALIAS_FS("f2fs"); static int __init init_inodecache(void) { f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", - sizeof(struct 
f2fs_inode_info), NULL); + sizeof(struct f2fs_inode_info)); if (!f2fs_inode_cachep) return -ENOMEM; return 0; -- cgit v1.2.1 From 46c04366bbfd112a74dcfebbe41c9bf3f496ea75 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 7 Mar 2014 18:43:33 +0800 Subject: f2fs: format segment_info's show for better legibility The original segment_info's show is a bit out-of-format: [root@guz Demoes]# cat /proc/fs/f2fs/loop0/segment_info 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...... 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 [root@guz Demoes]# so we fix it here for better legibility. [root@guz Demoes]# cat /proc/fs/f2fs/loop0/segment_info 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...... 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 [root@guz Demoes]# Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 72df734764e7..6e4851ce029b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -546,11 +546,12 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) for (i = 0; i < total_segs; i++) { seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); - if (i != 0 && (i % 10) == 0) - seq_puts(seq, "\n"); + if ((i % 10) == 9 || i == (total_segs - 1)) + seq_putc(seq, '\n'); else - seq_puts(seq, " "); + seq_putc(seq, ' '); } + return 0; } -- cgit v1.2.1 From 910bb12d29cc64144506333bfeaeeee9715c3872 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Mar 2014 17:08:36 +0800 Subject: f2fs: check upper bound of ino value in f2fs_nfs_get_inode Upper bound checking of ino should be added to f2fs_nfs_get_inode, so unneeded process before do_read_inode in f2fs_iget could be avoided when ino is invalid. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6e4851ce029b..3a51d7a4a6c9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -644,6 +644,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, if (unlikely(ino < F2FS_ROOT_INO(sbi))) return ERR_PTR(-ESTALE); + if (unlikely(ino >= NM_I(sbi)->max_nid)) + return ERR_PTR(-ESTALE); /* * f2fs_iget isn't quite right if the inode is currently unallocated! -- cgit v1.2.1 From 90aa6dc9b9dd9b3ee832bb6e91e2d8ba8ebb93b6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Mar 2014 10:31:06 +0800 Subject: f2fs: print type for each segment in segment_info's show The original segment_info's show looks out-of-format: cat /proc/fs/f2fs/loop0/segment_info 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 512 512 512 512 512 512 512 512 0 0 512 348 0 263 0 0 512 0 0 512 512 512 512 0 512 512 512 512 512 512 512 512 512 511 328 512 512 512 512 512 512 512 512 512 512 512 512 512 0 0 175 Let's fix this and show type for each segment. 
cat /proc/fs/f2fs/loop0/segment_info format: segment_type|valid_blocks segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN) 0 2|0 1|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 10 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 20 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 30 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 40 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 50 3|0 3|0 3|0 3|0 3|0 3|0 3|0 0|0 3|0 3|0 60 3|0 3|0 3|0 3|0 3|0 3|0 3|0 3|0 3|0 3|512 70 3|512 3|512 3|512 3|512 3|512 3|512 3|512 3|0 3|0 3|512 80 3|0 3|0 3|0 3|0 3|0 3|512 3|0 3|0 3|512 3|512 90 3|512 0|512 3|274 0|512 0|512 0|512 0|512 0|512 0|512 3|512 100 3|512 0|512 3|511 0|328 3|512 0|512 0|512 3|512 0|512 0|512 110 0|512 0|512 0|512 0|512 0|512 0|512 0|512 5|0 4|0 3|512 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3a51d7a4a6c9..057a3efb2487 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -544,8 +544,16 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) le32_to_cpu(sbi->raw_super->segment_count_main); int i; + seq_puts(seq, "format: segment_type|valid_blocks\n" + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); + for (i = 0; i < total_segs; i++) { - seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); + struct seg_entry *se = get_seg_entry(sbi, i); + + if ((i % 10) == 0) + seq_printf(seq, "%-5d", i); + seq_printf(seq, "%d|%-3u", se->type, + get_valid_blocks(sbi, i, 1)); if ((i % 10) == 9 || i == (total_segs - 1)) seq_putc(seq, '\n'); else -- cgit v1.2.1 From 04c0938844695ab97b79a477a9f57748fe97d2f5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Mar 2014 09:07:59 +0800 Subject: f2fs: fix incorrect parsing with option string Previously 'background_gc={on***,off***}' was being parsed as a correct option; with this patch we can fix this trivial bug in the mount process. 
Change log from v1: o need to check length of parameter suggested by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 057a3efb2487..dbe402b1a4b7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -258,9 +258,9 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; - if (!strncmp(name, "on", 2)) + if (strlen(name) == 2 && !strncmp(name, "on", 2)) set_opt(sbi, BG_GC); - else if (!strncmp(name, "off", 3)) + else if (strlen(name) == 3 && !strncmp(name, "off", 3)) clear_opt(sbi, BG_GC); else { kfree(name); -- cgit v1.2.1 From cdfc41c134d48c1923066bcfa6630b94588ad6bc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Mar 2014 13:31:37 +0900 Subject: f2fs: throttle the memory footprint with a sysfs entry This patch introduces ram_thresh, a sysfs entry, which controls the memory footprint used by the free nid list and the nat cache. Previously, the free nid list was controlled by MAX_FREE_NIDS, while the nat cache was managed by NM_WOUT_THRESHOLD. However, this approach cannot be applied dynamically according to the system. So, this patch adds ram_thresh, with which users can specify the threshold in units of 1 / 1024 of the total ram size. For example, if the total ram size is 4GB and the value is set to 10 by default, f2fs tries to control the number of free nids and nat caches not to consume over 10 * (4GB / 1024) = 40MB. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dbe402b1a4b7..34c47b2010bc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -74,6 +74,7 @@ static match_table_t f2fs_tokens = { enum { GC_THREAD, /* struct f2fs_gc_thread */ SM_INFO, /* struct f2fs_sm_info */ + NM_INFO, /* struct f2fs_nm_info */ F2FS_SBI, /* struct f2fs_sb_info */ }; @@ -92,6 +93,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return (unsigned char *)sbi->gc_thread; else if (struct_type == SM_INFO) return (unsigned char *)SM_I(sbi); + else if (struct_type == NM_INFO) + return (unsigned char *)NM_I(sbi); else if (struct_type == F2FS_SBI) return (unsigned char *)sbi; return NULL; @@ -183,6 +186,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); @@ -198,6 +202,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_ipu_util), ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), + ATTR_LIST(ram_thresh), NULL, }; -- cgit v1.2.1 From d928bfbfe77aa457b765c19e9db8cd4cc72b3c89 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 20 Mar 2014 19:10:08 +0900 Subject: f2fs: introduce fi->i_sem to protect fi's info This patch introduces fi->i_sem to protect fi's info that includes xattr_ver, pino, i_nlink. This enables to remove i_mutex during f2fs_sync_file, resulting in performance improvement when a number of fsync calls are triggered from many concurrent threads. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 34c47b2010bc..89ea046c846d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -360,6 +360,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_current_depth = 1; fi->i_advise = 0; rwlock_init(&fi->ext.ext_lock); + init_rwsem(&fi->i_sem); set_inode_flag(fi, FI_NEW_INODE); -- cgit v1.2.1 From df0f8dc0e154de13e3a54846f384b674dd557c85 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 22 Mar 2014 14:57:23 +0800 Subject: f2fs: avoid unnecessary bio submit when wait page writeback This patch introduces is_merged_page() to check whether the current page is merged in the f2fs bio cache. When the page is not in the cache, we can avoid submitting the bio cache, resulting in having more chance to merge pages. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 89ea046c846d..959834066d60 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -920,11 +920,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->por_doing = false; spin_lock_init(&sbi->stat_lock); - mutex_init(&sbi->read_io.io_mutex); + init_rwsem(&sbi->read_io.io_rwsem); sbi->read_io.sbi = sbi; sbi->read_io.bio = NULL; for (i = 0; i < NR_PAGE_TYPE; i++) { - mutex_init(&sbi->write_io[i].io_mutex); + init_rwsem(&sbi->write_io[i].io_rwsem); sbi->write_io[i].sbi = sbi; sbi->write_io[i].bio = NULL; } -- cgit v1.2.1 From 6b4afdd794783fe515b50838aa36591e3feea990 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 2 Apr 2014 15:34:36 +0900 Subject: f2fs: introduce f2fs_issue_flush to avoid redundant flush issue Some storage devices show relatively high latencies to complete cache_flush commands, even though their normal IO speed is pretty high. 
In such a case, it needs to merge cache_flush commands as much as possible to avoid issuing them redundantly. So, this patch introduces a mount option, "-o flush_merge", to mitigate such overhead. If this option is enabled by the user, F2FS merges the cache_flush commands and then issues just one cache_flush on behalf of them. Once the single command is finished, F2FS sends a completion signal to all the pending threads. Note that this option can be used under a workload consisting of very intensive concurrent fsync calls, while the storage handles cache_flush commands slowly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 959834066d60..d31b767fde73 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -51,6 +51,7 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_inline_data, + Opt_flush_merge, Opt_err, }; @@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, + {Opt_flush_merge, "flush_merge"}, {Opt_err, NULL}, }; @@ -334,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_data: set_opt(sbi, INLINE_DATA); break; + case Opt_flush_merge: + set_opt(sbi, FLUSH_MERGE); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -537,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + if (test_opt(sbi, FLUSH_MERGE)) + seq_puts(seq, ",flush_merge"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; -- cgit v1.2.1