summaryrefslogtreecommitdiffstats
path: root/fs/f2fs/data.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-14 09:07:36 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-14 09:07:36 -0800
commit09cb6464fe5e7fcd5177911429badd139c4481b7 (patch)
tree5f7af2d0778f699053da6ed2e43662fff2d51e73 /fs/f2fs/data.c
parent19d37ce2a7159ee30bd59d14fe5fe13c932bd5b7 (diff)
parentc0ed4405a99ec9be2a0f062eaafc002d8d26c99f (diff)
downloadtalos-obmc-linux-09cb6464fe5e7fcd5177911429badd139c4481b7.tar.gz
talos-obmc-linux-09cb6464fe5e7fcd5177911429badd139c4481b7.zip
Merge tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This patch series contains several performance tuning patches regarding to the IO submission flow, in addition to supporting new features such as a ZBC-base drive and multiple devices. It also includes some major bug fixes such as: - checkpoint version control - fdatasync-related roll-forward recovery routine - memory boundary or null-pointer access in corner cases - missing error cases It has various minor clean-up patches as well" * tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (66 commits) f2fs: fix a missing size change in f2fs_setattr f2fs: fix to access nullified flush_cmd_control pointer f2fs: free meta pages if sanity check for ckpt is failed f2fs: detect wrong layout f2fs: call sync_fs when f2fs is idle Revert "f2fs: use percpu_counter for # of dirty pages in inode" f2fs: return AOP_WRITEPAGE_ACTIVATE for writepage f2fs: do not activate auto_recovery for fallocated i_size f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack f2fs: fix 32-bit build f2fs: set ->owner for debugfs status file's file_operations f2fs: fix incorrect free inode count in ->statfs f2fs: drop duplicate header timer.h f2fs: fix wrong AUTO_RECOVER condition f2fs: do not recover i_size if it's valid f2fs: fix fdatasync f2fs: fix to account total free nid correctly f2fs: fix an infinite loop when flush nodes in cp f2fs: don't wait writeback for datas during checkpoint f2fs: fix wrong written_valid_blocks counting ...
Diffstat (limited to 'fs/f2fs/data.c')
-rw-r--r--fs/f2fs/data.c192
1 files changed, 127 insertions, 65 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9e5561fa4cb6..7c344b3ad70f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -29,6 +29,26 @@
#include "trace.h"
#include <trace/events/f2fs.h>
+static bool __is_cp_guaranteed(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode;
+ struct f2fs_sb_info *sbi;
+
+ if (!mapping)
+ return false;
+
+ inode = mapping->host;
+ sbi = F2FS_I_SB(inode);
+
+ if (inode->i_ino == F2FS_META_INO(sbi) ||
+ inode->i_ino == F2FS_NODE_INO(sbi) ||
+ S_ISDIR(inode->i_mode) ||
+ is_cold_data(page))
+ return true;
+ return false;
+}
+
static void f2fs_read_end_io(struct bio *bio)
{
struct bio_vec *bvec;
@@ -71,6 +91,7 @@ static void f2fs_write_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
+ enum count_type type = WB_DATA_TYPE(page);
fscrypt_pullback_bio_page(&page, true);
@@ -78,9 +99,11 @@ static void f2fs_write_end_io(struct bio *bio)
mapping_set_error(page->mapping, -EIO);
f2fs_stop_checkpoint(sbi, true);
}
+ dec_page_count(sbi, type);
+ clear_cold_data(page);
end_page_writeback(page);
}
- if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
+ if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
wq_has_sleeper(&sbi->cp_wait))
wake_up(&sbi->cp_wait);
@@ -88,6 +111,46 @@ static void f2fs_write_end_io(struct bio *bio)
}
/*
+ * Return true, if pre_bio's bdev is same as its target device.
+ */
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+ block_t blk_addr, struct bio *bio)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (FDEV(i).start_blk <= blk_addr &&
+ FDEV(i).end_blk >= blk_addr) {
+ blk_addr -= FDEV(i).start_blk;
+ bdev = FDEV(i).bdev;
+ break;
+ }
+ }
+ if (bio) {
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+ }
+ return bdev;
+}
+
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
+ return i;
+ return 0;
+}
+
+static bool __same_bdev(struct f2fs_sb_info *sbi,
+ block_t blk_addr, struct bio *bio)
+{
+ return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+}
+
+/*
* Low-level block read/write IO operations.
*/
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
@@ -97,8 +160,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
bio = f2fs_bio_alloc(npages);
- bio->bi_bdev = sbi->sb->s_bdev;
- bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+ f2fs_target_device(sbi, blk_addr, bio);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
bio->bi_private = is_read ? NULL : sbi;
@@ -109,8 +171,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
struct bio *bio, enum page_type type)
{
if (!is_read_io(bio_op(bio))) {
- atomic_inc(&sbi->nr_wb_bios);
- if (f2fs_sb_mounted_hmsmr(sbi->sb) &&
+ if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
current->plug && (type == DATA || type == NODE))
blk_finish_plug(current->plug);
}
@@ -268,22 +329,24 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
verify_block_addr(sbi, fio->old_blkaddr);
verify_block_addr(sbi, fio->new_blkaddr);
+ bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
+ if (!is_read)
+ inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+
down_write(&io->io_rwsem);
if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
- (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
+ (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
+ !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
- int bio_blocks = MAX_BIO_BLOCKS(sbi);
-
io->bio = __bio_alloc(sbi, fio->new_blkaddr,
- bio_blocks, is_read);
+ BIO_MAX_PAGES, is_read);
io->fio = *fio;
}
- bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
-
if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
PAGE_SIZE) {
__submit_merged_bio(io);
@@ -588,7 +651,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
- int seg = CURSEG_WARM_DATA;
pgoff_t fofs;
blkcnt_t count = 1;
@@ -606,11 +668,8 @@ alloc:
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
- if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
- seg = CURSEG_DIRECT_IO;
-
allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
- &sum, seg);
+ &sum, CURSEG_WARM_DATA);
set_data_blkaddr(dn);
/* update i_size */
@@ -622,11 +681,18 @@ alloc:
return 0;
}
-ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
+static inline bool __force_buffered_io(struct inode *inode, int rw)
+{
+ return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
+ (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+ F2FS_I_SB(inode)->s_ndevs);
+}
+
+int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct f2fs_map_blocks map;
- ssize_t ret = 0;
+ int err = 0;
map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
@@ -638,19 +704,22 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
if (iocb->ki_flags & IOCB_DIRECT) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ return f2fs_map_blocks(inode, &map, 1,
+ __force_buffered_io(inode, WRITE) ?
+ F2FS_GET_BLOCK_PRE_AIO :
+ F2FS_GET_BLOCK_PRE_DIO);
}
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
}
if (!f2fs_has_inline_data(inode))
return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
- return ret;
+ return err;
}
/*
@@ -674,7 +743,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unsigned int ofs_in_node, last_ofs_in_node;
blkcnt_t prealloc;
struct extent_info ei;
- bool allocated = false;
block_t blkaddr;
if (!maxblocks)
@@ -714,7 +782,7 @@ next_dnode:
}
prealloc = 0;
- ofs_in_node = dn.ofs_in_node;
+ last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
next_block:
@@ -733,10 +801,8 @@ next_block:
}
} else {
err = __allocate_data_block(&dn);
- if (!err) {
+ if (!err)
set_inode_flag(inode, FI_APPEND_WRITE);
- allocated = true;
- }
}
if (err)
goto sync_out;
@@ -791,7 +857,6 @@ skip:
err = reserve_new_blocks(&dn, prealloc);
if (err)
goto sync_out;
- allocated = dn.node_changed;
map->m_len += dn.ofs_in_node - ofs_in_node;
if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
@@ -810,9 +875,8 @@ skip:
if (create) {
f2fs_unlock_op(sbi);
- f2fs_balance_fs(sbi, allocated);
+ f2fs_balance_fs(sbi, dn.node_changed);
}
- allocated = false;
goto next_dnode;
sync_out:
@@ -820,7 +884,7 @@ sync_out:
unlock_out:
if (create) {
f2fs_unlock_op(sbi);
- f2fs_balance_fs(sbi, allocated);
+ f2fs_balance_fs(sbi, dn.node_changed);
}
out:
trace_f2fs_map_blocks(inode, map, err);
@@ -832,19 +896,19 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
pgoff_t *next_pgofs)
{
struct f2fs_map_blocks map;
- int ret;
+ int err;
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
map.m_next_pgofs = next_pgofs;
- ret = f2fs_map_blocks(inode, &map, create, flag);
- if (!ret) {
+ err = f2fs_map_blocks(inode, &map, create, flag);
+ if (!err) {
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
bh->b_size = map.m_len << inode->i_blkbits;
}
- return ret;
+ return err;
}
static int get_data_block(struct inode *inode, sector_t iblock,
@@ -889,7 +953,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct buffer_head map_bh;
sector_t start_blk, last_blk;
pgoff_t next_pgofs;
- loff_t isize;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
@@ -906,13 +969,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
inode_lock(inode);
- isize = i_size_read(inode);
- if (start >= isize)
- goto out;
-
- if (start + len > isize)
- len = isize - start;
-
if (logical_to_blk(inode, len) == 0)
len = blk_to_logical(inode, 1);
@@ -931,13 +987,11 @@ next:
/* HOLE */
if (!buffer_mapped(&map_bh)) {
start_blk = next_pgofs;
- /* Go through holes util pass the EOF */
- if (blk_to_logical(inode, start_blk) < isize)
+
+ if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
+ F2FS_I_SB(inode)->max_file_blocks))
goto prep_next;
- /* Found a hole beyond isize means no more extents.
- * Note that the premise is that filesystems don't
- * punch holes beyond isize and keep size unchanged.
- */
+
flags |= FIEMAP_EXTENT_LAST;
}
@@ -980,7 +1034,6 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct fscrypt_ctx *ctx = NULL;
- struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
@@ -998,8 +1051,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
fscrypt_release_ctx(ctx);
return ERR_PTR(-ENOMEM);
}
- bio->bi_bdev = bdev;
- bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
+ f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
bio->bi_private = ctx;
@@ -1094,7 +1146,8 @@ got_it:
* This page will go to BIO. Do we need to send this
* BIO off first?
*/
- if (bio && (last_block_in_bio != block_nr - 1)) {
+ if (bio && (last_block_in_bio != block_nr - 1 ||
+ !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
submit_and_realloc:
__submit_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
@@ -1309,7 +1362,6 @@ done:
if (err && err != -ENOENT)
goto redirty_out;
- clear_cold_data(page);
out:
inode_dec_dirty_pages(inode);
if (err)
@@ -1330,6 +1382,8 @@ out:
redirty_out:
redirty_page_for_writepage(wbc, page);
+ if (!err)
+ return AOP_WRITEPAGE_ACTIVATE;
unlock_page(page);
return err;
}
@@ -1425,6 +1479,15 @@ continue_unlock:
ret = mapping->a_ops->writepage(page, wbc);
if (unlikely(ret)) {
+ /*
+ * keep nr_to_write, since vfs uses this to
+ * get # of written pages.
+ */
+ if (ret == AOP_WRITEPAGE_ACTIVATE) {
+ unlock_page(page);
+ ret = 0;
+ continue;
+ }
done_index = page->index + 1;
done = 1;
break;
@@ -1712,7 +1775,6 @@ static int f2fs_write_end(struct file *file,
goto unlock_out;
set_page_dirty(page);
- clear_cold_data(page);
if (pos + copied > i_size_read(inode))
f2fs_i_size_write(inode, pos + copied);
@@ -1749,9 +1811,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (err)
return err;
- if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
- return 0;
- if (test_opt(F2FS_I_SB(inode), LFS))
+ if (__force_buffered_io(inode, rw))
return 0;
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
@@ -1783,12 +1843,14 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
return;
if (PageDirty(page)) {
- if (inode->i_ino == F2FS_META_INO(sbi))
+ if (inode->i_ino == F2FS_META_INO(sbi)) {
dec_page_count(sbi, F2FS_DIRTY_META);
- else if (inode->i_ino == F2FS_NODE_INO(sbi))
+ } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
dec_page_count(sbi, F2FS_DIRTY_NODES);
- else
+ } else {
inode_dec_dirty_pages(inode);
+ remove_dirty_inode(inode);
+ }
}
/* This is atomic written page, keep Private */
OpenPOWER on IntegriCloud