diff options
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/Kconfig | 33 | ||||
-rw-r--r-- | fs/f2fs/Makefile | 2 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 8 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 1176 | ||||
-rw-r--r-- | fs/f2fs/data.c | 1089 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 92 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 210 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 453 | ||||
-rw-r--r-- | fs/f2fs/file.c | 570 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 91 | ||||
-rw-r--r-- | fs/f2fs/hash.c | 37 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 62 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 63 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 189 | ||||
-rw-r--r-- | fs/f2fs/node.c | 62 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 22 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 473 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 33 | ||||
-rw-r--r-- | fs/f2fs/super.c | 448 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 170 | ||||
-rw-r--r-- | fs/f2fs/verity.c | 290 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 20 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 2 |
23 files changed, 4895 insertions, 700 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 110a38ca5d53..f0faada30f30 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -2,9 +2,11 @@ config F2FS_FS tristate "F2FS filesystem support" depends on BLOCK + select NLS select CRYPTO select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on @@ -20,7 +22,7 @@ config F2FS_FS config F2FS_STAT_FS bool "F2FS Status Information" - depends on F2FS_FS && DEBUG_FS + depends on F2FS_FS default y help /sys/kernel/debug/f2fs/ contains information about all the partitions @@ -60,7 +62,9 @@ config F2FS_FS_SECURITY Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO Linux. This option enables an extended attribute handler for file security labels in the f2fs filesystem, so that it requires enabling - the extended attribute support in advance. + the extended attribute support in advance. In particular you need this + option if you use the setcap command to assign initial process capabi- + lities to executables (the security.* extended attributes). If you are not using a security module, say N. @@ -89,3 +93,28 @@ config F2FS_FAULT_INJECTION Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on. If unsure, say N. + +config F2FS_FS_COMPRESSION + bool "F2FS compression feature" + depends on F2FS_FS + help + Enable filesystem-level compression on f2fs regular files, + multiple back-end compression algorithms are supported. + +config F2FS_FS_LZO + bool "LZO compression support" + depends on F2FS_FS_COMPRESSION + select LZO_COMPRESS + select LZO_DECOMPRESS + default y + help + Support LZO compress algorithm, if unsure, say Y. + +config F2FS_FS_LZ4 + bool "LZ4 compression support" + depends on F2FS_FS_COMPRESSION + select LZ4_COMPRESS + select LZ4_DECOMPRESS + default y + help + Support LZ4 compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 776c4b936504..ee7316b42f69 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -8,3 +8,5 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o +f2fs-$(CONFIG_FS_VERITY) += verity.o +f2fs-$(CONFIG_F2FS_FS_COMPRESSION) += compress.o diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a0eef95b9e0e..44e84ac5c941 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -581,7 +581,7 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); - f2fs_show_injection_info(FAULT_ORPHAN); + f2fs_show_injection_info(sbi, FAULT_ORPHAN); return -ENOSPC; } @@ -1509,10 +1509,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_wait_on_all_pages_writeback(sbi); /* - * invalidate intermediate page cache borrowed from meta inode - * which are used for migration of encrypted inode's blocks. + * invalidate intermediate page cache borrowed from meta inode which are + * used for migration of encrypted or verity inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi)) + if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c new file mode 100644 index 000000000000..d8a64be90a50 --- /dev/null +++ b/fs/f2fs/compress.c @@ -0,0 +1,1176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * f2fs compress support + * + * Copyright (c) 2019 Chao Yu <chao@kernel.org> + */ + +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/writeback.h> +#include <linux/backing-dev.h> +#include <linux/lzo.h> +#include <linux/lz4.h> + +#include "f2fs.h" +#include "node.h" +#include <trace/events/f2fs.h> + +struct f2fs_compress_ops { + int (*init_compress_ctx)(struct compress_ctx *cc); + void (*destroy_compress_ctx)(struct compress_ctx *cc); + int (*compress_pages)(struct compress_ctx *cc); + int (*decompress_pages)(struct decompress_io_ctx *dic); +}; + +static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index) +{ + return index & (cc->cluster_size - 1); +} + +static pgoff_t cluster_idx(struct compress_ctx *cc, pgoff_t index) +{ + return index >> cc->log_cluster_size; +} + +static pgoff_t start_idx_of_cluster(struct compress_ctx *cc) +{ + return cc->cluster_idx << cc->log_cluster_size; +} + +bool f2fs_is_compressed_page(struct page *page) +{ + if (!PagePrivate(page)) + return false; + if (!page_private(page)) + return false; + if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page)) + return false; + f2fs_bug_on(F2FS_M_SB(page->mapping), + *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); + return true; +} + +static void f2fs_set_compressed_page(struct page *page, + struct inode *inode, pgoff_t index, void *data, refcount_t *r) +{ + SetPagePrivate(page); + set_page_private(page, (unsigned long)data); + + /* i_crypto_info and iv index */ + page->index = index; + page->mapping = inode->i_mapping; + if (r) + refcount_inc(r); +} + +static void f2fs_put_compressed_page(struct page *page) +{ + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + page->mapping = NULL; + unlock_page(page); + put_page(page); +} + +static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) +{ + int i; + + for (i = 0; i < len; i++) { + if (!cc->rpages[i]) + continue; + if (unlock) + unlock_page(cc->rpages[i]); + else + put_page(cc->rpages[i]); + } +} + +static void f2fs_put_rpages(struct compress_ctx *cc) +{ + f2fs_drop_rpages(cc, cc->cluster_size, false); +} + +static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) +{ + f2fs_drop_rpages(cc, len, true); +} + +static void f2fs_put_rpages_mapping(struct compress_ctx *cc, + struct address_space *mapping, + pgoff_t start, int len) +{ + int i; + + for (i = 0; i < len; i++) { + struct page *page = find_get_page(mapping, start + i); + + put_page(page); + put_page(page); + } +} + +static void f2fs_put_rpages_wbc(struct compress_ctx *cc, + struct writeback_control *wbc, bool redirty, int unlock) +{ + unsigned int i; + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + if (redirty) + redirty_page_for_writepage(wbc, cc->rpages[i]); + f2fs_put_page(cc->rpages[i], unlock); + } +} + +struct page *f2fs_compress_control_page(struct page *page) +{ + return ((struct compress_io_ctx *)page_private(page))->rpages[0]; +} + +int f2fs_init_compress_ctx(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + + if (cc->nr_rpages) + return 0; + + cc->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + return cc->rpages ? 0 : -ENOMEM; +} + +void f2fs_destroy_compress_ctx(struct compress_ctx *cc) +{ + kfree(cc->rpages); + cc->rpages = NULL; + cc->nr_rpages = 0; + cc->nr_cpages = 0; + cc->cluster_idx = NULL_CLUSTER; +} + +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +{ + unsigned int cluster_ofs; + + if (!f2fs_cluster_can_merge_page(cc, page->index)) + f2fs_bug_on(F2FS_I_SB(cc->inode), 1); + + cluster_ofs = offset_in_cluster(cc, page->index); + cc->rpages[cluster_ofs] = page; + cc->nr_rpages++; + cc->cluster_idx = cluster_idx(cc, page->index); +} + +#ifdef CONFIG_F2FS_FS_LZO +static int lzo_init_compress_ctx(struct compress_ctx *cc) +{ + cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + LZO1X_MEM_COMPRESS, GFP_NOFS); + if (!cc->private) + return -ENOMEM; + + cc->clen = lzo1x_worst_compress(PAGE_SIZE << cc->log_cluster_size); + return 0; +} + +static void lzo_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; +} + +static int lzo_compress_pages(struct compress_ctx *cc) +{ + int ret; + + ret = lzo1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, + &cc->clen, cc->private); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo compress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + return -EIO; + } + return 0; +} + +static int lzo_decompress_pages(struct decompress_io_ctx *dic) +{ + int ret; + + ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, + dic->rbuf, &dic->rlen); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo decompress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + return -EIO; + } + + if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { + printk_ratelimited("%sF2FS-fs (%s): lzo invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lzo_ops = { + .init_compress_ctx = lzo_init_compress_ctx, + .destroy_compress_ctx = lzo_destroy_compress_ctx, + .compress_pages = lzo_compress_pages, + .decompress_pages = lzo_decompress_pages, +}; +#endif + +#ifdef CONFIG_F2FS_FS_LZ4 +static int lz4_init_compress_ctx(struct compress_ctx *cc) +{ + cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + LZ4_MEM_COMPRESS, GFP_NOFS); + if (!cc->private) + return -ENOMEM; + + cc->clen = LZ4_compressBound(PAGE_SIZE << cc->log_cluster_size); + return 0; +} + +static void lz4_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; +} + +static int lz4_compress_pages(struct compress_ctx *cc) +{ + int len; + + len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, + cc->clen, cc->private); + if (!len) { + printk_ratelimited("%sF2FS-fs (%s): lz4 compress failed\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id); + return -EIO; + } + cc->clen = len; + return 0; +} + +static int lz4_decompress_pages(struct decompress_io_ctx *dic) +{ + int ret; + + ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, + dic->clen, dic->rlen); + if (ret < 0) { + printk_ratelimited("%sF2FS-fs (%s): lz4 decompress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + return -EIO; + } + + if (ret != PAGE_SIZE << dic->log_cluster_size) { + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lz4_ops = { + .init_compress_ctx = lz4_init_compress_ctx, + .destroy_compress_ctx = lz4_destroy_compress_ctx, + .compress_pages = lz4_compress_pages, + .decompress_pages = lz4_decompress_pages, +}; +#endif + +static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { +#ifdef CONFIG_F2FS_FS_LZO + &f2fs_lzo_ops, +#else + NULL, +#endif +#ifdef CONFIG_F2FS_FS_LZ4 + &f2fs_lz4_ops, +#else + NULL, +#endif +}; + +bool f2fs_is_compress_backend_ready(struct inode *inode) +{ + if (!f2fs_compressed_file(inode)) + return true; + return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; +} + +static struct page *f2fs_grab_page(void) +{ + struct page *page; + + page = alloc_page(GFP_NOFS); + if (!page) + return NULL; + lock_page(page); + return page; +} + +static int f2fs_compress_pages(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct f2fs_inode_info *fi = F2FS_I(cc->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + unsigned int max_len, nr_cpages; + int i, ret; + + trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, + cc->cluster_size, fi->i_compress_algorithm); + + ret = cops->init_compress_ctx(cc); + if (ret) + goto out; + + max_len = COMPRESS_HEADER_SIZE + cc->clen; + cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); + + cc->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + cc->nr_cpages, GFP_NOFS); + if (!cc->cpages) { + ret = -ENOMEM; + goto destroy_compress_ctx; + } + + for (i = 0; i < cc->nr_cpages; i++) { + cc->cpages[i] = f2fs_grab_page(); + if (!cc->cpages[i]) { + ret = -ENOMEM; + goto out_free_cpages; + } + } + + cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO); + if (!cc->rbuf) { + ret = -ENOMEM; + goto out_free_cpages; + } + + cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL); + if (!cc->cbuf) { + ret = -ENOMEM; + goto out_vunmap_rbuf; + } + + ret = cops->compress_pages(cc); + if (ret) + goto out_vunmap_cbuf; + + max_len = PAGE_SIZE * (cc->cluster_size - 1) - COMPRESS_HEADER_SIZE; + + if (cc->clen > max_len) { + ret = -EAGAIN; + goto out_vunmap_cbuf; + } + + cc->cbuf->clen = cpu_to_le32(cc->clen); + cc->cbuf->chksum = cpu_to_le32(0); + + for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) + cc->cbuf->reserved[i] = cpu_to_le32(0); + + vunmap(cc->cbuf); + vunmap(cc->rbuf); + + nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); + + for (i = nr_cpages; i < cc->nr_cpages; i++) { + f2fs_put_compressed_page(cc->cpages[i]); + cc->cpages[i] = NULL; + } + + cc->nr_cpages = nr_cpages; + + trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, + cc->clen, ret); + return 0; + +out_vunmap_cbuf: + vunmap(cc->cbuf); +out_vunmap_rbuf: + vunmap(cc->rbuf); +out_free_cpages: + for (i = 0; i < cc->nr_cpages; i++) { + if (cc->cpages[i]) + f2fs_put_compressed_page(cc->cpages[i]); + } + kfree(cc->cpages); + cc->cpages = NULL; +destroy_compress_ctx: + cops->destroy_compress_ctx(cc); +out: + trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, + cc->clen, ret); + return ret; +} + +void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) +{ + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_inode_info *fi= F2FS_I(dic->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + int ret; + + dec_page_count(sbi, F2FS_RD_DATA); + + if (bio->bi_status || PageError(page)) + dic->failed = true; + + if (refcount_dec_not_one(&dic->ref)) + return; + + trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, + dic->cluster_size, fi->i_compress_algorithm); + + /* submit partial compressed pages */ + if (dic->failed) { + ret = -EIO; + goto out_free_dic; + } + + dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL); + if (!dic->rbuf) { + ret = -ENOMEM; + goto out_free_dic; + } + + dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); + if (!dic->cbuf) { + ret = -ENOMEM; + goto out_vunmap_rbuf; + } + + dic->clen = le32_to_cpu(dic->cbuf->clen); + dic->rlen = PAGE_SIZE << dic->log_cluster_size; + + if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) { + ret = -EFSCORRUPTED; + goto out_vunmap_cbuf; + } + + ret = cops->decompress_pages(dic); + +out_vunmap_cbuf: + vunmap(dic->cbuf); +out_vunmap_rbuf: + vunmap(dic->rbuf); +out_free_dic: + if (!verity) + f2fs_decompress_end_io(dic->rpages, dic->cluster_size, + ret, false); + + trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, + dic->clen, ret); + if (!verity) + f2fs_free_dic(dic); +} + +static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) +{ + if (cc->cluster_idx == NULL_CLUSTER) + return true; + return cc->cluster_idx == cluster_idx(cc, index); +} + +bool f2fs_cluster_is_empty(struct compress_ctx *cc) +{ + return cc->nr_rpages == 0; +} + +static bool f2fs_cluster_is_full(struct compress_ctx *cc) +{ + return cc->cluster_size == cc->nr_rpages; +} + +bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) +{ + if (f2fs_cluster_is_empty(cc)) + return true; + return is_page_in_cluster(cc, index); +} + +static bool __cluster_may_compress(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + loff_t i_size = i_size_read(cc->inode); + unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE); + int i; + + for (i = 0; i < cc->cluster_size; i++) { + struct page *page = cc->rpages[i]; + + f2fs_bug_on(sbi, !page); + + if (unlikely(f2fs_cp_error(sbi))) + return false; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return false; + + /* beyond EOF */ + if (page->index >= nr_pages) + return false; + } + return true; +} + +/* return # of compressed block addresses */ +static int f2fs_compressed_blocks(struct compress_ctx *cc) +{ + struct dnode_of_data dn; + int ret; + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx_of_cluster(cc), + LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) + ret = 0; + goto fail; + } + + if (dn.data_blkaddr == COMPRESS_ADDR) { + int i; + + ret = 1; + for (i = 1; i < cc->cluster_size; i++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node + i); + if (blkaddr != NULL_ADDR) + ret++; + } + } +fail: + f2fs_put_dnode(&dn); + return ret; +} + +int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) +{ + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, + }; + + return f2fs_compressed_blocks(&cc); +} + +static bool cluster_may_compress(struct compress_ctx *cc) +{ + if (!f2fs_compressed_file(cc->inode)) + return false; + if (f2fs_is_atomic_file(cc->inode)) + return false; + if (f2fs_is_mmap_file(cc->inode)) + return false; + if (!f2fs_cluster_is_full(cc)) + return false; + return __cluster_may_compress(cc); +} + +static void set_cluster_writeback(struct compress_ctx *cc) +{ + int i; + + for (i = 0; i < cc->cluster_size; i++) { + if (cc->rpages[i]) + set_page_writeback(cc->rpages[i]); + } +} + +static void set_cluster_dirty(struct compress_ctx *cc) +{ + int i; + + for (i = 0; i < cc->cluster_size; i++) + if (cc->rpages[i]) + set_page_dirty(cc->rpages[i]); +} + +static int prepare_compress_overwrite(struct compress_ctx *cc, + struct page **pagep, pgoff_t index, void **fsdata) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct address_space *mapping = cc->inode->i_mapping; + struct page *page; + struct dnode_of_data dn; + sector_t last_block_in_bio; + unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; + pgoff_t start_idx = start_idx_of_cluster(cc); + int i, ret; + bool prealloc; + +retry: + ret = f2fs_compressed_blocks(cc); + if (ret <= 0) + return ret; + + /* compressed case */ + prealloc = (ret < cc->cluster_size); + + ret = f2fs_init_compress_ctx(cc); + if (ret) + return ret; + + /* keep page reference to avoid page reclaim */ + for (i = 0; i < cc->cluster_size; i++) { + page = f2fs_pagecache_get_page(mapping, start_idx + i, + fgp_flag, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + goto unlock_pages; + } + + if (PageUptodate(page)) + unlock_page(page); + else + f2fs_compress_ctx_add_page(cc, page); + } + + if (!f2fs_cluster_is_empty(cc)) { + struct bio *bio = NULL; + + ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, + &last_block_in_bio, false); + f2fs_destroy_compress_ctx(cc); + if (ret) + goto release_pages; + if (bio) + f2fs_submit_bio(sbi, bio, DATA); + + ret = f2fs_init_compress_ctx(cc); + if (ret) + goto release_pages; + } + + for (i = 0; i < cc->cluster_size; i++) { + f2fs_bug_on(sbi, cc->rpages[i]); + + page = find_lock_page(mapping, start_idx + i); + f2fs_bug_on(sbi, !page); + + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_compress_ctx_add_page(cc, page); + f2fs_put_page(page, 0); + + if (!PageUptodate(page)) { + f2fs_unlock_rpages(cc, i + 1); + f2fs_put_rpages_mapping(cc, mapping, start_idx, + cc->cluster_size); + f2fs_destroy_compress_ctx(cc); + goto retry; + } + } + + if (prealloc) { + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + + for (i = cc->cluster_size - 1; i > 0; i--) { + ret = f2fs_get_block(&dn, start_idx + i); + if (ret) { + i = cc->cluster_size; + break; + } + + if (dn.data_blkaddr != NEW_ADDR) + break; + } + + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + } + + if (likely(!ret)) { + *fsdata = cc->rpages; + *pagep = cc->rpages[offset_in_cluster(cc, index)]; + return cc->cluster_size; + } + +unlock_pages: + f2fs_unlock_rpages(cc, i); +release_pages: + f2fs_put_rpages_mapping(cc, mapping, start_idx, i); + f2fs_destroy_compress_ctx(cc); + return ret; +} + +int f2fs_prepare_compress_overwrite(struct inode *inode, + struct page **pagep, pgoff_t index, void **fsdata) +{ + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, + .rpages = NULL, + .nr_rpages = 0, + }; + + return prepare_compress_overwrite(&cc, pagep, index, fsdata); +} + +bool f2fs_compress_write_end(struct inode *inode, void *fsdata, + pgoff_t index, unsigned copied) + +{ + struct compress_ctx cc = { + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .rpages = fsdata, + }; + bool first_index = (index == cc.rpages[0]->index); + + if (copied) + set_cluster_dirty(&cc); + + f2fs_put_rpages_wbc(&cc, NULL, false, 1); + f2fs_destroy_compress_ctx(&cc); + + return first_index; +} + +static int f2fs_write_compressed_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct inode *inode = cc->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_io_info fio = { + .sbi = sbi, + .ino = cc->inode->i_ino, + .type = DATA, + .op = REQ_OP_WRITE, + .op_flags = wbc_to_write_flags(wbc), + .old_blkaddr = NEW_ADDR, + .page = NULL, + .encrypted_page = NULL, + .compressed_page = NULL, + .submitted = false, + .need_lock = LOCK_RETRY, + .io_type = io_type, + .io_wbc = wbc, + .encrypted = f2fs_encrypted_file(cc->inode), + }; + struct dnode_of_data dn; + struct node_info ni; + struct compress_io_ctx *cic; + pgoff_t start_idx = start_idx_of_cluster(cc); + unsigned int last_index = cc->cluster_size - 1; + loff_t psize; + int i, err; + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + + f2fs_lock_op(sbi); + + err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (err) + goto out_unlock_op; + + for (i = 0; i < cc->cluster_size; i++) { + if (datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i) == NULL_ADDR) + goto out_put_dnode; + } + + psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + if (err) + goto out_put_dnode; + + fio.version = ni.version; + + cic = f2fs_kzalloc(sbi, sizeof(struct compress_io_ctx), GFP_NOFS); + if (!cic) + goto out_put_dnode; + + cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + cic->inode = inode; + refcount_set(&cic->ref, 1); + cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + if (!cic->rpages) + goto out_put_cic; + + cic->nr_rpages = cc->cluster_size; + + for (i = 0; i < cc->nr_cpages; i++) { + f2fs_set_compressed_page(cc->cpages[i], inode, + cc->rpages[i + 1]->index, + cic, i ? &cic->ref : NULL); + fio.compressed_page = cc->cpages[i]; + if (fio.encrypted) { + fio.page = cc->rpages[i + 1]; + err = f2fs_encrypt_one_page(&fio); + if (err) + goto out_destroy_crypt; + cc->cpages[i] = fio.encrypted_page; + } + } + + set_cluster_writeback(cc); + + for (i = 0; i < cc->cluster_size; i++) + cic->rpages[i] = cc->rpages[i]; + + for (i = 0; i < cc->cluster_size; i++, dn.ofs_in_node++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node); + fio.page = cic->rpages[i]; + fio.old_blkaddr = blkaddr; + + /* cluster header */ + if (i == 0) { + if (blkaddr == COMPRESS_ADDR) + fio.compr_blocks++; + if (__is_valid_data_blkaddr(blkaddr)) + f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR); + goto unlock_continue; + } + + if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr)) + fio.compr_blocks++; + + if (i > cc->nr_cpages) { + if (__is_valid_data_blkaddr(blkaddr)) { + f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_update_data_blkaddr(&dn, NEW_ADDR); + } + goto unlock_continue; + } + + f2fs_bug_on(fio.sbi, blkaddr == NULL_ADDR); + + if (fio.encrypted) + fio.encrypted_page = cc->cpages[i - 1]; + else + fio.compressed_page = cc->cpages[i - 1]; + + cc->cpages[i - 1] = NULL; + f2fs_outplace_write_data(&dn, &fio); + (*submitted)++; +unlock_continue: + inode_dec_dirty_pages(cc->inode); + unlock_page(fio.page); + } + + if (fio.compr_blocks) + f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false); + f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true); + + set_inode_flag(cc->inode, FI_APPEND_WRITE); + if (cc->cluster_idx == 0) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + + down_write(&fi->i_sem); + if (fi->last_disk_size < psize) + fi->last_disk_size = psize; + up_write(&fi->i_sem); + + f2fs_put_rpages(cc); + f2fs_destroy_compress_ctx(cc); + return 0; + +out_destroy_crypt: + kfree(cic->rpages); + + for (--i; i >= 0; i--) + fscrypt_finalize_bounce_page(&cc->cpages[i]); + for (i = 0; i < cc->nr_cpages; i++) { + if (!cc->cpages[i]) + continue; + f2fs_put_page(cc->cpages[i], 1); + } +out_put_cic: + kfree(cic); +out_put_dnode: + f2fs_put_dnode(&dn); +out_unlock_op: + f2fs_unlock_op(sbi); + return -EAGAIN; +} + +void f2fs_compress_write_end_io(struct bio *bio, struct page *page) +{ + struct f2fs_sb_info *sbi = bio->bi_private; + struct compress_io_ctx *cic = + (struct compress_io_ctx *)page_private(page); + int i; + + if (unlikely(bio->bi_status)) + mapping_set_error(cic->inode->i_mapping, -EIO); + + f2fs_put_compressed_page(page); + + dec_page_count(sbi, F2FS_WB_DATA); + + if (refcount_dec_not_one(&cic->ref)) + return; + + for (i = 0; i < cic->nr_rpages; i++) { + WARN_ON(!cic->rpages[i]); + clear_cold_data(cic->rpages[i]); + end_page_writeback(cic->rpages[i]); + } + + kfree(cic->rpages); + kfree(cic); +} + +static int f2fs_write_raw_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct address_space *mapping = cc->inode->i_mapping; + int _submitted, compr_blocks, ret; + int i = -1, err = 0; + + compr_blocks = f2fs_compressed_blocks(cc); + if (compr_blocks < 0) { + err = compr_blocks; + goto out_err; + } + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; +retry_write: + if (cc->rpages[i]->mapping != mapping) { + unlock_page(cc->rpages[i]); + continue; + } + + BUG_ON(!PageLocked(cc->rpages[i])); + + ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + NULL, NULL, wbc, io_type, + compr_blocks); + if (ret) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(cc->rpages[i]); + ret = 0; + } else if (ret == -EAGAIN) { + ret = 0; + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + lock_page(cc->rpages[i]); + clear_page_dirty_for_io(cc->rpages[i]); + goto retry_write; + } + err = ret; + goto out_fail; + } + + *submitted += _submitted; + } + return 0; + +out_fail: + /* TODO: revoke partially updated block addresses */ + BUG_ON(compr_blocks); +out_err: + for (++i; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + redirty_page_for_writepage(wbc, cc->rpages[i]); + unlock_page(cc->rpages[i]); + } + return err; +} + +int f2fs_write_multi_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct f2fs_inode_info *fi = F2FS_I(cc->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + int err; + + *submitted = 0; + if (cluster_may_compress(cc)) { + err = f2fs_compress_pages(cc); + if (err == -EAGAIN) { + goto write; + } else if (err) { + f2fs_put_rpages_wbc(cc, wbc, true, 1); + goto destroy_out; + } + + err = f2fs_write_compressed_pages(cc, submitted, + wbc, io_type); + cops->destroy_compress_ctx(cc); + if (!err) + return 0; + f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN); + } +write: + f2fs_bug_on(F2FS_I_SB(cc->inode), *submitted); + + err = f2fs_write_raw_pages(cc, submitted, wbc, io_type); + f2fs_put_rpages_wbc(cc, wbc, false, 0); +destroy_out: + f2fs_destroy_compress_ctx(cc); + return err; +} + +struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct decompress_io_ctx *dic; + pgoff_t start_idx = start_idx_of_cluster(cc); + int i; + + dic = f2fs_kzalloc(sbi, sizeof(struct decompress_io_ctx), GFP_NOFS); + if (!dic) + return ERR_PTR(-ENOMEM); + + dic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + if (!dic->rpages) { + kfree(dic); + return ERR_PTR(-ENOMEM); + } + + dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + dic->inode = cc->inode; + refcount_set(&dic->ref, 1); + dic->cluster_idx = cc->cluster_idx; + dic->cluster_size = cc->cluster_size; + dic->log_cluster_size = cc->log_cluster_size; + dic->nr_cpages = cc->nr_cpages; + dic->failed = false; + + for (i = 0; i < dic->cluster_size; i++) + dic->rpages[i] = cc->rpages[i]; + dic->nr_rpages = cc->cluster_size; + + dic->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->nr_cpages, GFP_NOFS); + if (!dic->cpages) + goto out_free; + + for (i = 0; i < dic->nr_cpages; i++) { + struct page *page; + + page = f2fs_grab_page(); + if (!page) + goto out_free; + + f2fs_set_compressed_page(page, cc->inode, + start_idx + i + 1, + dic, i ? &dic->ref : NULL); + dic->cpages[i] = page; + } + + dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->cluster_size, GFP_NOFS); + if (!dic->tpages) + goto out_free; + + for (i = 0; i < dic->cluster_size; i++) { + if (cc->rpages[i]) + continue; + + dic->tpages[i] = f2fs_grab_page(); + if (!dic->tpages[i]) + goto out_free; + } + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->tpages[i]) + continue; + dic->tpages[i] = cc->rpages[i]; + } + + return dic; + +out_free: + f2fs_free_dic(dic); + return ERR_PTR(-ENOMEM); +} + +void f2fs_free_dic(struct decompress_io_ctx *dic) +{ + int i; + + if (dic->tpages) { + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) + continue; + f2fs_put_page(dic->tpages[i], 1); + } + kfree(dic->tpages); + } + + if (dic->cpages) { + for (i = 0; i < dic->nr_cpages; i++) { + if (!dic->cpages[i]) + continue; + f2fs_put_compressed_page(dic->cpages[i]); + } + kfree(dic->cpages); + } + + kfree(dic->rpages); + kfree(dic); +} + +void f2fs_decompress_end_io(struct page **rpages, + unsigned int cluster_size, bool err, bool verity) +{ + int i; + + for (i = 0; i < cluster_size; i++) { + struct page *rpage = rpages[i]; + + if (!rpage) + continue; + + if (err || PageError(rpage)) { + ClearPageUptodate(rpage); + ClearPageError(rpage); + } else { + if (!verity || fsverity_verify_page(rpage)) + SetPageUptodate(rpage); + else + SetPageError(rpage); + } + unlock_page(rpage); + } +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index abbf14e9bd72..b27b72107911 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -29,7 +29,49 @@ #define NUM_PREALLOC_POST_READ_CTXS 128 static struct kmem_cache *bio_post_read_ctx_cache; +static struct kmem_cache *bio_entry_slab; static mempool_t *bio_post_read_ctx_pool; +static struct bio_set f2fs_bioset; + +#define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE + +int __init f2fs_init_bioset(void) +{ + if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE, + 0, BIOSET_NEED_BVECS)) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_bioset(void) +{ + bioset_exit(&f2fs_bioset); +} + +static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask, + unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, &f2fs_bioset); +} + +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail) +{ + struct bio *bio; + + if (no_fail) { + /* No failure on bio allocation */ + bio = __f2fs_bio_alloc(GFP_NOIO, npages); + if (!bio) + bio = __f2fs_bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); + return bio; + } + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { + f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); + return NULL; + } + + return __f2fs_bio_alloc(GFP_KERNEL, npages); +} static bool __is_cp_guaranteed(struct page *page) { @@ -40,6 +82,9 @@ static bool __is_cp_guaranteed(struct page *page) if (!mapping) return false; + if (f2fs_is_compressed_page(page)) + return false; + inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -72,18 +117,19 @@ static enum count_type __read_io_type(struct page *page) /* postprocessing steps for read bios */ enum bio_post_read_step { - STEP_INITIAL = 0, STEP_DECRYPT, + STEP_DECOMPRESS, + STEP_VERITY, }; struct bio_post_read_ctx { struct bio *bio; + struct f2fs_sb_info *sbi; struct work_struct work; - unsigned int cur_step; unsigned int enabled_steps; }; -static void __read_end_io(struct bio *bio) +static void __read_end_io(struct bio *bio, bool compr, bool verity) { struct page *page; struct bio_vec *bv; @@ -92,6 +138,13 @@ static void __read_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, iter_all) { page = bv->bv_page; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (compr && f2fs_is_compressed_page(page)) { + f2fs_decompress_pages(bio, page, verity); + continue; + } +#endif + /* PG_error was set if any post_read step failed */ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); @@ -103,61 +156,155 @@ static void __read_end_io(struct bio *bio) dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } - if (bio->bi_private) - mempool_free(bio->bi_private, bio_post_read_ctx_pool); - bio_put(bio); +} + +static void f2fs_release_read_bio(struct bio *bio); +static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity) +{ + if (!compr) + __read_end_io(bio, false, verity); + f2fs_release_read_bio(bio); +} + +static void f2fs_decompress_bio(struct bio *bio, bool verity) +{ + __read_end_io(bio, true, verity); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx); -static void decrypt_work(struct work_struct *work) +static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx) +{ + fscrypt_decrypt_bio(ctx->bio); +} + +static void f2fs_decompress_work(struct bio_post_read_ctx *ctx) +{ + f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY)); +} + +#ifdef CONFIG_F2FS_FS_COMPRESSION +static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size) +{ + f2fs_decompress_end_io(rpages, cluster_size, false, true); +} + +static void f2fs_verify_bio(struct bio *bio) +{ + struct page *page = bio_first_page_all(bio); + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + + f2fs_verify_pages(dic->rpages, dic->cluster_size); + f2fs_free_dic(dic); +} +#endif + +static void f2fs_verity_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; +#ifdef CONFIG_F2FS_FS_COMPRESSION + unsigned int enabled_steps = ctx->enabled_steps; +#endif - fscrypt_decrypt_bio(ctx->bio); + /* + * fsverity_verify_bio() may call readpages() again, and while verity + * will be disabled for this, decryption may still be needed, resulting + * in another bio_post_read_ctx being allocated. So to prevent + * deadlocks we need to release the current ctx to the mempool first. + * This assumes that verity is the last post-read step. + */ + mempool_free(ctx, bio_post_read_ctx_pool); + bio->bi_private = NULL; + +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* previous step is decompression */ + if (enabled_steps & (1 << STEP_DECOMPRESS)) { + f2fs_verify_bio(bio); + f2fs_release_read_bio(bio); + return; + } +#endif - bio_post_read_processing(ctx); + fsverity_verify_bio(bio); + __f2fs_read_end_io(bio, false, false); +} + +static void f2fs_post_read_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) + f2fs_decrypt_work(ctx); + + if (ctx->enabled_steps & (1 << STEP_DECOMPRESS)) + f2fs_decompress_work(ctx); + + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, f2fs_verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + + __f2fs_read_end_io(ctx->bio, + ctx->enabled_steps & (1 << STEP_DECOMPRESS), false); +} + +static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi, + struct work_struct *work) +{ + queue_work(sbi->post_read_wq, work); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx) { - switch (++ctx->cur_step) { - case STEP_DECRYPT: - if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { - INIT_WORK(&ctx->work, decrypt_work); - fscrypt_enqueue_decrypt_work(&ctx->work); - return; - } - ctx->cur_step++; - /* fall-through */ - default: - __read_end_io(ctx->bio); + /* + * We use different work queues for decryption and for verity because + * verity may require reading metadata pages that need decryption, and + * we shouldn't recurse to the same workqueue. + */ + + if (ctx->enabled_steps & (1 << STEP_DECRYPT) || + ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { + INIT_WORK(&ctx->work, f2fs_post_read_work); + f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work); + return; + } + + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, f2fs_verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; } + + __f2fs_read_end_io(ctx->bio, false, false); } static bool f2fs_bio_post_read_required(struct bio *bio) { - return bio->bi_private && !bio->bi_status; + return bio->bi_private; } static void f2fs_read_end_io(struct bio *bio) { - if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), - FAULT_READ_IO)) { - f2fs_show_injection_info(FAULT_READ_IO); + struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); + + if (time_to_inject(sbi, FAULT_READ_IO)) { + f2fs_show_injection_info(sbi, FAULT_READ_IO); bio->bi_status = BLK_STS_IOERR; } if (f2fs_bio_post_read_required(bio)) { struct bio_post_read_ctx *ctx = bio->bi_private; - ctx->cur_step = STEP_INITIAL; bio_post_read_processing(ctx); return; } - __read_end_io(bio); + __f2fs_read_end_io(bio, false, false); } static void f2fs_write_end_io(struct bio *bio) @@ -167,7 +314,7 @@ static void f2fs_write_end_io(struct bio *bio) struct bvec_iter_all iter_all; if (time_to_inject(sbi, FAULT_WRITE_IO)) { - f2fs_show_injection_info(FAULT_WRITE_IO); + f2fs_show_injection_info(sbi, FAULT_WRITE_IO); bio->bi_status = BLK_STS_IOERR; } @@ -188,6 +335,13 @@ static void f2fs_write_end_io(struct bio *bio) fscrypt_finalize_bounce_page(&page); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_is_compressed_page(page)) { + f2fs_compress_write_end_io(bio, page); + continue; + } +#endif + if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); if (type == F2FS_WB_CP_DATA) @@ -259,26 +413,25 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, /* * Low-level block read/write IO operations. */ -static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, - struct writeback_control *wbc, - int npages, bool is_read, - enum page_type type, enum temp_type temp) +static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) { + struct f2fs_sb_info *sbi = fio->sbi; struct bio *bio; bio = f2fs_bio_alloc(sbi, npages, true); - f2fs_target_device(sbi, blk_addr, bio); - if (is_read) { + f2fs_target_device(sbi, fio->new_blkaddr, bio); + if (is_read_io(fio->op)) { bio->bi_end_io = f2fs_read_end_io; bio->bi_private = NULL; } else { bio->bi_end_io = f2fs_write_end_io; bio->bi_private = sbi; - bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp); + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, + fio->type, fio->temp); } - if (wbc) - wbc_init_bio(wbc, bio); + if (fio->io_wbc) + wbc_init_bio(fio->io_wbc, bio); return bio; } @@ -295,6 +448,9 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (test_opt(sbi, LFS) && current->plug) blk_finish_plug(current->plug); + if (F2FS_IO_ALIGNED(sbi)) + goto submit_io; + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; start %= F2FS_IO_SIZE(sbi); @@ -330,6 +486,12 @@ submit_io: submit_bio(bio); } +void f2fs_submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) +{ + __submit_bio(sbi, bio, type); +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -352,7 +514,6 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { struct bio_vec *bvec; - struct page *target; struct bvec_iter_all iter_all; if (!bio) @@ -362,10 +523,18 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, return true; bio_for_each_segment_all(bvec, bio, iter_all) { + struct page *target = bvec->bv_page; - target = bvec->bv_page; - if (fscrypt_is_bounce_page(target)) + if (fscrypt_is_bounce_page(target)) { target = fscrypt_pagecache_page(target); + if (IS_ERR(target)) + continue; + } + if (f2fs_is_compressed_page(target)) { + target = f2fs_compress_control_page(target); + if (IS_ERR(target)) + continue; + } if (inode && inode == target->mapping->host) return true; @@ -461,8 +630,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) f2fs_trace_ios(fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, - 1, is_read_io(fio->op), fio->type, fio->temp); + bio = __bio_alloc(fio, 1); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -481,6 +649,164 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } +static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, + block_t last_blkaddr, block_t cur_blkaddr) +{ + if (last_blkaddr + 1 != cur_blkaddr) + return false; + return __same_bdev(sbi, cur_blkaddr, bio); +} + +static bool io_type_is_mergeable(struct f2fs_bio_info *io, + struct f2fs_io_info *fio) +{ + if (io->fio.op != fio->op) + return false; + return io->fio.op_flags == fio->op_flags; +} + +static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, + struct f2fs_bio_info *io, + struct f2fs_io_info *fio, + block_t last_blkaddr, + block_t cur_blkaddr) +{ + if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) { + unsigned int filled_blocks = + F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size); + unsigned int io_size = F2FS_IO_SIZE(sbi); + unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt; + + /* IOs in bio is aligned and left space of vectors is not enough */ + if (!(filled_blocks % io_size) && left_vecs < io_size) + return false; + } + if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr)) + return false; + return io_type_is_mergeable(io, fio); +} + +static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, + struct page *page, enum temp_type temp) +{ + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct bio_entry *be; + + be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + be->bio = bio; + bio_get(bio); + + if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) + f2fs_bug_on(sbi, 1); + + down_write(&io->bio_list_lock); + list_add_tail(&be->list, &io->bio_list); + up_write(&io->bio_list_lock); +} + +static void del_bio_entry(struct bio_entry *be) +{ + list_del(&be->list); + kmem_cache_free(bio_entry_slab, be); +} + +static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + enum temp_type temp; + bool found = false; + int ret = -EAGAIN; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (be->bio != *bio) + continue; + + found = true; + + if (bio_add_page(*bio, page, PAGE_SIZE, 0) == + PAGE_SIZE) { + ret = 0; + break; + } + + /* bio is full */ + del_bio_entry(be); + __submit_bio(sbi, *bio, DATA); + break; + } + up_write(&io->bio_list_lock); + } + + if (ret) { + bio_put(*bio); + *bio = NULL; + } + + return ret; +} + +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page) +{ + enum temp_type temp; + bool found = false; + struct bio *target = bio ? *bio : NULL; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + if (list_empty(head)) + continue; + + down_read(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) + break; + } + up_read(&io->bio_list_lock); + + if (!found) + continue; + + found = false; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) { + target = be->bio; + del_bio_entry(be); + break; + } + } + up_write(&io->bio_list_lock); + } + + if (found) + __submit_bio(sbi, target, DATA); + if (bio && *bio) { + bio_put(*bio); + *bio = NULL; + } +} + int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; @@ -494,22 +820,18 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); - if (bio && (*fio->last_block + 1 != fio->new_blkaddr || - !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - } + if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, + fio->new_blkaddr)) + f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { - bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, - BIO_MAX_PAGES, false, fio->type, fio->temp); + bio = __bio_alloc(fio, BIO_MAX_PAGES); bio_set_op_attrs(bio, fio->op, fio->op_flags); - } - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - goto alloc_new; + add_bio_entry(fio->sbi, bio, page, fio->temp); + } else { + if (add_ipu_page(fio->sbi, &bio, page)) + goto alloc_new; } if (fio->io_wbc) @@ -523,19 +845,6 @@ alloc_new: return 0; } -static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, - struct page *page) -{ - if (!bio) - return; - - if (!__has_merged_page(*bio, NULL, page, 0)) - return; - - __submit_bio(sbi, *bio, DATA); - *bio = NULL; -} - void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -561,28 +870,31 @@ next: verify_fio_blkaddr(fio); - bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + if (fio->encrypted_page) + bio_page = fio->encrypted_page; + else if (fio->compressed_page) + bio_page = fio->compressed_page; + else + bio_page = fio->page; /* set submitted = true as a return value */ fio->submitted = true; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); - if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || - (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) || - !__same_bdev(sbi, fio->new_blkaddr, io->bio))) + if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio, + io->last_block_in_bio, fio->new_blkaddr)) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { - if ((fio->type == DATA || fio->type == NODE) && + if (F2FS_IO_ALIGNED(sbi) && + (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { dec_page_count(sbi, WB_DATA_TYPE(bio_page)); fio->retry = true; goto skip; } - io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, - BIO_MAX_PAGES, false, - fio->type, fio->temp); + io->bio = __bio_alloc(fio, BIO_MAX_PAGES); io->fio = *fio; } @@ -603,13 +915,20 @@ skip: goto next; out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || - f2fs_is_checkpoint_ready(sbi)) + !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); up_write(&io->io_rwsem); } +static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) +{ + return fsverity_active(inode) && + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); +} + static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages, unsigned op_flag) + unsigned nr_pages, unsigned op_flag, + pgoff_t first_idx) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -625,13 +944,16 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, if (f2fs_encrypted_file(inode)) post_read_steps |= 1 << STEP_DECRYPT; + if (f2fs_compressed_file(inode)) + post_read_steps |= 1 << STEP_DECOMPRESS; + if (f2fs_need_verity(inode, first_idx)) + post_read_steps |= 1 << STEP_VERITY; + if (post_read_steps) { + /* Due to the mempool, this never fails. */ ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); - if (!ctx) { - bio_put(bio); - return ERR_PTR(-ENOMEM); - } ctx->bio = bio; + ctx->sbi = sbi; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; } @@ -639,6 +961,13 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, return bio; } +static void f2fs_release_read_bio(struct bio *bio) +{ + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); + bio_put(bio); +} + /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) @@ -646,7 +975,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -987,7 +1316,7 @@ alloc: if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(sbi), old_blkaddr, old_blkaddr); - f2fs_set_data_blkaddr(dn); + f2fs_update_data_blkaddr(dn, dn->data_blkaddr); /* * i_size will be updated by direct_IO. Otherwise, we'll get stale @@ -1004,19 +1333,6 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) int err = 0; bool direct_io = iocb->ki_flags & IOCB_DIRECT; - /* convert inline data for Direct I/O*/ - if (direct_io) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } - - if (direct_io && allow_outplace_dio(inode, iocb, from)) - return 0; - - if (is_inode_flag_set(inode, FI_NO_PREALLOC)) - return 0; - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); if (map.m_len > map.m_lblk) @@ -1164,10 +1480,10 @@ next_block: if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); - if (!err) { - blkaddr = dn.data_blkaddr; - set_inode_flag(inode, FI_APPEND_WRITE); - } + if (err) + goto sync_out; + blkaddr = dn.data_blkaddr; + set_inode_flag(inode, FI_APPEND_WRITE); } } else { if (create) { @@ -1372,7 +1688,7 @@ static int get_data_block_dio_write(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_DIO, NULL, f2fs_rw_hint_to_seg_type(inode->i_write_hint), - true); + IS_SWAPFILE(inode) ? false : true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, @@ -1503,7 +1819,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - if (f2fs_has_inline_data(inode)) { + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); if (ret != -EAGAIN) goto out; @@ -1569,6 +1885,15 @@ out: return ret; } +static inline loff_t f2fs_readpage_limit(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_FS_VERITY) && + (IS_VERITY(inode) || f2fs_verity_in_progress(inode))) + return inode->i_sb->s_maxbytes; + + return i_size_read(inode); +} + static int f2fs_read_single_page(struct inode *inode, struct page *page, unsigned nr_pages, struct f2fs_map_blocks *map, @@ -1587,7 +1912,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -1632,6 +1957,11 @@ got_it: } else { zero_out: zero_user_segment(page, 0, PAGE_SIZE); + if (f2fs_need_verity(inode, page->index) && + !fsverity_verify_page(page)) { + ret = -EIO; + goto out; + } if (!PageUptodate(page)) SetPageUptodate(page); unlock_page(page); @@ -1642,15 +1972,15 @@ zero_out: * This page will go to BIO. Do we need to send this * BIO off first? */ - if (bio && (*last_block_in_bio != block_nr - 1 || - !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { + if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio, + *last_block_in_bio, block_nr)) { submit_and_realloc: __submit_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0); + is_readahead ? REQ_RAHEAD : 0, page->index); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -1682,6 +2012,144 @@ out: return ret; } +#ifdef CONFIG_F2FS_FS_COMPRESSION +int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, + unsigned nr_pages, sector_t *last_block_in_bio, + bool is_readahead) +{ + struct dnode_of_data dn; + struct inode *inode = cc->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct bio *bio = *bio_ret; + unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; + sector_t last_block_in_file; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + struct decompress_io_ctx *dic = NULL; + int i; + int ret = 0; + + f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); + + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + + /* get rid of pages beyond EOF */ + for (i = 0; i < cc->cluster_size; i++) { + struct page *page = cc->rpages[i]; + + if (!page) + continue; + if ((sector_t)page->index >= last_block_in_file) { + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) + SetPageUptodate(page); + } else if (!PageUptodate(page)) { + continue; + } + unlock_page(page); + cc->rpages[i] = NULL; + cc->nr_rpages--; + } + + /* we are done since all pages are beyond EOF */ + if (f2fs_cluster_is_empty(cc)) + goto out; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (ret) + goto out; + + /* cluster was overwritten as normal cluster */ + if (dn.data_blkaddr != COMPRESS_ADDR) + goto out; + + for (i = 1; i < cc->cluster_size; i++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + break; + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + ret = -EFAULT; + goto out_put_dnode; + } + cc->nr_cpages++; + } + + /* nothing to decompress */ + if (cc->nr_cpages == 0) { + ret = 0; + goto out_put_dnode; + } + + dic = f2fs_alloc_dic(cc); + if (IS_ERR(dic)) { + ret = PTR_ERR(dic); + goto out_put_dnode; + } + + for (i = 0; i < dic->nr_cpages; i++) { + struct page *page = dic->cpages[i]; + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i + 1); + + if (bio && !page_is_mergeable(sbi, bio, + *last_block_in_bio, blkaddr)) { +submit_and_realloc: + __submit_bio(sbi, bio, DATA); + bio = NULL; + } + + if (!bio) { + bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, + is_readahead ? REQ_RAHEAD : 0, + page->index); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + bio = NULL; + dic->failed = true; + if (refcount_sub_and_test(dic->nr_cpages - i, + &dic->ref)) + f2fs_decompress_end_io(dic->rpages, + cc->cluster_size, true, + false); + f2fs_free_dic(dic); + f2fs_put_dnode(&dn); + *bio_ret = bio; + return ret; + } + } + + f2fs_wait_on_block_writeback(inode, blkaddr); + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + inc_page_count(sbi, F2FS_RD_DATA); + ClearPageError(page); + *last_block_in_bio = blkaddr; + } + + f2fs_put_dnode(&dn); + + *bio_ret = bio; + return 0; + +out_put_dnode: + f2fs_put_dnode(&dn); +out: + f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false); + *bio_ret = bio; + return ret; +} +#endif + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1691,7 +2159,7 @@ out: * use ->readpage() or do the necessary surgery to decouple ->readpages() * from read-ahead. */ -static int f2fs_mpage_readpages(struct address_space *mapping, +int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages, bool is_readahead) { @@ -1699,6 +2167,19 @@ static int f2fs_mpage_readpages(struct address_space *mapping, sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; struct f2fs_map_blocks map; +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = NULL_CLUSTER, + .rpages = NULL, + .cpages = NULL, + .nr_rpages = 0, + .nr_cpages = 0, + }; +#endif + unsigned max_nr_pages = nr_pages; int ret = 0; map.m_pblk = 0; @@ -1722,9 +2203,41 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; } - ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, - &last_block_in_bio, is_readahead); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + /* there are remained comressed pages, submit them */ + if (!f2fs_cluster_can_merge_page(&cc, page->index)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + is_readahead); + f2fs_destroy_compress_ctx(&cc); + if (ret) + goto set_error_page; + } + ret = f2fs_is_compressed_cluster(inode, page->index); + if (ret < 0) + goto set_error_page; + else if (!ret) + goto read_single_page; + + ret = f2fs_init_compress_ctx(&cc); + if (ret) + goto set_error_page; + + f2fs_compress_ctx_add_page(&cc, page); + + goto next_page; + } +read_single_page: +#endif + + ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, + &bio, &last_block_in_bio, is_readahead); if (ret) { +#ifdef CONFIG_F2FS_FS_COMPRESSION +set_error_page: +#endif SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); unlock_page(page); @@ -1732,6 +2245,19 @@ static int f2fs_mpage_readpages(struct address_space *mapping, next_page: if (pages) put_page(page); + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + /* last page */ + if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + is_readahead); + f2fs_destroy_compress_ctx(&cc); + } + } +#endif } BUG_ON(pages && !list_empty(pages)); if (bio) @@ -1746,6 +2272,11 @@ static int f2fs_read_data_page(struct file *file, struct page *page) trace_f2fs_readpage(page, DATA); + if (!f2fs_is_compress_backend_ready(inode)) { + unlock_page(page); + return -EOPNOTSUPP; + } + /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); @@ -1764,6 +2295,9 @@ static int f2fs_read_data_pages(struct file *file, trace_f2fs_readpages(inode, page, nr_pages); + if (!f2fs_is_compress_backend_ready(inode)) + return 0; + /* If the file has inline data, skip readpages */ if (f2fs_has_inline_data(inode)) return 0; @@ -1771,22 +2305,23 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); } -static int encrypt_one_page(struct f2fs_io_info *fio) +int f2fs_encrypt_one_page(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; - struct page *mpage; + struct page *mpage, *page; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; + page = fio->compressed_page ? fio->compressed_page : fio->page; + /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page, - PAGE_SIZE, 0, - gfp_flags); + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, + PAGE_SIZE, 0, gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { @@ -1946,7 +2481,7 @@ got_it: if (ipu_force || (__is_valid_data_blkaddr(fio->old_blkaddr) && need_inplace_update(fio))) { - err = encrypt_one_page(fio); + err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; @@ -1982,13 +2517,16 @@ got_it: fio->version = ni.version; - err = encrypt_one_page(fio); + err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; set_page_writeback(page); ClearPageError(page); + if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) + f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); + /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); @@ -2003,18 +2541,19 @@ out: return err; } -static int __write_data_page(struct page *page, bool *submitted, +int f2fs_write_single_data_page(struct page *page, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, - enum iostat_type io_type) + enum iostat_type io_type, + int compr_blocks) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); - const pgoff_t end_index = ((unsigned long long) i_size) + const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; - loff_t psize = (page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; @@ -2028,6 +2567,7 @@ static int __write_data_page(struct page *page, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, + .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, @@ -2052,7 +2592,9 @@ static int __write_data_page(struct page *page, bool *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index) + if (page->index < end_index || + f2fs_verity_in_progress(inode) || + compr_blocks) goto write; /* @@ -2128,22 +2670,19 @@ out: f2fs_remove_dirty_inode(inode); submitted = NULL; } - unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) { - f2fs_submit_ipu_bio(sbi, bio, page); + !F2FS_I(inode)->cp_task) f2fs_balance_fs(sbi, need_balance_fs); - } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_merged_ipu_write(sbi, bio, NULL); submitted = NULL; } if (submitted) - *submitted = fio.submitted; + *submitted = fio.submitted ? 1 : 0; return 0; @@ -2164,7 +2703,23 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO); +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct inode *inode = page->mapping->host; + + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + goto out; + + if (f2fs_compressed_file(inode)) { + if (f2fs_is_compressed_cluster(inode, page->index)) { + redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; + } + } +out: +#endif + + return f2fs_write_single_data_page(page, NULL, NULL, NULL, + wbc, FS_DATA_IO, 0); } /* @@ -2177,11 +2732,27 @@ static int f2fs_write_cache_pages(struct address_space *mapping, enum iostat_type io_type) { int ret = 0; - int done = 0; + int done = 0, retry = 0; struct pagevec pvec; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); struct bio *bio = NULL; sector_t last_block; +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct inode *inode = mapping->host; + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = NULL_CLUSTER, + .rpages = NULL, + .nr_rpages = 0, + .cpages = NULL, + .rbuf = NULL, + .cbuf = NULL, + .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, + .private = NULL, + }; +#endif int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; @@ -2191,6 +2762,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int range_whole = 0; xa_mark_t tag; int nwritten = 0; + int submitted = 0; + int i; pagevec_init(&pvec); @@ -2220,12 +2793,11 @@ static int f2fs_write_cache_pages(struct address_space *mapping, else tag = PAGECACHE_TAG_DIRTY; retry: + retry = 0; if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; - while (!done && (index <= end)) { - int i; - + while (!done && !retry && (index <= end)) { nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, tag); if (nr_pages == 0) @@ -2233,15 +2805,62 @@ retry: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; - bool submitted = false; + bool need_readd; +readd: + need_readd = false; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + ret = f2fs_init_compress_ctx(&cc); + if (ret) { + done = 1; + break; + } + if (!f2fs_cluster_can_merge_page(&cc, + page->index)) { + ret = f2fs_write_multi_pages(&cc, + &submitted, wbc, io_type); + if (!ret) + need_readd = true; + goto result; + } + + if (unlikely(f2fs_cp_error(sbi))) + goto lock_page; + + if (f2fs_cluster_is_empty(&cc)) { + void *fsdata = NULL; + struct page *pagep; + int ret2; + + ret2 = f2fs_prepare_compress_overwrite( + inode, &pagep, + page->index, &fsdata); + if (ret2 < 0) { + ret = ret2; + done = 1; + break; + } else if (ret2 && + !f2fs_compress_write_end(inode, + fsdata, page->index, + 1)) { + retry = 1; + break; + } + } else { + goto lock_page; + } + } +#endif /* give a priority to WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[DATA]) && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } - +#ifdef CONFIG_F2FS_FS_COMPRESSION +lock_page: +#endif done_index = page->index; retry_write: lock_page(page); @@ -2258,57 +2877,81 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) { + if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_submit_ipu_bio(sbi, &bio, page); - } else { + else goto continue_unlock; - } } if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, &bio, - &last_block, wbc, io_type); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + get_page(page); + f2fs_compress_ctx_add_page(&cc, page); + continue; + } +#endif + ret = f2fs_write_single_data_page(page, &submitted, + &bio, &last_block, wbc, io_type, 0); + if (ret == AOP_WRITEPAGE_ACTIVATE) + unlock_page(page); +#ifdef CONFIG_F2FS_FS_COMPRESSION +result: +#endif + nwritten += submitted; + wbc->nr_to_write -= submitted; + if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to * get # of written pages. */ if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); ret = 0; - continue; + goto next; } else if (ret == -EAGAIN) { ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); congestion_wait(BLK_RW_ASYNC, - HZ/50); + HZ/50); goto retry_write; } - continue; + goto next; } done_index = page->index + 1; done = 1; break; - } else if (submitted) { - nwritten++; } - if (--wbc->nr_to_write <= 0 && + if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } +next: + if (need_readd) + goto readd; } pagevec_release(&pvec); cond_resched(); } - - if (!cycled && !done) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* flush remained pages in compress cluster */ + if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) { + ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type); + nwritten += submitted; + wbc->nr_to_write -= submitted; + if (ret) { + done = 1; + retry = 0; + } + } +#endif + if ((!cycled && !done) || retry) { cycled = 1; index = 0; end = writeback_index - 1; @@ -2322,7 +2965,7 @@ continue_unlock: NULL, 0, DATA); /* submit cached bio of IPU write */ if (bio) - __submit_bio(sbi, bio, DATA); + f2fs_submit_merged_ipu_write(sbi, &bio, NULL); return ret; } @@ -2332,6 +2975,8 @@ static inline bool __should_serialize_io(struct inode *inode, { if (!S_ISREG(inode->i_mode)) return false; + if (f2fs_compressed_file(inode)) + return true; if (IS_NOQUOTA(inode)) return false; /* to avoid deadlock in path of data flush */ @@ -2427,13 +3072,16 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; loff_t i_size = i_size_read(inode); - if (to > i_size) { + if (IS_NOQUOTA(inode)) + return; + + /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ + if (to > i_size && !f2fs_verity_in_progress(inode)) { down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - if (!IS_NOQUOTA(inode)) - f2fs_truncate_blocks(inode, i_size, true); + f2fs_truncate_blocks(inode, i_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2458,7 +3106,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, * the block addresses when there is no need to fill the page. */ if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && - !is_inode_flag_set(inode, FI_NO_PREALLOC)) + !is_inode_flag_set(inode, FI_NO_PREALLOC) && + !f2fs_verity_in_progress(inode)) return 0; /* f2fs_lock_op avoids race between write CP and convert_inline_page */ @@ -2472,6 +3121,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, __do_map_lock(sbi, flag, true); locked = true; } + restart: /* check inline_data */ ipage = f2fs_get_node_page(sbi, inode->i_ino); @@ -2539,9 +3189,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); - err = f2fs_is_checkpoint_ready(sbi); - if (err) + if (!f2fs_is_checkpoint_ready(sbi)) { + err = -ENOSPC; goto fail; + } if ((f2fs_is_atomic_file(inode) && !f2fs_available_free_memory(sbi, INMEM_PAGES)) || @@ -2561,6 +3212,24 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (err) goto fail; } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + int ret; + + *fsdata = NULL; + + ret = f2fs_prepare_compress_overwrite(inode, pagep, + index, fsdata); + if (ret < 0) { + err = ret; + goto fail; + } else if (ret) { + return 0; + } + } +#endif + repeat: /* * Do not use grab_cache_page_write_begin() to avoid deadlock due to @@ -2573,6 +3242,8 @@ repeat: goto fail; } + /* TODO: cluster can be compressed due to race with .writepage */ + *pagep = page; err = prepare_write_begin(sbi, page, pos, len, @@ -2597,7 +3268,8 @@ repeat: if (len == PAGE_SIZE || PageUptodate(page)) return 0; - if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) { + if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && + !f2fs_verity_in_progress(inode)) { zero_user_segment(page, len, PAGE_SIZE); return 0; } @@ -2655,12 +3327,23 @@ static int f2fs_write_end(struct file *file, else SetPageUptodate(page); } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* overwrite compressed file */ + if (f2fs_compressed_file(inode) && fsdata) { + f2fs_compress_write_end(inode, fsdata, page->index, copied); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return copied; + } +#endif + if (!copied) goto unlock_out; set_page_dirty(page); - if (pos + copied > i_size_read(inode)) + if (pos + copied > i_size_read(inode) && + !f2fs_verity_in_progress(inode)) f2fs_i_size_write(inode, pos + copied); unlock_out: f2fs_put_page(page, 1); @@ -2954,7 +3637,8 @@ int f2fs_migrate_page(struct address_space *mapping, #ifdef CONFIG_SWAP /* Copied from generic_swapfile_activate() to check any holes */ -static int check_swap_activate(struct file *swap_file, unsigned int max) +static int check_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; @@ -2965,6 +3649,8 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) sector_t last_block; sector_t lowest_block = -1; sector_t highest_block = 0; + int nr_extents = 0; + int ret; blkbits = inode->i_blkbits; blocks_per_page = PAGE_SIZE >> blkbits; @@ -2976,15 +3662,20 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) probe_block = 0; page_no = 0; last_block = i_size_read(inode) >> blkbits; - while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + while ((probe_block + blocks_per_page) <= last_block && + page_no < sis->max) { unsigned block_in_page; sector_t first_block; + sector_t block = 0; + int err = 0; cond_resched(); - first_block = bmap(inode, probe_block); - if (first_block == 0) + block = probe_block; + err = bmap(inode, &block); + if (err || !block) goto bad_bmap; + first_block = block; /* * It must be PAGE_SIZE aligned on-disk @@ -2996,11 +3687,13 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) for (block_in_page = 1; block_in_page < blocks_per_page; block_in_page++) { - sector_t block; - block = bmap(inode, probe_block + block_in_page); - if (block == 0) + block = probe_block + block_in_page; + err = bmap(inode, &block); + + if (err || !block) goto bad_bmap; + if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; @@ -3016,13 +3709,27 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) highest_block = first_block; } + /* + * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks + */ + ret = add_swap_extent(sis, page_no, 1, first_block); + if (ret < 0) + goto out; + nr_extents += ret; page_no++; probe_block += blocks_per_page; reprobe: continue; } - return 0; - + ret = nr_extents; + *span = 1 + highest_block - lowest_block; + if (page_no == 0) + page_no = 1; /* force Empty message */ + sis->max = page_no; + sis->pages = page_no - 1; + sis->highest_bit = page_no - 1; +out: + return ret; bad_bmap: pr_err("swapon: swapfile has holes\n"); return -EINVAL; @@ -3044,14 +3751,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (ret) return ret; - ret = check_swap_activate(file, sis->max); - if (ret) + if (f2fs_disable_compressed_file(inode)) + return -EINVAL; + + ret = check_swap_activate(sis, file, span); + if (ret < 0) return ret; set_inode_flag(inode, FI_PIN_FILE); f2fs_precache_extents(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - return 0; + return ret; } static void f2fs_swap_deactivate(struct file *file) @@ -3104,7 +3814,9 @@ void f2fs_clear_page_cache_dirty_tag(struct page *page) int __init f2fs_init_post_read_processing(void) { - bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0); + bio_post_read_ctx_cache = + kmem_cache_create("f2fs_bio_post_read_ctx", + sizeof(struct bio_post_read_ctx), 0, 0, NULL); if (!bio_post_read_ctx_cache) goto fail; bio_post_read_ctx_pool = @@ -3120,8 +3832,43 @@ fail: return -ENOMEM; } -void __exit f2fs_destroy_post_read_processing(void) +void f2fs_destroy_post_read_processing(void) { mempool_destroy(bio_post_read_ctx_pool); kmem_cache_destroy(bio_post_read_ctx_cache); } + +int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi) +{ + if (!f2fs_sb_has_encrypt(sbi) && + !f2fs_sb_has_verity(sbi) && + !f2fs_sb_has_compression(sbi)) + return 0; + + sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq", + WQ_UNBOUND | WQ_HIGHPRI, + num_online_cpus()); + if (!sbi->post_read_wq) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) +{ + if (sbi->post_read_wq) + destroy_workqueue(sbi->post_read_wq); +} + +int __init f2fs_init_bio_entry_cache(void) +{ + bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", + sizeof(struct bio_entry)); + if (!bio_entry_slab) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_bio_entry_cache(void) +{ + kmem_cache_destroy(bio_entry_slab); +} diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 7706049d23bf..6b89eae5e4ca 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,9 +21,45 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static struct dentry *f2fs_debugfs_root; static DEFINE_MUTEX(f2fs_stat_mutex); +#ifdef CONFIG_DEBUG_FS +static struct dentry *f2fs_debugfs_root; +#endif + +/* + * This function calculates BDF of every segments + */ +void f2fs_update_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; + unsigned long long bimodal, dist; + unsigned int segno, vblocks; + int ndirty = 0; + + bimodal = 0; + total_vblocks = 0; + blks_per_sec = BLKS_PER_SEC(sbi); + hblks_per_sec = blks_per_sec / 2; + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + vblocks = get_valid_blocks(sbi, segno, true); + dist = abs(vblocks - hblks_per_sec); + bimodal += dist * dist; + + if (vblocks > 0 && vblocks < blks_per_sec) { + total_vblocks += vblocks; + ndirty++; + } + } + dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); + si->bimodal = div64_u64(bimodal, dist); + if (si->dirty_count) + si->avg_vblocks = div_u64(total_vblocks, ndirty); + else + si->avg_vblocks = 0; +} +#ifdef CONFIG_DEBUG_FS static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); @@ -56,7 +92,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); - si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->aw_cnt = sbi->atomic_files; si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); @@ -67,7 +103,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE); si->nr_rd_meta = get_pages(sbi, F2FS_RD_META); - if (SM_I(sbi) && SM_I(sbi)->fcc_info) { + if (SM_I(sbi)->fcc_info) { si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = @@ -75,7 +111,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->flush_list_empty = llist_empty(&SM_I(sbi)->fcc_info->issue_list); } - if (SM_I(sbi) && SM_I(sbi)->dcc_info) { + if (SM_I(sbi)->dcc_info) { si->nr_discarded = atomic_read(&SM_I(sbi)->dcc_info->issued_discard); si->nr_discarding = @@ -94,6 +130,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inline_xattr = atomic_read(&sbi->inline_xattr); si->inline_inode = atomic_read(&sbi->inline_inode); si->inline_dir = atomic_read(&sbi->inline_dir); + si->compr_inode = atomic_read(&sbi->compr_inode); + si->compr_blocks = atomic_read(&sbi->compr_blocks); si->append = sbi->im[APPEND_INO].ino_num; si->update = sbi->im[UPDATE_INO].ino_num; si->orphans = sbi->im[ORPHAN_INO].ino_num; @@ -114,7 +152,6 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; - si->bg_gc = sbi->bg_gc; si->io_skip_bggc = sbi->io_skip_bggc; si->other_skip_bggc = sbi->other_skip_bggc; si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; @@ -146,39 +183,6 @@ static void update_general_status(struct f2fs_sb_info *sbi) } /* - * This function calculates BDF of every segments - */ -static void update_sit_info(struct f2fs_sb_info *sbi) -{ - struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; - unsigned long long bimodal, dist; - unsigned int segno, vblocks; - int ndirty = 0; - - bimodal = 0; - total_vblocks = 0; - blks_per_sec = BLKS_PER_SEC(sbi); - hblks_per_sec = blks_per_sec / 2; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, true); - dist = abs(vblocks - hblks_per_sec); - bimodal += dist * dist; - - if (vblocks > 0 && vblocks < blks_per_sec) { - total_vblocks += vblocks; - ndirty++; - } - } - dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); - si->bimodal = div64_u64(bimodal, dist); - if (si->dirty_count) - si->avg_vblocks = div_u64(total_vblocks, ndirty); - else - si->avg_vblocks = 0; -} - -/* * This function calculates memory footprint. */ static void update_mem_info(struct f2fs_sb_info *sbi) @@ -315,6 +319,8 @@ static int stat_show(struct seq_file *s, void *v) si->inline_inode); seq_printf(s, " - Inline_dentry Inode: %u\n", si->inline_dir); + seq_printf(s, " - Compressed Inode: %u, Blocks: %u\n", + si->compr_inode, si->compr_blocks); seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", @@ -441,7 +447,7 @@ static int stat_show(struct seq_file *s, void *v) si->block_count[LFS], si->segment_count[LFS]); /* segment usage info */ - update_sit_info(si->sbi); + f2fs_update_sit_info(si->sbi); seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n", si->bimodal, si->avg_vblocks); @@ -461,6 +467,7 @@ static int stat_show(struct seq_file *s, void *v) } DEFINE_SHOW_ATTRIBUTE(stat); +#endif int f2fs_build_stats(struct f2fs_sb_info *sbi) { @@ -491,11 +498,12 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_xattr, 0); atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); + atomic_set(&sbi->compr_inode, 0); + atomic_set(&sbi->compr_blocks, 0); atomic_set(&sbi->inplace_count, 0); for (i = META_CP; i < META_MAX; i++) atomic_set(&sbi->meta_count[i], 0); - atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0); @@ -520,14 +528,18 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) void __init f2fs_create_root_stats(void) { +#ifdef CONFIG_DEBUG_FS f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, &stat_fops); +#endif } void f2fs_destroy_root_stats(void) { +#ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(f2fs_debugfs_root); f2fs_debugfs_root = NULL; +#endif } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 85a1528f319f..27d0dd7a16d6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -8,6 +8,7 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/signal.h> +#include <linux/unicode.h> #include "f2fs.h" #include "node.h" #include "acl.h" @@ -81,7 +82,8 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, +static struct f2fs_dir_entry *find_in_block(struct inode *dir, + struct page *dentry_page, struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, @@ -93,7 +95,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); - make_dentry_ptr_block(NULL, &d, dentry_blk); + make_dentry_ptr_block(dir, &d, dentry_blk); de = f2fs_find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; @@ -101,14 +103,116 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, return de; } +#ifdef CONFIG_UNICODE +/* + * Test whether a case-insensitive directory entry matches the filename + * being searched for. + * + * Returns: 0 if the directory entry matches, more than 0 if it + * doesn't match or less than zero on error. + */ +int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, + const struct qstr *entry, bool quick) +{ + const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); + const struct unicode_map *um = sbi->s_encoding; + int ret; + + if (quick) + ret = utf8_strncasecmp_folded(um, name, entry); + else + ret = utf8_strncasecmp(um, name, entry); + + if (ret < 0) { + /* Handle invalid character sequence as either an error + * or as an opaque byte sequence. + */ + if (f2fs_has_strict_mode(sbi)) + return -EINVAL; + + if (name->len != entry->len) + return 1; + + return !!memcmp(name->name, entry->name, name->len); + } + + return ret; +} + +static void f2fs_fname_setup_ci_filename(struct inode *dir, + const struct qstr *iname, + struct fscrypt_str *cf_name) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + if (!IS_CASEFOLDED(dir)) { + cf_name->name = NULL; + return; + } + + cf_name->name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS); + if (!cf_name->name) + return; + + cf_name->len = utf8_casefold(sbi->s_encoding, + iname, cf_name->name, + F2FS_NAME_LEN); + if ((int)cf_name->len <= 0) { + kvfree(cf_name->name); + cf_name->name = NULL; + } +} +#endif + +static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, + struct f2fs_dir_entry *de, + struct fscrypt_name *fname, + struct fscrypt_str *cf_str, + unsigned long bit_pos, + f2fs_hash_t namehash) +{ +#ifdef CONFIG_UNICODE + struct inode *parent = d->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(parent); + struct qstr entry; +#endif + + if (de->hash_code != namehash) + return false; + +#ifdef CONFIG_UNICODE + entry.name = d->filename[bit_pos]; + entry.len = de->name_len; + + if (sbi->s_encoding && IS_CASEFOLDED(parent)) { + if (cf_str->name) { + struct qstr cf = {.name = cf_str->name, + .len = cf_str->len}; + return !f2fs_ci_compare(parent, &cf, &entry, true); + } + return !f2fs_ci_compare(parent, fname->usr_fname, &entry, + false); + } +#endif + if (fscrypt_match_name(fname, d->filename[bit_pos], + le16_to_cpu(de->name_len))) + return true; + return false; +} + struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; + struct fscrypt_str cf_str = { .name = NULL, .len = 0 }; unsigned long bit_pos = 0; int max_len = 0; +#ifdef CONFIG_UNICODE + f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str); +#endif + if (max_slots) *max_slots = 0; while (bit_pos < d->max) { @@ -125,9 +229,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, continue; } - if (de->hash_code == namehash && - fscrypt_match_name(fname, d->filename[bit_pos], - le16_to_cpu(de->name_len))) + if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash)) goto found; if (max_slots && max_len > *max_slots) @@ -141,6 +243,10 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, found: if (max_slots && max_len > *max_slots) *max_slots = max_len; + +#ifdef CONFIG_UNICODE + kvfree(cf_str.name); +#endif return de; } @@ -157,7 +263,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; - f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname); + f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); @@ -179,8 +285,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } } - de = find_in_block(dentry_page, fname, namehash, &max_slots, - res_page); + de = find_in_block(dir, dentry_page, fname, namehash, + &max_slots, res_page); if (de) break; @@ -250,6 +356,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct fscrypt_name fname; int err; +#ifdef CONFIG_UNICODE + if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) && + utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) { + *res_page = ERR_PTR(-EINVAL); + return NULL; + } +#endif + err = fscrypt_setup_filename(dir, child, 1, &fname); if (err) { if (err == -ENOENT) @@ -464,6 +578,20 @@ next: goto next; } +bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct fscrypt_name *fname) +{ + struct f2fs_dentry_ptr d; + unsigned int bit_pos; + int slots = GET_DENTRY_SLOTS(fname_len(fname)); + + make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage)); + + bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); + + return bit_pos < d.max; +} + void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) @@ -504,7 +632,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, level = 0; slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(new_name, NULL); + dentry_hash = f2fs_dentry_hash(dir, new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { @@ -514,7 +642,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { - f2fs_show_injection_info(FAULT_DIR_DEPTH); + f2fs_show_injection_info(F2FS_I_SB(dir), FAULT_DIR_DEPTH); return -ENOSPC; } @@ -568,6 +696,11 @@ add_dentry: if (inode) { f2fs_i_pino_write(inode, dir->i_ino); + + /* synchronize inode page's data from inode cache */ + if (is_inode_flag_set(inode, FI_NEW_INODE)) + f2fs_update_inode(inode, page); + f2fs_put_page(page, 1); } @@ -800,8 +933,9 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos++; ctx->pos = start_pos + bit_pos; printk_ratelimited( - "%s, invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, le32_to_cpu(de->ino)); + "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, sbi->sb->s_id, + le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } @@ -867,7 +1001,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err && err != -ENOKEY) + if (err) goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); @@ -943,3 +1077,53 @@ const struct file_operations f2fs_dir_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif }; + +#ifdef CONFIG_UNICODE +static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + struct qstr qstr = {.name = str, .len = len }; + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); + + if (!inode || !IS_CASEFOLDED(inode)) { + if (len != name->len) + return -1; + return memcmp(str, name->name, len); + } + + return f2fs_ci_compare(inode, name, &qstr, false); +} + +static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const struct unicode_map *um = sbi->s_encoding; + const struct inode *inode = READ_ONCE(dentry->d_inode); + unsigned char *norm; + int len, ret = 0; + + if (!inode || !IS_CASEFOLDED(inode)) + return 0; + + norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); + if (!norm) + return -ENOMEM; + + len = utf8_casefold(um, str, norm, PATH_MAX); + if (len < 0) { + if (f2fs_has_strict_mode(sbi)) + ret = -EINVAL; + goto out; + } + str->hash = full_name_hash(dentry, norm, len); +out: + kvfree(norm); + return ret; +} + +const struct dentry_operations f2fs_dentry_ops = { + .d_hash = f2fs_d_hash, + .d_compare = f2fs_d_compare, +}; +#endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 17382da7f0bd..5355be6b6755 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -25,6 +25,7 @@ #include <crypto/hash.h> #include <linux/fscrypt.h> +#include <linux/fsverity.h> #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) @@ -115,6 +116,8 @@ typedef u32 block_t; /* */ typedef u32 nid_t; +#define COMPRESS_EXT_NUM 16 + struct f2fs_mount_info { unsigned int opt; int write_io_size_bits; /* Write IO size bits */ @@ -139,6 +142,12 @@ struct f2fs_mount_info { block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint */ + + /* For compression */ + unsigned char compress_algorithm; /* algorithm type */ + unsigned compress_log_size; /* cluster log size */ + unsigned char compress_ext_cnt; /* extension count */ + unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -151,8 +160,10 @@ struct f2fs_mount_info { #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 -#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_VERITY 0x0400 #define F2FS_FEATURE_SB_CHKSUM 0x0800 +#define F2FS_FEATURE_CASEFOLD 0x1000 +#define F2FS_FEATURE_COMPRESSION 0x2000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -417,6 +428,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) +#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL +#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL + #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT @@ -630,7 +644,7 @@ enum { #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 #define FADVISE_HOT_BIT 0x20 -#define FADVISE_VERITY_BIT 0x40 /* reserved */ +#define FADVISE_VERITY_BIT 0x40 #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) @@ -650,6 +664,8 @@ enum { #define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) #define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) #define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) +#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT) +#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT) #define DEF_DIR_LEVEL 0 @@ -705,6 +721,12 @@ struct f2fs_inode_info { int i_inline_xattr_size; /* inline xattr size */ struct timespec64 i_crtime; /* inode creation time */ struct timespec64 i_disk_time[4];/* inode disk times */ + + /* for file compress */ + u64 i_compr_blocks; /* # of compressed blocks */ + unsigned char i_compress_algorithm; /* algorithm type */ + unsigned char i_log_cluster_size; /* log of cluster size */ + unsigned int i_cluster_size; /* cluster size */ }; static inline void get_extent_info(struct extent_info *ext, @@ -883,6 +905,7 @@ enum { CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ NO_CHECK_TYPE, + CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */ }; struct flush_cmd { @@ -1010,6 +1033,7 @@ enum need_lock_type { enum cp_reason_type { CP_NO_NEEDED, CP_NON_REGULAR, + CP_COMPRESSED, CP_HARDLINK, CP_SB_NEED_CP, CP_WRONG_PINO, @@ -1048,12 +1072,15 @@ struct f2fs_io_info { block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ + struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ bool is_por; /* indicate IO is from recovery or not */ bool retry; /* need to reallocate block address */ + int compr_blocks; /* # of compressed block addresses */ + bool encrypted; /* indicate file is encrypted */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ @@ -1061,6 +1088,11 @@ struct f2fs_io_info { unsigned char version; /* version of the node */ }; +struct bio_entry { + struct bio *bio; + struct list_head list; +}; + #define is_read_io(rw) ((rw) == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ @@ -1070,6 +1102,8 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ + struct list_head bio_list; /* bio entry list head */ + struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -1154,6 +1188,18 @@ enum fsync_mode { FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; +/* + * this value is set in page as a private data which indicate that + * the page is atomically written, and it is in inmem_pages list. + */ +#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) +#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) + +#define IS_ATOMIC_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) +#define IS_DUMMY_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) + #ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) \ (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) @@ -1161,6 +1207,75 @@ enum fsync_mode { #define DUMMY_ENCRYPTION_ENABLED(sbi) (0) #endif +/* For compression */ +enum compress_algorithm_type { + COMPRESS_LZO, + COMPRESS_LZ4, + COMPRESS_MAX, +}; + +#define COMPRESS_DATA_RESERVED_SIZE 4 +struct compress_data { + __le32 clen; /* compressed data size */ + __le32 chksum; /* checksum of compressed data */ + __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ + u8 cdata[]; /* compressed data */ +}; + +#define COMPRESS_HEADER_SIZE (sizeof(struct compress_data)) + +#define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000 + +/* compress context */ +struct compress_ctx { + struct inode *inode; /* inode the context belong to */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + void *private; /* payload buffer for specified compression algorithm */ +}; + +/* compress context for write IO path */ +struct compress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + refcount_t ref; /* referrence count of raw page */ +}; + +/* decompress io context for read IO path */ +struct decompress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + struct page **tpages; /* temp pages to pad holes in cluster */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + refcount_t ref; /* referrence count of compressed page */ + bool failed; /* indicate IO error during decompression */ +}; + +#define NULL_CLUSTER ((unsigned int)(~0)) +#define MIN_COMPRESS_LOG_SIZE 2 +#define MAX_COMPRESS_LOG_SIZE 8 + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1169,6 +1284,10 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; +#endif #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ @@ -1272,17 +1391,22 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct mutex gc_mutex; /* mutex for GC */ + struct rw_semaphore gc_lock; /* + * semaphore for GC, avoid + * race between GC and GC or CP + */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ + unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; + struct rw_semaphore pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -1306,11 +1430,11 @@ struct f2fs_sb_info { atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ - atomic_t aw_cnt; /* # of atomic writes */ + atomic_t compr_inode; /* # of compressed inodes */ + atomic_t compr_blocks; /* # of compressed blocks */ atomic_t vw_cnt; /* # of volatile writes */ atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ - int bg_gc; /* background gc calls */ unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ @@ -1344,6 +1468,8 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; + + struct workqueue_struct *post_read_wq; /* post read workqueue */ }; struct f2fs_private_dio { @@ -1354,9 +1480,10 @@ struct f2fs_private_dio { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(sbi, type) \ + printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ + KERN_INFO, sbi->sb->s_id, \ + f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { @@ -1376,7 +1503,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) return false; } #else -#define f2fs_show_injection_info(type) do { } while (0) +#define f2fs_show_injection_info(sbi, type) do { } while (0) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { return false; @@ -1640,6 +1767,7 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; + unsigned char *nat_bits; /* * In order to re-enable nat_bits we need to call fsck.f2fs by @@ -1650,10 +1778,12 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); - kvfree(NM_I(sbi)->nat_bits); + nat_bits = NM_I(sbi)->nat_bits; NM_I(sbi)->nat_bits = NULL; if (lock) spin_unlock_irqrestore(&sbi->cp_lock, flags); + + kvfree(nat_bits); } static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, @@ -1758,9 +1888,9 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, return ret; if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); release = *count; - goto enospc; + goto release_quota; } /* @@ -1805,6 +1935,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, enospc: percpu_counter_sub(&sbi->alloc_valid_block_count, release); +release_quota: dquot_release_reservation_block(inode, release); return -ENOSPC; } @@ -2009,7 +2140,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, } if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); goto enospc; } @@ -2124,7 +2255,8 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return page; if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); + f2fs_show_injection_info(F2FS_M_SB(mapping), + FAULT_PAGE_ALLOC); return NULL; } } @@ -2139,7 +2271,7 @@ static inline struct page *f2fs_pagecache_get_page( int fgp_flags, gfp_t gfp_mask) { if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { - f2fs_show_injection_info(FAULT_PAGE_GET); + f2fs_show_injection_info(F2FS_M_SB(mapping), FAULT_PAGE_GET); return NULL; } @@ -2195,26 +2327,6 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, - int npages, bool no_fail) -{ - struct bio *bio; - - if (no_fail) { - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; - } - if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { - f2fs_show_injection_info(FAULT_ALLOC_BIO); - return NULL; - } - - return bio_alloc(GFP_KERNEL, npages); -} - static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { if (sbi->gc_mode == GC_URGENT) @@ -2351,21 +2463,26 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* * On-disk inode flags (f2fs_inode::i_flags) */ +#define F2FS_COMPR_FL 0x00000004 /* Compress file */ #define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ #define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ #define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ #define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ +#define F2FS_NOCOMP_FL 0x00000400 /* Don't compress */ #define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ +#define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ /* Flags that should be inherited by new inodes from their parent. */ #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ - F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL) + F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ + F2FS_CASEFOLD_FL | F2FS_COMPR_FL | F2FS_NOCOMP_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)) +#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ + F2FS_CASEFOLD_FL)) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) @@ -2412,6 +2529,9 @@ enum { FI_PROJ_INHERIT, /* indicate file inherits projectid */ FI_PIN_FILE, /* indicate file should not be gced */ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_MMAP_FILE, /* indicate file was mmapped */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2428,6 +2548,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: + case FI_COMPRESSED_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -2451,6 +2572,12 @@ static inline void clear_inode_flag(struct inode *inode, int flag) __mark_inode_dirty_flag(inode, flag, false); } +static inline bool f2fs_verity_in_progress(struct inode *inode) +{ + return IS_ENABLED(CONFIG_FS_VERITY) && + is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS); +} + static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; @@ -2577,16 +2704,27 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_XATTR); } +static inline int f2fs_compressed_file(struct inode *inode) +{ + return S_ISREG(inode->i_mode) && + is_inode_flag_set(inode, FI_COMPRESSED_FILE); +} + static inline unsigned int addrs_per_inode(struct inode *inode) { unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); - return ALIGN_DOWN(addrs, 1); + + if (!f2fs_compressed_file(inode)) + return addrs; + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); } static inline unsigned int addrs_per_block(struct inode *inode) { - return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1); + if (!f2fs_compressed_file(inode)) + return DEF_ADDRS_PER_BLOCK; + return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size); } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -2619,6 +2757,11 @@ static inline int f2fs_has_inline_dots(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DOTS); } +static inline int f2fs_is_mmap_file(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_MMAP_FILE); +} + static inline bool f2fs_is_pinned_file(struct inode *inode) { return is_inode_flag_set(inode, FI_PIN_FILE); @@ -2679,6 +2822,20 @@ static inline void clear_file(struct inode *inode, int type) f2fs_mark_inode_dirty_sync(inode, true); } +static inline bool f2fs_is_time_consistent(struct inode *inode) +{ + if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) + return false; + return true; +} + static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { bool ret; @@ -2696,14 +2853,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) - return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) - return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) - return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3, - &F2FS_I(inode)->i_crtime)) + if (!f2fs_is_time_consistent(inode)) return false; down_read(&F2FS_I(inode)->i_sem); @@ -2739,7 +2889,8 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (!test_opt(sbi, EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT)) + is_inode_flag_set(inode, FI_NO_EXTENT) || + is_inode_flag_set(inode, FI_COMPRESSED_FILE)) return false; /* @@ -2758,7 +2909,7 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, void *ret; if (time_to_inject(sbi, FAULT_KMALLOC)) { - f2fs_show_injection_info(FAULT_KMALLOC); + f2fs_show_injection_info(sbi, FAULT_KMALLOC); return NULL; } @@ -2779,7 +2930,7 @@ static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { if (time_to_inject(sbi, FAULT_KVMALLOC)) { - f2fs_show_injection_info(FAULT_KVMALLOC); + f2fs_show_injection_info(sbi, FAULT_KVMALLOC); return NULL; } @@ -2859,7 +3010,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi, static inline bool __is_valid_data_blkaddr(block_t blkaddr) { - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR || + blkaddr == COMPRESS_ADDR) return false; return true; } @@ -2925,6 +3077,11 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); +extern int f2fs_ci_compare(const struct inode *parent, + const struct qstr *name, + const struct qstr *entry, + bool quick); + /* * dir.c */ @@ -2952,6 +3109,8 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, struct page **page); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode); +bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct fscrypt_name *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); @@ -2988,8 +3147,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, - struct fscrypt_name *fname); +f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, + const struct qstr *name_info, struct fscrypt_name *fname); /* * node.c @@ -3072,7 +3231,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); @@ -3106,6 +3265,8 @@ void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc); void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi); +int f2fs_check_write_pointer(struct f2fs_sb_info *sbi); int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_segment_manager_caches(void); @@ -3156,12 +3317,19 @@ void f2fs_destroy_checkpoint_caches(void); /* * data.c */ -int f2fs_init_post_read_processing(void); -void f2fs_destroy_post_read_processing(void); +int __init f2fs_init_bioset(void); +void f2fs_destroy_bioset(void); +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail); +int f2fs_init_bio_entry_cache(void); +void f2fs_destroy_bio_entry_cache(void); +void f2fs_submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, nid_t ino, enum page_type type); +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info *fio); @@ -3176,6 +3344,9 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); +int f2fs_mpage_readpages(struct address_space *mapping, + struct list_head *pages, struct page *page, + unsigned nr_pages, bool is_readahead); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); @@ -3189,8 +3360,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); +int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); +int f2fs_write_single_data_page(struct page *page, int *submitted, + struct bio **bio, sector_t *last_block, + struct writeback_control *wbc, + enum iostat_type io_type, + int compr_blocks); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); @@ -3200,6 +3377,10 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, #endif bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); void f2fs_clear_page_cache_dirty_tag(struct page *page); +int f2fs_init_post_read_processing(void); +void f2fs_destroy_post_read_processing(void); +int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); +void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); /* * gc.c @@ -3246,6 +3427,7 @@ struct f2fs_stat_info { int nr_discard_cmd; unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; + int compr_inode, compr_blocks; int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; @@ -3277,7 +3459,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_count(si) ((si)->cp_count++) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) -#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_inc_bggc_count(si) ((si)->bg_gc++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -3316,6 +3498,20 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_dec_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_add_compr_blocks(inode, blocks) \ + (atomic_add(blocks, &F2FS_I_SB(inode)->compr_blocks)) +#define stat_sub_compr_blocks(inode, blocks) \ + (atomic_sub(blocks, &F2FS_I_SB(inode)->compr_blocks)) #define stat_inc_meta_count(sbi, blkaddr) \ do { \ if (blkaddr < SIT_I(sbi)->sit_base_addr) \ @@ -3333,13 +3529,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) -#define stat_inc_atomic_write(inode) \ - (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)) -#define stat_dec_atomic_write(inode) \ - (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)) #define stat_update_max_atomic_write(inode) \ do { \ - int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \ + int cur = F2FS_I_SB(inode)->atomic_files; \ int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \ if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ @@ -3391,6 +3583,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); +void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #else #define stat_inc_cp_count(si) do { } while (0) #define stat_inc_bg_cp_count(si) do { } while (0) @@ -3400,8 +3593,8 @@ void f2fs_destroy_root_stats(void); #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) -#define stat_inc_total_hit(sb) do { } while (0) -#define stat_inc_rbtree_node_hit(sb) do { } while (0) +#define stat_inc_total_hit(sbi) do { } while (0) +#define stat_inc_rbtree_node_hit(sbi) do { } while (0) #define stat_inc_largest_node_hit(sbi) do { } while (0) #define stat_inc_cached_node_hit(sbi) do { } while (0) #define stat_inc_inline_xattr(inode) do { } while (0) @@ -3410,6 +3603,10 @@ void f2fs_destroy_root_stats(void); #define stat_dec_inline_inode(inode) do { } while (0) #define stat_inc_inline_dir(inode) do { } while (0) #define stat_dec_inline_dir(inode) do { } while (0) +#define stat_inc_compr_inode(inode) do { } while (0) +#define stat_dec_compr_inode(inode) do { } while (0) +#define stat_add_compr_blocks(inode, blocks) do { } while (0) +#define stat_sub_compr_blocks(inode, blocks) do { } while (0) #define stat_inc_atomic_write(inode) do { } while (0) #define stat_dec_atomic_write(inode) do { } while (0) #define stat_update_max_atomic_write(inode) do { } while (0) @@ -3429,9 +3626,13 @@ static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } +static inline void update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; +#ifdef CONFIG_UNICODE +extern const struct dentry_operations f2fs_dentry_ops; +#endif extern const struct file_operations f2fs_file_operations; extern const struct inode_operations f2fs_file_inode_operations; extern const struct address_space_operations f2fs_dblock_aops; @@ -3454,6 +3655,7 @@ void f2fs_truncate_inline_inode(struct inode *inode, int f2fs_read_inline_data(struct inode *inode, struct page *page); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); +int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); int f2fs_write_inline_data(struct inode *inode, struct page *page); bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, @@ -3521,6 +3723,9 @@ void f2fs_exit_sysfs(void); int f2fs_register_sysfs(struct f2fs_sb_info *sbi); void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi); +/* verity.c */ +extern const struct fsverity_operations f2fs_verityops; + /* * crypto support */ @@ -3543,7 +3748,85 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) */ static inline bool f2fs_post_read_required(struct inode *inode) { - return f2fs_encrypted_file(inode); + return f2fs_encrypted_file(inode) || fsverity_active(inode) || + f2fs_compressed_file(inode); +} + +/* + * compress.c + */ +#ifdef CONFIG_F2FS_FS_COMPRESSION +bool f2fs_is_compressed_page(struct page *page); +struct page *f2fs_compress_control_page(struct page *page); +int f2fs_prepare_compress_overwrite(struct inode *inode, + struct page **pagep, pgoff_t index, void **fsdata); +bool f2fs_compress_write_end(struct inode *inode, void *fsdata, + pgoff_t index, unsigned copied); +void f2fs_compress_write_end_io(struct bio *bio, struct page *page); +bool f2fs_is_compress_backend_ready(struct inode *inode); +void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity); +bool f2fs_cluster_is_empty(struct compress_ctx *cc); +bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +int f2fs_write_multi_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type); +int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, + unsigned nr_pages, sector_t *last_block_in_bio, + bool is_readahead); +struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); +void f2fs_free_dic(struct decompress_io_ctx *dic); +void f2fs_decompress_end_io(struct page **rpages, + unsigned int cluster_size, bool err, bool verity); +int f2fs_init_compress_ctx(struct compress_ctx *cc); +void f2fs_destroy_compress_ctx(struct compress_ctx *cc); +void f2fs_init_compress_info(struct f2fs_sb_info *sbi); +#else +static inline bool f2fs_is_compressed_page(struct page *page) { return false; } +static inline bool f2fs_is_compress_backend_ready(struct inode *inode) +{ + if (!f2fs_compressed_file(inode)) + return true; + /* not support compression */ + return false; +} +static inline struct page *f2fs_compress_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} +#endif + +static inline void set_compress_context(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + F2FS_I(inode)->i_compress_algorithm = + F2FS_OPTION(sbi).compress_algorithm; + F2FS_I(inode)->i_log_cluster_size = + F2FS_OPTION(sbi).compress_log_size; + F2FS_I(inode)->i_cluster_size = + 1 << F2FS_I(inode)->i_log_cluster_size; + F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; + set_inode_flag(inode, FI_COMPRESSED_FILE); + stat_inc_compr_inode(inode); +} + +static inline u64 f2fs_disable_compressed_file(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + if (!f2fs_compressed_file(inode)) + return 0; + if (fi->i_compr_blocks) + return fi->i_compr_blocks; + + fi->i_flags &= ~F2FS_COMPR_FL; + clear_inode_flag(inode, FI_COMPRESSED_FILE); + stat_dec_compr_inode(inode); + return 0; } #define F2FS_FEATURE_FUNCS(name, flagname) \ @@ -3561,7 +3844,10 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(verity, VERITY); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); +F2FS_FEATURE_FUNCS(casefold, CASEFOLD); +F2FS_FEATURE_FUNCS(compression, COMPRESSION); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, @@ -3643,6 +3929,30 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } +static inline bool f2fs_may_compress(struct inode *inode) +{ + if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || + f2fs_is_atomic_file(inode) || + f2fs_is_volatile_file(inode)) + return false; + return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); +} + +static inline void f2fs_i_compr_blocks_update(struct inode *inode, + u64 blocks, bool add) +{ + int diff = F2FS_I(inode)->i_cluster_size - blocks; + + if (add) { + F2FS_I(inode)->i_compr_blocks += diff; + stat_add_compr_blocks(inode, diff); + } else { + F2FS_I(inode)->i_compr_blocks -= diff; + stat_sub_compr_blocks(inode, diff); + } + f2fs_mark_inode_dirty_sync(inode, true); +} + static inline int block_unaligned_IO(struct inode *inode, struct kiocb *iocb, struct iov_iter *iter) { @@ -3674,17 +3984,22 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, return true; if (f2fs_is_multi_device(sbi)) return true; + if (f2fs_compressed_file(inode)) + return true; /* * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. */ if (f2fs_sb_has_blkzoned(sbi)) return true; - if (test_opt(sbi, LFS) && (rw == WRITE) && - block_unaligned_IO(inode, iocb, iter)) - return true; + if (test_opt(sbi, LFS) && (rw == WRITE)) { + if (block_unaligned_IO(inode, iocb, iter)) + return true; + if (F2FS_IO_ALIGNED(sbi)) + return true; + } if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) && - !(inode->i_flags & S_SWAPFILE)) + !IS_SWAPFILE(inode)) return true; return false; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3e58a6f697dd..0d4da644df3b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,6 +20,7 @@ #include <linux/uio.h> #include <linux/uuid.h> #include <linux/file.h> +#include <linux/nls.h> #include "f2fs.h" #include "node.h" @@ -49,14 +50,40 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn = { .node_changed = false }; - int err; + struct dnode_of_data dn; + bool need_alloc = true; + int err = 0; if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto err; } + if (!f2fs_is_checkpoint_ready(sbi)) { + err = -ENOSPC; + goto err; + } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + int ret = f2fs_is_compressed_cluster(inode, page->index); + + if (ret < 0) { + err = ret; + goto err; + } else if (ret) { + if (ret < F2FS_I(inode)->i_cluster_size) { + err = -EAGAIN; + goto err; + } + need_alloc = false; + } + } +#endif + /* should do out of any locked page */ + if (need_alloc) + f2fs_balance_fs(sbi, true); + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -72,15 +99,17 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } - /* block allocation */ - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_block(&dn, page->index); - f2fs_put_dnode(&dn); - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); - if (err) { - unlock_page(page); - goto out_sem; + if (need_alloc) { + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } } /* fill the page */ @@ -114,8 +143,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); - f2fs_balance_fs(sbi, dn.node_changed); - sb_end_pagefault(inode->i_sb); err: return block_page_mkwrite_return(err); @@ -149,6 +176,8 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) if (!S_ISREG(inode->i_mode)) cp_reason = CP_NON_REGULAR; + else if (f2fs_compressed_file(inode)) + cp_reason = CP_COMPRESSED; else if (inode->i_nlink != 1) cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) @@ -479,6 +508,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -486,6 +518,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; + set_inode_flag(inode, FI_MMAP_FILE); return 0; } @@ -496,6 +529,13 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + + err = fsverity_file_open(inode, filp); + if (err) + return err; + filp->f_mode |= FMODE_NOWAIT; return dquot_file_open(inode, filp); @@ -508,6 +548,9 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; int base = 0; + bool compressed_cluster = false; + int cluster_index = 0, valid_blocks = 0; + int cluster_size = F2FS_I(dn->inode)->i_cluster_size; if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) base = get_extra_isize(dn->inode); @@ -515,26 +558,43 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + base + ofs; - for (; count > 0; count--, addr++, dn->ofs_in_node++) { + /* Assumption: truncateion starts with cluster */ + for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { block_t blkaddr = le32_to_cpu(*addr); + if (f2fs_compressed_file(dn->inode) && + !(cluster_index & (cluster_size - 1))) { + if (compressed_cluster) + f2fs_i_compr_blocks_update(dn->inode, + valid_blocks, false); + compressed_cluster = (blkaddr == COMPRESS_ADDR); + valid_blocks = 0; + } + if (blkaddr == NULL_ADDR) continue; dn->data_blkaddr = NULL_ADDR; f2fs_set_data_blkaddr(dn); - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, + if (__is_valid_data_blkaddr(blkaddr)) { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - continue; + continue; + if (compressed_cluster) + valid_blocks++; + } - f2fs_invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); + + f2fs_invalidate_blocks(sbi, blkaddr); nr_free++; } + if (compressed_cluster) + f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); + if (nr_free) { pgoff_t fofs; /* @@ -577,6 +637,9 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } + if (f2fs_compressed_file(inode)) + return 0; + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); @@ -592,7 +655,7 @@ truncate_out: return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +static int do_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -657,6 +720,28 @@ free_partial: return err; } +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +{ + u64 free_from = from; + + /* + * for compressed file, only support cluster size + * aligned truncation. + */ + if (f2fs_compressed_file(inode)) { + size_t cluster_shift = PAGE_SHIFT + + F2FS_I(inode)->i_log_cluster_size; + size_t cluster_mask = (1 << cluster_shift) - 1; + + free_from = from >> cluster_shift; + if (from & cluster_mask) + free_from++; + free_from <<= cluster_shift; + } + + return do_truncate_blocks(inode, free_from, lock); +} + int f2fs_truncate(struct inode *inode) { int err; @@ -671,7 +756,7 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { - f2fs_show_injection_info(FAULT_TRUNCATE); + f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE); return -EIO; } @@ -716,11 +801,14 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_IMMUTABLE; if (flags & F2FS_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + if (IS_VERITY(inode)) + stat->attributes |= STATX_ATTR_VERITY; stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | - STATX_ATTR_NODUMP); + STATX_ATTR_NODUMP | + STATX_ATTR_VERITY); generic_fillattr(inode, stat); @@ -742,14 +830,11 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; if (ia_valid & ATTR_ATIME) - inode->i_atime = timespec64_trunc(attr->ia_atime, - inode->i_sb->s_time_gran); + inode->i_atime = attr->ia_atime; if (ia_valid & ATTR_MTIME) - inode->i_mtime = timespec64_trunc(attr->ia_mtime, - inode->i_sb->s_time_gran); + inode->i_mtime = attr->ia_mtime; if (ia_valid & ATTR_CTIME) - inode->i_ctime = timespec64_trunc(attr->ia_ctime, - inode->i_sb->s_time_gran); + inode->i_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -770,6 +855,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if ((attr->ia_valid & ATTR_SIZE) && + !f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + err = setattr_prepare(dentry, attr); if (err) return err; @@ -778,6 +867,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; + err = fsverity_prepare_setattr(dentry, attr); + if (err) + return err; + if (is_quota_modification(inode, attr)) { err = dquot_initialize(inode); if (err) @@ -808,14 +901,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - bool to_smaller = (attr->ia_size <= i_size_read(inode)); + loff_t old_size = i_size_read(inode); + + if (attr->ia_size > MAX_INLINE_DATA(inode)) { + /* + * should convert inline inode before i_size_write to + * keep smaller than inline_data size with inline flag. + */ + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); - if (to_smaller) + if (attr->ia_size <= old_size) err = f2fs_truncate(inode); /* * do not trim all blocks after i_size if target size is @@ -823,21 +926,11 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) */ up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (err) return err; - if (!to_smaller) { - /* should convert inline inode here */ - if (!f2fs_may_inline_data(inode)) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } - inode->i_mtime = inode->i_ctime = current_time(inode); - } - down_write(&F2FS_I(inode)->i_sem); + inode->i_mtime = inode->i_ctime = current_time(inode); F2FS_I(inode)->last_disk_size = i_size_read(inode); up_write(&F2FS_I(inode)->i_sem); } @@ -1006,8 +1099,8 @@ next_dnode: } else if (ret == -ENOENT) { if (dn.max_level == 0) return -ENOENT; - done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node, - len); + done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - + dn.ofs_in_node, len); blkaddr += done; do_replace += done; goto next; @@ -1030,7 +1123,7 @@ next_dnode: if (test_opt(sbi, LFS)) { f2fs_put_dnode(&dn); - return -ENOTSUPP; + return -EOPNOTSUPP; } /* do not invalidate this block address */ @@ -1122,7 +1215,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } dn.ofs_in_node++; i++; - new_size = (dst + i) << PAGE_SHIFT; + new_size = (loff_t)(dst + i) << PAGE_SHIFT; if (dst_inode->i_size < new_size) f2fs_i_size_write(dst_inode, new_size); } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); @@ -1170,13 +1263,13 @@ static int __exchange_data_block(struct inode *src_inode, src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), array_size(olen, sizeof(block_t)), - GFP_KERNEL); + GFP_NOFS); if (!src_blkaddr) return -ENOMEM; do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), array_size(olen, sizeof(int)), - GFP_KERNEL); + GFP_NOFS); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1528,12 +1621,44 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - if (f2fs_is_pinned_file(inode)) - map.m_seg_type = CURSEG_COLD_DATA; + if (!map.m_len) + return 0; + + if (f2fs_is_pinned_file(inode)) { + block_t len = (map.m_len >> sbi->log_blocks_per_seg) << + sbi->log_blocks_per_seg; + block_t done = 0; + + if (map.m_len % sbi->blocks_per_seg) + len += sbi->blocks_per_seg; + + map.m_len = sbi->blocks_per_seg; +next_alloc: + if (has_not_enough_free_secs(sbi, 0, + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + down_write(&sbi->gc_lock); + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err && err != -ENODATA && err != -EAGAIN) + goto out_err; + } + + down_write(&sbi->pin_sem); + map.m_seg_type = CURSEG_COLD_DATA_PINNED; + f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA); + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); + up_write(&sbi->pin_sem); + + done += map.m_len; + len -= map.m_len; + map.m_lblk += map.m_len; + if (!err && len) + goto next_alloc; - err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? - F2FS_GET_BLOCK_PRE_DIO : - F2FS_GET_BLOCK_PRE_AIO)); + map.m_len = done; + } else { + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + } +out_err: if (err) { pgoff_t last_off; @@ -1567,6 +1692,10 @@ static long f2fs_fallocate(struct file *file, int mode, if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) + return -ENOSPC; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) @@ -1576,6 +1705,11 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; + if (f2fs_compressed_file(inode) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) + return -EOPNOTSUPP; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1658,7 +1792,47 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (IS_NOQUOTA(inode)) return -EPERM; + if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) { + if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + if (!f2fs_empty_dir(inode)) + return -ENOTEMPTY; + } + + if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) + return -EINVAL; + } + + if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) { + if (S_ISREG(inode->i_mode) && + (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) || + F2FS_HAS_BLOCKS(inode))) + return -EINVAL; + if (iflags & F2FS_NOCOMP_FL) + return -EINVAL; + if (iflags & F2FS_COMPR_FL) { + int err = f2fs_convert_inline_inode(inode); + + if (err) + return err; + + if (!f2fs_may_compress(inode)) + return -EINVAL; + + set_compress_context(inode); + } + } + if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) { + if (fi->i_flags & F2FS_COMPR_FL) + return -EINVAL; + } + fi->i_flags = iflags | (fi->i_flags & ~mask); + f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && + (fi->i_flags & F2FS_NOCOMP_FL)); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); @@ -1684,37 +1858,47 @@ static const struct { u32 iflag; u32 fsflag; } f2fs_fsflags_map[] = { + { F2FS_COMPR_FL, FS_COMPR_FL }, { F2FS_SYNC_FL, FS_SYNC_FL }, { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, { F2FS_APPEND_FL, FS_APPEND_FL }, { F2FS_NODUMP_FL, FS_NODUMP_FL }, { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, { F2FS_INDEX_FL, FS_INDEX_FL }, { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, + { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, }; #define F2FS_GETTABLE_FS_FL ( \ + FS_COMPR_FL | \ FS_SYNC_FL | \ FS_IMMUTABLE_FL | \ FS_APPEND_FL | \ FS_NODUMP_FL | \ FS_NOATIME_FL | \ + FS_NOCOMP_FL | \ FS_INDEX_FL | \ FS_DIRSYNC_FL | \ FS_PROJINHERIT_FL | \ FS_ENCRYPT_FL | \ FS_INLINE_DATA_FL | \ - FS_NOCOW_FL) + FS_NOCOW_FL | \ + FS_VERITY_FL | \ + FS_CASEFOLD_FL) #define F2FS_SETTABLE_FS_FL ( \ + FS_COMPR_FL | \ FS_SYNC_FL | \ FS_IMMUTABLE_FL | \ FS_APPEND_FL | \ FS_NODUMP_FL | \ FS_NOATIME_FL | \ + FS_NOCOMP_FL | \ FS_DIRSYNC_FL | \ - FS_PROJINHERIT_FL) + FS_PROJINHERIT_FL | \ + FS_CASEFOLD_FL) /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ static inline u32 f2fs_iflags_to_fsflags(u32 iflags) @@ -1750,6 +1934,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) if (IS_ENCRYPTED(inode)) fsflags |= FS_ENCRYPT_FL; + if (IS_VERITY(inode)) + fsflags |= FS_VERITY_FL; if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) fsflags |= FS_INLINE_DATA_FL; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -1811,6 +1997,8 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!inode_owner_or_capable(inode)) @@ -1819,12 +2007,17 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (!S_ISREG(inode->i_mode)) return -EINVAL; + if (filp->f_flags & O_DIRECT) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); + f2fs_disable_compressed_file(inode); + if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) ret = -EINVAL; @@ -1850,13 +2043,19 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) goto out; } + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + sbi->atomic_files++; + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + /* add inode in inmem_list first and set atomic_file */ set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; - stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); out: inode_unlock(inode); @@ -1891,11 +2090,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) goto err_out; ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); - if (!ret) { - clear_inode_flag(inode, FI_ATOMIC_FILE); - F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0; - stat_dec_atomic_write(inode); - } + if (!ret) + f2fs_drop_inmem_pages(inode); } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } @@ -2184,6 +2380,49 @@ out_err: return err; } +static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, + unsigned long arg) +{ + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); +} + +static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) +{ + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fscrypt_ioctl_add_key(filp, (void __user *)arg); +} + +static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) +{ + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fscrypt_ioctl_remove_key(filp, (void __user *)arg); +} + +static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, + unsigned long arg) +{ + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); +} + +static int f2fs_ioc_get_encryption_key_status(struct file *filp, + unsigned long arg) +{ + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); +} + static int f2fs_ioc_gc(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2205,12 +2444,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) return ret; if (!sync) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); @@ -2238,9 +2477,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return -EROFS; end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + if (end < range.start || range.start < MAIN_BLKADDR(sbi) || + end >= MAX_BLKADDR(sbi)) return -EINVAL; - } ret = mnt_want_write_file(filp); if (ret) @@ -2248,12 +2487,12 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) do_more: if (!range.sync) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); @@ -2364,8 +2603,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, map.m_lblk += map.m_len; } - if (!fragmented) + if (!fragmented) { + total = 0; goto out; + } sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); @@ -2395,7 +2636,7 @@ do_map: if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk = next_pgofs; - continue; + goto check; } set_inode_flag(inode, FI_DO_DEFRAG); @@ -2419,8 +2660,8 @@ do_map: } map.m_lblk = idx; - - if (idx < pg_end && cnt < blk_per_seg) +check: + if (map.m_lblk < pg_end && cnt < blk_per_seg) goto do_map; clear_inode_flag(inode, FI_DO_DEFRAG); @@ -2682,7 +2923,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) end_segno = min(start_segno + range.segments, dev_end_segno); while (start_segno < end_segno) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } @@ -2977,10 +3218,16 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) ret = -EAGAIN; goto out; } + ret = f2fs_convert_inline_inode(inode); if (ret) goto out; + if (f2fs_disable_compressed_file(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + set_inode_flag(inode, FI_PIN_FILE); ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; done: @@ -3060,10 +3307,98 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) return ret; } +static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + + if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { + f2fs_warn(F2FS_I_SB(inode), + "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.\n", + inode->i_ino); + return -EOPNOTSUPP; + } + + return fsverity_ioctl_enable(filp, (const void __user *)arg); +} + +static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) +{ + if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fsverity_ioctl_measure(filp, (void __user *)arg); +} + +static int f2fs_get_volume_name(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + char *vbuf; + int count; + int err = 0; + + vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); + if (!vbuf) + return -ENOMEM; + + down_read(&sbi->sb_lock); + count = utf16s_to_utf8s(sbi->raw_super->volume_name, + ARRAY_SIZE(sbi->raw_super->volume_name), + UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); + up_read(&sbi->sb_lock); + + if (copy_to_user((char __user *)arg, vbuf, + min(FSLABEL_MAX, count))) + err = -EFAULT; + + kvfree(vbuf); + return err; +} + +static int f2fs_set_volume_name(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + char *vbuf; + int err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX); + if (IS_ERR(vbuf)) + return PTR_ERR(vbuf); + + err = mnt_want_write_file(filp); + if (err) + goto out; + + down_write(&sbi->sb_lock); + + memset(sbi->raw_super->volume_name, 0, + sizeof(sbi->raw_super->volume_name)); + utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN, + sbi->raw_super->volume_name, + ARRAY_SIZE(sbi->raw_super->volume_name)); + + err = f2fs_commit_super(sbi, false); + + up_write(&sbi->sb_lock); + + mnt_drop_write_file(filp); +out: + kfree(vbuf); + return err; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) return -EIO; + if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) + return -ENOSPC; switch (cmd) { case F2FS_IOC_GETFLAGS: @@ -3092,6 +3427,16 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_get_encryption_policy(filp, arg); case F2FS_IOC_GET_ENCRYPTION_PWSALT: return f2fs_ioc_get_encryption_pwsalt(filp, arg); + case FS_IOC_GET_ENCRYPTION_POLICY_EX: + return f2fs_ioc_get_encryption_policy_ex(filp, arg); + case FS_IOC_ADD_ENCRYPTION_KEY: + return f2fs_ioc_add_encryption_key(filp, arg); + case FS_IOC_REMOVE_ENCRYPTION_KEY: + return f2fs_ioc_remove_encryption_key(filp, arg); + case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: + return f2fs_ioc_remove_encryption_key_all_users(filp, arg); + case FS_IOC_GET_ENCRYPTION_KEY_STATUS: + return f2fs_ioc_get_encryption_key_status(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); case F2FS_IOC_GARBAGE_COLLECT_RANGE: @@ -3118,11 +3463,30 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_precache_extents(filp, arg); case F2FS_IOC_RESIZE_FS: return f2fs_ioc_resize_fs(filp, arg); + case FS_IOC_ENABLE_VERITY: + return f2fs_ioc_enable_verity(filp, arg); + case FS_IOC_MEASURE_VERITY: + return f2fs_ioc_measure_verity(filp, arg); + case F2FS_IOC_GET_VOLUME_NAME: + return f2fs_get_volume_name(filp, arg); + case F2FS_IOC_SET_VOLUME_NAME: + return f2fs_set_volume_name(filp, arg); default: return -ENOTTY; } } +static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + + return generic_file_read_iter(iocb, iter); +} + static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; @@ -3134,16 +3498,15 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) { - ret = -EINVAL; - goto out; - } + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) { + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) { ret = -EAGAIN; goto out; } + } else { inode_lock(inode); } @@ -3166,18 +3529,41 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EAGAIN; goto out; } - } else { - preallocated = true; - target_size = iocb->ki_pos + iov_iter_count(from); + goto write; + } - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - ret = err; - goto out; - } + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) + goto write; + + if (iocb->ki_flags & IOCB_DIRECT) { + /* + * Convert inline data for Direct I/O before entering + * f2fs_direct_IO(). + */ + err = f2fs_convert_inline_inode(inode); + if (err) + goto out_err; + /* + * If force_buffere_io() is true, we have to allocate + * blocks all the time, since f2fs_direct_IO will fall + * back to buffered IO. + */ + if (!f2fs_force_buffered_io(inode, iocb, from) && + allow_outplace_dio(inode, iocb, from)) + goto write; } + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { +out_err: + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + ret = err; + goto out; + } +write: ret = __generic_file_write_iter(iocb, from); clear_inode_flag(inode, FI_NO_PREALLOC); @@ -3216,9 +3602,15 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_RELEASE_VOLATILE_WRITE: case F2FS_IOC_ABORT_VOLATILE_WRITE: case F2FS_IOC_SHUTDOWN: + case FITRIM: case F2FS_IOC_SET_ENCRYPTION_POLICY: case F2FS_IOC_GET_ENCRYPTION_PWSALT: case F2FS_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_POLICY_EX: + case FS_IOC_ADD_ENCRYPTION_KEY: + case FS_IOC_REMOVE_ENCRYPTION_KEY: + case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: + case FS_IOC_GET_ENCRYPTION_KEY_STATUS: case F2FS_IOC_GARBAGE_COLLECT: case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: @@ -3232,6 +3624,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: case F2FS_IOC_RESIZE_FS: + case FS_IOC_ENABLE_VERITY: + case FS_IOC_MEASURE_VERITY: + case F2FS_IOC_GET_VOLUME_NAME: + case F2FS_IOC_SET_VOLUME_NAME: break; default: return -ENOIOCTLCMD; @@ -3242,7 +3638,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, - .read_iter = generic_file_read_iter, + .read_iter = f2fs_file_read_iter, .write_iter = f2fs_file_write_iter, .open = f2fs_file_open, .release = f2fs_release_file, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8974672db78f..db8725d473b5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -54,7 +54,7 @@ static int gc_thread_func(void *data) } if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -78,18 +78,18 @@ static int gc_thread_func(void *data) */ if (sbi->gc_mode == GC_URGENT) { wait_ms = gc_th->urgent_sleep_time; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); goto do_gc; } - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); stat_io_skip_bggc_count(sbi); goto next; } @@ -99,7 +99,7 @@ static int gc_thread_func(void *data) else increase_sleep_time(gc_th, &wait_ms); do_gc: - stat_inc_bggc_count(sbi); + stat_inc_bggc_count(sbi->stat_info); /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) @@ -382,6 +382,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, nsearched++; } +#ifdef CONFIG_F2FS_CHECK_FS + /* + * skip selecting the invalid segno (that is failed due to block + * validity check failure during GC) to avoid endless GC loop in + * such cases. + */ + if (test_bit(segno, sm->invalid_segmap)) + goto next; +#endif + secno = GET_SEC_FROM_SEG(sbi, segno); if (sec_usage_check(sbi, secno)) @@ -627,8 +637,21 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); - if (source_blkaddr != blkaddr) + if (source_blkaddr != blkaddr) { +#ifdef CONFIG_F2FS_CHECK_FS + unsigned int segno = GET_SEGNO(sbi, blkaddr); + unsigned long offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + if (unlikely(check_valid_map(sbi, segno, offset))) { + if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) { + f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n", + blkaddr, source_blkaddr, segno); + f2fs_bug_on(sbi, 1); + } + } +#endif return false; + } return true; } @@ -989,8 +1012,14 @@ next_step: block_t start_bidx; nid_t nid = le32_to_cpu(entry->nid); - /* stop BG_GC if there is not enough free sections. */ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) + /* + * stop BG_GC if there is not enough free sections. + * Or, stop GC if the segment becomes fully valid caused by + * race condition along with SSR block allocation. + */ + if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || + get_valid_blocks(sbi, segno, false) == + sbi->blocks_per_seg) return submitted; if (check_valid_map(sbi, segno, off) == 0) @@ -1020,8 +1049,10 @@ next_step: if (phase == 3) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode)) + if (IS_ERR(inode) || is_bad_inode(inode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); continue; + } if (!down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { @@ -1303,7 +1334,7 @@ gc_more: round++; } - if (gc_type == FG_GC) + if (gc_type == FG_GC && seg_freed) sbi->cur_victim_sec = NULL_SEGNO; if (sync) @@ -1339,7 +1370,7 @@ stop: reserved_segments(sbi), prefree_segments(sbi)); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); put_gc_inode(&gc_list); @@ -1378,9 +1409,9 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); do_garbage_collect(sbi, segno, &gc_list, FG_GC); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); put_gc_inode(&gc_list); if (get_valid_blocks(sbi, segno, true)) @@ -1414,11 +1445,20 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + (long long)segs * sbi->blocks_per_seg); + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + int dev_segs = + le32_to_cpu(raw_sb->devs[last_dev].total_segments); + + raw_sb->devs[last_dev].total_segments = + cpu_to_le32(dev_segs + segs); + } } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { int segs = secs * sbi->segs_per_sec; + long long blks = (long long)segs * sbi->blocks_per_seg; long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); @@ -1426,8 +1466,20 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; - F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + - (long long)segs * sbi->blocks_per_seg); + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); + + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + + FDEV(last_dev).total_segments = + (int)FDEV(last_dev).total_segments + segs; + FDEV(last_dev).end_blk = + (long long)FDEV(last_dev).end_blk + blks; +#ifdef CONFIG_BLK_DEV_ZONED + FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz + + (int)(blks >> sbi->log_blocks_per_blkz); +#endif + } } int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) @@ -1442,6 +1494,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) if (block_count > old_block_count) return -EINVAL; + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + __u64 last_segs = FDEV(last_dev).total_segments; + + if (block_count + last_segs * sbi->blocks_per_seg <= + old_block_count) + return -EINVAL; + } + /* new fs size should align to section size */ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); if (rem) diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index cc82f142f811..5bc4dcd8fc03 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -14,6 +14,7 @@ #include <linux/f2fs_fs.h> #include <linux/cryptohash.h> #include <linux/pagemap.h> +#include <linux/unicode.h> #include "f2fs.h" @@ -67,7 +68,7 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, +static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, struct fscrypt_name *fname) { __u32 hash; @@ -103,3 +104,37 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); return f2fs_hash; } + +f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, + const struct qstr *name_info, struct fscrypt_name *fname) +{ +#ifdef CONFIG_UNICODE + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + const struct unicode_map *um = sbi->s_encoding; + int r, dlen; + unsigned char *buff; + struct qstr folded; + + if (!name_info->len || !IS_CASEFOLDED(dir)) + goto opaque_seq; + + buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL); + if (!buff) + return -ENOMEM; + + dlen = utf8_casefold(um, name_info, buff, PATH_MAX); + if (dlen < 0) { + kvfree(buff); + goto opaque_seq; + } + folded.name = buff; + folded.len = dlen; + r = __f2fs_dentry_hash(&folded, fname); + + kvfree(buff); + return r; + +opaque_seq: +#endif + return __f2fs_dentry_hash(name_info, fname); +} diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3613efca8c00..4167e5408151 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -131,6 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) err = f2fs_get_node_info(fio.sbi, dn->nid, &ni); if (err) { + f2fs_truncate_data_blocks_range(dn, 1); f2fs_put_dnode(dn); return err; } @@ -320,7 +321,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, return NULL; } - namehash = f2fs_dentry_hash(&name, fname); + namehash = f2fs_dentry_hash(dir, &name, fname); inline_dentry = inline_data_addr(dir, ipage); @@ -367,7 +368,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, struct f2fs_dentry_ptr src, dst; int err; - page = f2fs_grab_cache_page(dir->i_mapping, 0, false); + page = f2fs_grab_cache_page(dir->i_mapping, 0, true); if (!page) { f2fs_put_page(ipage, 1); return -ENOMEM; @@ -529,7 +530,7 @@ recover: return err; } -static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, +static int do_convert_inline_dir(struct inode *dir, struct page *ipage, void *inline_dentry) { if (!F2FS_I(dir)->i_dir_level) @@ -538,6 +539,44 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); } +int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct fscrypt_name fname; + void *inline_dentry = NULL; + int err = 0; + + if (!f2fs_has_inline_dentry(dir)) + return 0; + + f2fs_lock_op(sbi); + + err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (err) + goto out; + + ipage = f2fs_get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_enough_room(dir, ipage, &fname)) { + f2fs_put_page(ipage, 1); + goto out; + } + + inline_dentry = inline_data_addr(dir, ipage); + + err = do_convert_inline_dir(dir, ipage, inline_dentry); + if (!err) + f2fs_put_page(ipage, 1); +out: + f2fs_unlock_op(sbi); + return err; +} + int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode) @@ -561,7 +600,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { - err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); + err = do_convert_inline_dir(dir, ipage, inline_dentry); if (err) return err; err = -EAGAIN; @@ -580,7 +619,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true, true); - name_hash = f2fs_dentry_hash(new_name, NULL); + name_hash = f2fs_dentry_hash(dir, new_name, NULL); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); @@ -588,6 +627,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, /* we don't need to mark_inode_dirty now */ if (inode) { f2fs_i_pino_write(inode, dir->i_ino); + + /* synchronize inode page's data from inode cache */ + if (is_inode_flag_set(inode, FI_NEW_INODE)) + f2fs_update_inode(inode, page); + f2fs_put_page(page, 1); } @@ -704,7 +748,13 @@ int f2fs_inline_data_fiemap(struct inode *inode, if (IS_ERR(ipage)) return PTR_ERR(ipage); - if (!f2fs_has_inline_data(inode)) { + if ((S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) && + !f2fs_has_inline_data(inode)) { + err = -EAGAIN; + goto out; + } + + if (S_ISDIR(inode->i_mode) && !f2fs_has_inline_dentry(inode)) { err = -EAGAIN; goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a33d7a849b2d..78c3f1d70f1d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -46,9 +46,13 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; if (file_is_encrypt(inode)) new_fl |= S_ENCRYPTED; + if (file_is_verity(inode)) + new_fl |= S_VERITY; + if (flags & F2FS_CASEFOLD_FL) + new_fl |= S_CASEFOLD; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC| - S_ENCRYPTED); + S_ENCRYPTED|S_VERITY|S_CASEFOLD); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -196,6 +200,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode *ri = F2FS_INODE(node_page); unsigned long long iblocks; iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); @@ -282,6 +287,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && + fi->i_flags & F2FS_COMPR_FL && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, + i_log_cluster_size)) { + if (ri->i_compress_algorithm >= COMPRESS_MAX) + return false; + if (le64_to_cpu(ri->i_compr_blocks) > inode->i_blocks) + return false; + if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || + ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) + return false; + } + return true; } @@ -403,6 +421,18 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); } + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && + (fi->i_flags & F2FS_COMPR_FL)) { + if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, + i_log_cluster_size)) { + fi->i_compr_blocks = le64_to_cpu(ri->i_compr_blocks); + fi->i_compress_algorithm = ri->i_compress_algorithm; + fi->i_log_cluster_size = ri->i_log_cluster_size; + fi->i_cluster_size = 1 << fi->i_log_cluster_size; + set_inode_flag(inode, FI_COMPRESSED_FILE); + } + } + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; @@ -412,6 +442,8 @@ static int do_read_inode(struct inode *inode) stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); + stat_inc_compr_inode(inode); + stat_add_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks); return 0; } @@ -565,6 +597,17 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_crtime_nsec = cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); } + + if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && + F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + i_log_cluster_size)) { + ri->i_compr_blocks = + cpu_to_le64(F2FS_I(inode)->i_compr_blocks); + ri->i_compress_algorithm = + F2FS_I(inode)->i_compress_algorithm; + ri->i_log_cluster_size = + F2FS_I(inode)->i_log_cluster_size; + } } __set_inode_rdev(inode, ri); @@ -611,10 +654,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; - if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) + /* + * atime could be updated without dirtying f2fs inode in lazytime mode + */ + if (f2fs_is_time_consistent(inode) && + !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; - if (f2fs_is_checkpoint_ready(sbi)) + if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; /* @@ -673,7 +720,7 @@ retry: err = f2fs_truncate(inode); if (time_to_inject(sbi, FAULT_EVICT_INODE)) { - f2fs_show_injection_info(FAULT_EVICT_INODE); + f2fs_show_injection_info(sbi, FAULT_EVICT_INODE); err = -EIO; } @@ -693,7 +740,8 @@ retry: if (err) { f2fs_update_inode_page(inode); - set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + if (dquot_initialize_needed(inode)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } sb_end_intwrite(inode->i_sb); no_delete: @@ -702,8 +750,10 @@ no_delete: stat_dec_inline_xattr(inode); stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + stat_dec_compr_inode(inode); + stat_sub_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks); - if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) && + if (likely(!f2fs_cp_error(sbi) && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); else @@ -733,6 +783,7 @@ no_delete: } out_clear: fscrypt_put_encryption_info(inode); + fsverity_cleanup_inode(inode); clear_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c5b99042e6f2..2aa035422c0f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -119,6 +119,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + if (f2fs_sb_has_compression(sbi)) { + /* Inherit the compression flag in directory */ + if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) && + f2fs_may_compress(inode)) + set_compress_context(inode); + } + f2fs_set_inode_flags(inode); trace_f2fs_new_inode(inode, 0); @@ -149,6 +156,9 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) size_t sublen = strlen(sub); int i; + if (sublen == 1 && *sub == '*') + return 1; + /* * filename format of multimedia file should be defined as: * "filename + '.' + extension + (optional: '.' + temp extension)". @@ -262,6 +272,45 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, return 0; } +static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + unsigned char (*ext)[F2FS_EXTENSION_LEN]; + unsigned int ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + int i, cold_count, hot_count; + + if (!f2fs_sb_has_compression(sbi) || + is_inode_flag_set(inode, FI_COMPRESSED_FILE) || + F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL || + !f2fs_may_compress(inode)) + return; + + down_read(&sbi->sb_lock); + + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + + for (i = cold_count; i < cold_count + hot_count; i++) { + if (is_extension_exist(name, extlist[i])) { + up_read(&sbi->sb_lock); + return; + } + } + + up_read(&sbi->sb_lock); + + ext = F2FS_OPTION(sbi).extensions; + + for (i = 0; i < ext_cnt; i++) { + if (!is_extension_exist(name, ext[i])) + continue; + + set_compress_context(inode); + return; + } +} + static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -272,9 +321,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - err = f2fs_is_checkpoint_ready(sbi); - if (err) - return err; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; err = dquot_initialize(dir); if (err) @@ -287,6 +335,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) set_file_temperature(sbi, inode, dentry->d_name.name); + set_compress_inode(sbi, inode, dentry->d_name.name); + inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -321,9 +371,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - err = f2fs_is_checkpoint_ready(sbi); - if (err) - return err; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; err = fscrypt_prepare_link(old_dentry, dir, dentry); if (err) @@ -489,6 +538,17 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out_iput; } out_splice: +#ifdef CONFIG_UNICODE + if (!inode && IS_CASEFOLDED(dir)) { + /* Eventually we want to call d_add_ci(dentry, NULL) + * for negative dentries in the encoding case as + * well. For now, prevent the negative dentry + * from being cached. + */ + trace_f2fs_lookup_end(dir, dentry, ino, err); + return NULL; + } +#endif new = d_splice_alias(inode, dentry); err = PTR_ERR_OR_ZERO(new); trace_f2fs_lookup_end(dir, dentry, ino, err); @@ -537,6 +597,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } f2fs_delete_entry(de, page, dir, inode); +#ifdef CONFIG_UNICODE + /* VFS negative dentries are incompatible with Encoding and + * Case-insensitiveness. Eventually we'll want avoid + * invalidating the dentries here, alongside with returning the + * negative dentries at f2fs_lookup(), when it is better + * supported by the VFS for the CI case. + */ + if (IS_CASEFOLDED(dir)) + d_invalidate(dentry); +#endif f2fs_unlock_op(sbi); if (IS_DIRSYNC(dir)) @@ -571,9 +641,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - err = f2fs_is_checkpoint_ready(sbi); - if (err) - return err; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); @@ -703,9 +772,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - err = f2fs_is_checkpoint_ready(sbi); - if (err) - return err; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; err = dquot_initialize(dir); if (err) @@ -780,6 +848,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (whiteout) { f2fs_i_links_write(inode, false); + inode->i_state |= I_LINKABLE; *whiteout = inode; } else { d_tmpfile(dentry, inode); @@ -804,6 +873,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (unlikely(f2fs_cp_error(sbi))) return -EIO; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { int err = fscrypt_get_encryption_info(dir); @@ -830,25 +901,43 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct inode *whiteout = NULL; - struct page *old_dir_page; + struct page *old_dir_page = NULL; struct page *old_page, *new_page = NULL; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; - bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; - err = f2fs_is_checkpoint_ready(sbi); - if (err) - return err; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; + /* + * If new_inode is null, the below renaming flow will + * add a link in old_dir which can conver inline_dir. + * After then, if we failed to get the entry due to other + * reasons like ENOMEM, we had to remove the new entry. + * Instead of adding such the error handling routine, let's + * simply convert first here. + */ + if (old_dir == new_dir && !new_inode) { + err = f2fs_try_convert_inline_dir(old_dir, new_dentry); + if (err) + return err; + } + + if (flags & RENAME_WHITEOUT) { + err = f2fs_create_whiteout(old_dir, &whiteout); + if (err) + return err; + } + err = dquot_initialize(old_dir); if (err) goto out; @@ -880,17 +969,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(old_dir, &whiteout); - if (err) - goto out_dir; - } - if (new_inode) { err = -ENOTEMPTY; if (old_dir_entry && !f2fs_empty_dir(new_inode)) - goto out_whiteout; + goto out_dir; err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, @@ -898,7 +981,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) { if (IS_ERR(new_page)) err = PTR_ERR(new_page); - goto out_whiteout; + goto out_dir; } f2fs_balance_fs(sbi, true); @@ -910,6 +993,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto put_out_dir; f2fs_set_link(new_dir, new_entry, new_page, old_inode); + new_page = NULL; new_inode->i_ctime = current_time(new_inode); down_write(&F2FS_I(new_inode)->i_sem); @@ -930,49 +1014,28 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = f2fs_add_link(new_dentry, old_inode); if (err) { f2fs_unlock_op(sbi); - goto out_whiteout; + goto out_dir; } if (old_dir_entry) f2fs_i_links_write(new_dir, true); - - /* - * old entry and new entry can locate in the same inline - * dentry in inode, when attaching new entry in inline dentry, - * it could force inline dentry conversion, after that, - * old_entry and old_page will point to wrong address, in - * order to avoid this, let's do the check and update here. - */ - if (is_old_inline && !f2fs_has_inline_dentry(old_dir)) { - f2fs_put_page(old_page, 0); - old_page = NULL; - - old_entry = f2fs_find_entry(old_dir, - &old_dentry->d_name, &old_page); - if (!old_entry) { - err = -ENOENT; - if (IS_ERR(old_page)) - err = PTR_ERR(old_page); - f2fs_unlock_op(sbi); - goto out_whiteout; - } - } } down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else - F2FS_I(old_inode)->i_pino = new_dir->i_ino; + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); + old_page = NULL; if (whiteout) { - whiteout->i_state |= I_LINKABLE; set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); if (err) @@ -1006,17 +1069,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - if (new_page) - f2fs_put_page(new_page, 0); -out_whiteout: - if (whiteout) - iput(whiteout); + f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); out: + if (whiteout) + iput(whiteout); return err; } @@ -1035,9 +1096,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - err = f2fs_is_checkpoint_ready(sbi); - if (err) - return err; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, @@ -1124,7 +1184,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(old_dir, old_entry, old_page, new_inode); down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); + if (!old_dir_entry) + file_lost_pino(old_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); @@ -1139,7 +1203,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(new_dir, new_entry, new_page, old_inode); down_write(&F2FS_I(new_inode)->i_sem); - file_lost_pino(new_inode); + if (!new_dir_entry) + file_lost_pino(new_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(new_inode, old_dir->i_ino); up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = current_time(new_dir); @@ -1250,6 +1318,7 @@ const struct inode_operations f2fs_dir_inode_operations = { #ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, #endif + .fiemap = f2fs_fiemap, }; const struct inode_operations f2fs_symlink_inode_operations = { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a18b2a895771..9d02cdcdbb07 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -875,7 +875,7 @@ static int truncate_dnode(struct dnode_of_data *dn) /* get direct node */ page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); - if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) + if (PTR_ERR(page) == -ENOENT) return 1; else if (IS_ERR(page)) return PTR_ERR(page); @@ -1524,7 +1524,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->sync_mode == WB_SYNC_NONE && + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + wbc->sync_mode == WB_SYNC_NONE && IS_DNODE(page) && is_cold_node(page)) goto redirty_out; @@ -1762,6 +1763,47 @@ out: return ret ? -EIO: 0; } +static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + bool clean; + + if (inode->i_ino != ino) + return 0; + + if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) + return 0; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + clean = list_empty(&F2FS_I(inode)->gdirty_list); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + + if (clean) + return 0; + + inode = igrab(inode); + if (!inode) + return 0; + return 1; +} + +static bool flush_dirty_inode(struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct inode *inode; + nid_t ino = ino_of_node(page); + + inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL); + if (!inode) + return false; + + f2fs_update_inode(inode, page); + unlock_page(page); + + iput(inode); + return true; +} + int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type) @@ -1785,6 +1827,7 @@ next_step: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; bool submitted = false; + bool may_dirty = true; /* give a priority to WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[NODE]) && @@ -1832,6 +1875,13 @@ continue_unlock: goto lock_node; } + /* flush dirty inode */ + if (IS_INODE(page) && may_dirty) { + may_dirty = false; + if (flush_dirty_inode(page)) + goto lock_node; + } + f2fs_wait_on_page_writeback(page, NODE, true, true); if (!clear_page_dirty_for_io(page)) @@ -1860,7 +1910,8 @@ continue_unlock: } if (step < 2) { - if (wbc->sync_mode == WB_SYNC_NONE && step == 1) + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + wbc->sync_mode == WB_SYNC_NONE && step == 1) goto out; step++; goto next_step; @@ -2298,7 +2349,6 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { up_read(&nm_i->nat_tree_lock); - f2fs_bug_on(sbi, !mount); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } @@ -2348,7 +2398,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: if (time_to_inject(sbi, FAULT_ALLOC_NID)) { - f2fs_show_injection_info(FAULT_ALLOC_NID); + f2fs_show_injection_info(sbi, FAULT_ALLOC_NID); return false; } @@ -2964,7 +3014,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) /* not used nids: 0, node, meta, (and root counted as valid node) */ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - - sbi->nquota_files - F2FS_RESERVED_NODE_NUM; + F2FS_RESERVED_NODE_NUM; nm_i->nid_cnt[FREE_NID] = 0; nm_i->nid_cnt[PREALLOC_NID] = 0; nm_i->nat_cnt = 0; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 783773e4560d..763d5c0951d1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -711,7 +711,7 @@ next: f2fs_put_page(page, 1); } if (!err) - f2fs_allocate_new_segments(sbi); + f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE); return err; } @@ -723,6 +723,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; + bool fix_curseg_write_pointer = false; #ifdef CONFIG_QUOTA int quota_enabled; #endif @@ -774,6 +775,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) sbi->sb->s_flags = s_flags; } skip: + fix_curseg_write_pointer = !check_only || list_empty(&inode_list); + destroy_fsync_dnodes(&inode_list, err); destroy_fsync_dnodes(&tmp_inode_list, err); @@ -784,9 +787,22 @@ skip: if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); - } else { - clear_sbi_flag(sbi, SBI_POR_DOING); } + + /* + * If fsync data succeeds or there is no fsync data to recover, + * and the f2fs is not read only, check and fix zoned block devices' + * write pointer consistency. + */ + if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) && + f2fs_sb_has_blkzoned(sbi)) { + err = f2fs_fix_curseg_write_pointer(sbi); + ret = err; + } + + if (!err) + clear_sbi_flag(sbi, SBI_POR_DOING); + mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a661ac32e829..cf0eb002cfd4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -185,8 +185,6 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) void f2fs_register_inmem_page(struct inode *inode, struct page *page) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; f2fs_trace_pid(page); @@ -200,15 +198,11 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) INIT_LIST_HEAD(&new->list); /* increase reference count with clean state */ - mutex_lock(&fi->inmem_lock); get_page(page); - list_add_tail(&new->list, &fi->inmem_pages); - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (list_empty(&fi->inmem_ilist)) - list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + mutex_lock(&F2FS_I(inode)->inmem_lock); + list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); - mutex_unlock(&fi->inmem_lock); + mutex_unlock(&F2FS_I(inode)->inmem_lock); trace_f2fs_register_inmem_page(page, INMEM); } @@ -294,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; struct inode *inode; struct f2fs_inode_info *fi; + unsigned int count = sbi->atomic_files; + unsigned int looped = 0; next: spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (list_empty(head)) { @@ -302,22 +298,26 @@ next: } fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); inode = igrab(&fi->vfs_inode); + if (inode) + list_move_tail(&fi->inmem_ilist, head); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); if (inode) { if (gc_failure) { - if (fi->i_gc_failures[GC_FAILURE_ATOMIC]) - goto drop; - goto skip; + if (!fi->i_gc_failures[GC_FAILURE_ATOMIC]) + goto skip; } -drop: set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); f2fs_drop_inmem_pages(inode); +skip: iput(inode); } -skip: congestion_wait(BLK_RW_ASYNC, HZ/50); cond_resched(); + if (gc_failure) { + if (++looped >= count) + return; + } goto next; } @@ -330,19 +330,19 @@ void f2fs_drop_inmem_pages(struct inode *inode) mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false, true); - - if (list_empty(&fi->inmem_pages)) { - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (!list_empty(&fi->inmem_ilist)) - list_del_init(&fi->inmem_ilist); - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); - } mutex_unlock(&fi->inmem_lock); } - clear_inode_flag(inode, FI_ATOMIC_FILE); fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; - stat_dec_atomic_write(inode); + + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + sbi->atomic_files--; + } + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); } void f2fs_drop_inmem_page(struct inode *inode, struct page *page) @@ -471,11 +471,6 @@ int f2fs_commit_inmem_pages(struct inode *inode) mutex_lock(&fi->inmem_lock); err = __f2fs_commit_inmem_pages(inode); - - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (!list_empty(&fi->inmem_ilist)) - list_del_init(&fi->inmem_ilist); - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT); @@ -493,7 +488,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -501,7 +496,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); - if (f2fs_is_checkpoint_ready(sbi)) + if (!f2fs_is_checkpoint_ready(sbi)) return; /* @@ -509,7 +504,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * dir/node pages without enough free segments. */ if (has_not_enough_free_secs(sbi, 0, 0)) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); f2fs_gc(sbi, false, false, NULL_SEGNO); } } @@ -817,9 +812,13 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; - if (get_valid_blocks(sbi, segno, true) == 0) + if (get_valid_blocks(sbi, segno, true) == 0) { clear_bit(GET_SEC_FROM_SEG(sbi, segno), dirty_i->victim_secmap); +#ifdef CONFIG_F2FS_CHECK_FS + clear_bit(segno, SIT_I(sbi)->invalid_segmap); +#endif + } } } @@ -1017,8 +1016,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, if (dc->error) printk_ratelimited( - "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); + "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, sbi->sb->s_id, + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -1158,7 +1158,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len += len; if (time_to_inject(sbi, FAULT_DISCARD)) { - f2fs_show_injection_info(FAULT_DISCARD); + f2fs_show_injection_info(sbi, FAULT_DISCARD); err = -EIO; goto submit; } @@ -1780,7 +1780,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); + return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + sector, nr_sects, GFP_NOFS); } /* For conventional zones, use regular discard if supported */ @@ -2084,6 +2085,13 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) f2fs_stop_discard_thread(sbi); + /* + * Recovery can cache discard commands, so in error path of + * fill_super(), it needs to give a chance to handle them. + */ + if (unlikely(atomic_read(&dcc->discard_cmd_cnt))) + f2fs_issue_discard_timeout(sbi); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; } @@ -2156,9 +2164,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; - /* don't overwrite by SSR to keep node chain */ - if (IS_NODESEG(se->type) && - !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + /* + * SSR should never reuse block which is checkpointed + * or newly invalidated. + */ + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -2214,7 +2224,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) struct sit_info *sit_i = SIT_I(sbi); f2fs_bug_on(sbi, addr == NULL_ADDR); - if (addr == NEW_ADDR) + if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); @@ -2690,7 +2700,7 @@ unlock: up_read(&SM_I(sbi)->curseg_lock); } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg; unsigned int old_segno; @@ -2699,10 +2709,17 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (type != NO_CHECK_TYPE && i != type) + continue; + curseg = CURSEG_I(sbi, i); - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_segno); + if (type == NO_CHECK_TYPE || curseg->next_blkoff || + get_valid_blocks(sbi, curseg->segno, false) || + get_ckpt_valid_blocks(sbi, curseg->segno)) { + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_segno); + } } up_write(&SIT_I(sbi)->sentry_lock); @@ -2843,9 +2860,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (sbi->discard_blks == 0) goto out; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); if (err) goto out; @@ -3018,7 +3035,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; - if (is_cold_data(fio->page) || file_is_cold(inode)) + if (is_cold_data(fio->page) || file_is_cold(inode) || + f2fs_compressed_file(inode)) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || @@ -3068,6 +3086,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + bool put_pin_sem = false; + + if (type == CURSEG_COLD_DATA) { + /* GC during CURSEG_COLD_DATA_PINNED allocation */ + if (down_read_trylock(&sbi->pin_sem)) { + put_pin_sem = true; + } else { + type = CURSEG_WARM_DATA; + curseg = CURSEG_I(sbi, type); + } + } else if (type == CURSEG_COLD_DATA_PINNED) { + type = CURSEG_COLD_DATA; + } down_read(&SM_I(sbi)->curseg_lock); @@ -3116,12 +3147,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, f2fs_inode_chksum_set(sbi, page); } + if (F2FS_IO_ALIGNED(sbi)) + fio->retry = false; + if (add_list) { struct f2fs_bio_info *io; INIT_LIST_HEAD(&fio->list); fio->in_list = true; - fio->retry = false; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); @@ -3131,6 +3164,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); up_read(&SM_I(sbi)->curseg_lock); + + if (put_pin_sem) + up_read(&sbi->pin_sem); } static void update_device_state(struct f2fs_io_info *fio) @@ -3253,7 +3289,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); - if (fio->bio) + if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) err = f2fs_merge_page_bio(fio); else err = f2fs_submit_page_bio(fio); @@ -3377,7 +3413,10 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + /* submit cached LFS IO */ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + /* sbumit cached IPU IO */ + f2fs_submit_merged_ipu_write(sbi, NULL, page); if (ordered) { wait_on_page_writeback(page); f2fs_bug_on(sbi, locked && PageWriteback(page)); @@ -3447,11 +3486,6 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i = CURSEG_I(sbi, i); segno = le32_to_cpu(ckpt->cur_data_segno[i]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); - if (blk_off > ENTRIES_IN_SUM) { - f2fs_bug_on(sbi, 1); - f2fs_put_page(page, 1); - return -EFAULT; - } seg_i->next_segno = segno; reset_curseg(sbi, i, 0); seg_i->alloc_type = ckpt->alloc_type[i]; @@ -3941,8 +3975,8 @@ static int build_sit_info(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct sit_info *sit_i; unsigned int sit_segs, start; - char *src_bitmap; - unsigned int bitmap_size; + char *src_bitmap, *bitmap; + unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size; /* allocate memory for SIT information */ sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL); @@ -3958,33 +3992,37 @@ static int build_sit_info(struct f2fs_sb_info *sbi) if (!sit_i->sentries) return -ENOMEM; - bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size, + main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); + sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; +#ifdef CONFIG_F2FS_CHECK_FS + bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4; +#else + bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3; +#endif + sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); + if (!sit_i->bitmap) + return -ENOMEM; + + bitmap = sit_i->bitmap; + for (start = 0; start < MAIN_SEGS(sbi); start++) { - sit_i->sentries[start].cur_valid_map - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - sit_i->sentries[start].ckpt_valid_map - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].cur_valid_map || - !sit_i->sentries[start].ckpt_valid_map) - return -ENOMEM; + sit_i->sentries[start].cur_valid_map = bitmap; + bitmap += SIT_VBLOCK_MAP_SIZE; + + sit_i->sentries[start].ckpt_valid_map = bitmap; + bitmap += SIT_VBLOCK_MAP_SIZE; #ifdef CONFIG_F2FS_CHECK_FS - sit_i->sentries[start].cur_valid_map_mir - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].cur_valid_map_mir) - return -ENOMEM; + sit_i->sentries[start].cur_valid_map_mir = bitmap; + bitmap += SIT_VBLOCK_MAP_SIZE; #endif - sit_i->sentries[start].discard_map - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, - GFP_KERNEL); - if (!sit_i->sentries[start].discard_map) - return -ENOMEM; + sit_i->sentries[start].discard_map = bitmap; + bitmap += SIT_VBLOCK_MAP_SIZE; } sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -4004,17 +4042,23 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; /* setup SIT bitmap from ckeckpoint pack */ - bitmap_size = __bitmap_size(sbi, SIT_BITMAP); + sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP); src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); - sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); if (!sit_i->sit_bitmap) return -ENOMEM; #ifdef CONFIG_F2FS_CHECK_FS - sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + sit_i->sit_bitmap_mir = kmemdup(src_bitmap, + sit_bitmap_size, GFP_KERNEL); if (!sit_i->sit_bitmap_mir) return -ENOMEM; + + sit_i->invalid_segmap = f2fs_kvzalloc(sbi, + main_bitmap_size, GFP_KERNEL); + if (!sit_i->invalid_segmap) + return -ENOMEM; #endif /* init SIT information */ @@ -4023,7 +4067,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; sit_i->written_valid_blocks = 0; - sit_i->bitmap_size = bitmap_size; + sit_i->bitmap_size = sit_bitmap_size; sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); @@ -4161,7 +4205,6 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (start >= MAIN_SEGS(sbi)) { f2fs_err(sbi, "Wrong journal entry on segno %u", start); - set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EFSCORRUPTED; break; } @@ -4201,7 +4244,6 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (!err && total_node_blocks != valid_node_count(sbi)) { f2fs_err(sbi, "SIT is corrupted node# %u vs %u", total_node_blocks, valid_node_count(sbi)); - set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EFSCORRUPTED; } @@ -4326,6 +4368,263 @@ out: return 0; } +#ifdef CONFIG_BLK_DEV_ZONED + +static int check_zone_write_pointer(struct f2fs_sb_info *sbi, + struct f2fs_dev_info *fdev, + struct blk_zone *zone) +{ + unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno; + block_t zone_block, wp_block, last_valid_block; + unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; + int i, s, b, ret; + struct seg_entry *se; + + if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) + return 0; + + wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block); + wp_segno = GET_SEGNO(sbi, wp_block); + wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); + zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block); + zone_segno = GET_SEGNO(sbi, zone_block); + zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno); + + if (zone_segno >= MAIN_SEGS(sbi)) + return 0; + + /* + * Skip check of zones cursegs point to, since + * fix_curseg_write_pointer() checks them. + */ + for (i = 0; i < NO_CHECK_TYPE; i++) + if (zone_secno == GET_SEC_FROM_SEG(sbi, + CURSEG_I(sbi, i)->segno)) + return 0; + + /* + * Get last valid block of the zone. + */ + last_valid_block = zone_block - 1; + for (s = sbi->segs_per_sec - 1; s >= 0; s--) { + segno = zone_segno + s; + se = get_seg_entry(sbi, segno); + for (b = sbi->blocks_per_seg - 1; b >= 0; b--) + if (f2fs_test_bit(b, se->cur_valid_map)) { + last_valid_block = START_BLOCK(sbi, segno) + b; + break; + } + if (last_valid_block >= zone_block) + break; + } + + /* + * If last valid block is beyond the write pointer, report the + * inconsistency. This inconsistency does not cause write error + * because the zone will not be selected for write operation until + * it get discarded. Just report it. + */ + if (last_valid_block >= wp_block) { + f2fs_notice(sbi, "Valid block beyond write pointer: " + "valid block[0x%x,0x%x] wp[0x%x,0x%x]", + GET_SEGNO(sbi, last_valid_block), + GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), + wp_segno, wp_blkoff); + return 0; + } + + /* + * If there is no valid block in the zone and if write pointer is + * not at zone start, reset the write pointer. + */ + if (last_valid_block + 1 == zone_block && zone->wp != zone->start) { + f2fs_notice(sbi, + "Zone without valid block has non-zero write " + "pointer. Reset the write pointer: wp[0x%x,0x%x]", + wp_segno, wp_blkoff); + ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, + zone->len >> log_sectors_per_block); + if (ret) { + f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", + fdev->path, ret); + return ret; + } + } + + return 0; +} + +static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, + block_t zone_blkaddr) +{ + int i; + + for (i = 0; i < sbi->s_ndevs; i++) { + if (!bdev_is_zoned(FDEV(i).bdev)) + continue; + if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr && + zone_blkaddr <= FDEV(i).end_blk)) + return &FDEV(i); + } + + return NULL; +} + +static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, + void *data) { + memcpy(data, zone, sizeof(struct blk_zone)); + return 0; +} + +static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *cs = CURSEG_I(sbi, type); + struct f2fs_dev_info *zbd; + struct blk_zone zone; + unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; + block_t cs_zone_block, wp_block; + unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; + sector_t zone_sector; + int err; + + cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); + cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); + + zbd = get_target_zoned_dev(sbi, cs_zone_block); + if (!zbd) + return 0; + + /* report zone for the sector the curseg points to */ + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) + << log_sectors_per_block; + err = blkdev_report_zones(zbd->bdev, zone_sector, 1, + report_one_zone_cb, &zone); + if (err != 1) { + f2fs_err(sbi, "Report zone failed: %s errno=(%d)", + zbd->path, err); + return err; + } + + if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) + return 0; + + wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); + wp_segno = GET_SEGNO(sbi, wp_block); + wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); + wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); + + if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && + wp_sector_off == 0) + return 0; + + f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: " + "curseg[0x%x,0x%x] wp[0x%x,0x%x]", + type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); + + f2fs_notice(sbi, "Assign new section to curseg[%d]: " + "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); + allocate_segment_by_default(sbi, type, true); + + /* check consistency of the zone curseg pointed to */ + if (check_zone_write_pointer(sbi, zbd, &zone)) + return -EIO; + + /* check newly assigned zone */ + cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); + cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); + + zbd = get_target_zoned_dev(sbi, cs_zone_block); + if (!zbd) + return 0; + + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) + << log_sectors_per_block; + err = blkdev_report_zones(zbd->bdev, zone_sector, 1, + report_one_zone_cb, &zone); + if (err != 1) { + f2fs_err(sbi, "Report zone failed: %s errno=(%d)", + zbd->path, err); + return err; + } + + if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) + return 0; + + if (zone.wp != zone.start) { + f2fs_notice(sbi, + "New zone for curseg[%d] is not yet discarded. " + "Reset the zone: curseg[0x%x,0x%x]", + type, cs->segno, cs->next_blkoff); + err = __f2fs_issue_discard_zone(sbi, zbd->bdev, + zone_sector >> log_sectors_per_block, + zone.len >> log_sectors_per_block); + if (err) { + f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", + zbd->path, err); + return err; + } + } + + return 0; +} + +int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) +{ + int i, ret; + + for (i = 0; i < NO_CHECK_TYPE; i++) { + ret = fix_curseg_write_pointer(sbi, i); + if (ret) + return ret; + } + + return 0; +} + +struct check_zone_write_pointer_args { + struct f2fs_sb_info *sbi; + struct f2fs_dev_info *fdev; +}; + +static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, + void *data) { + struct check_zone_write_pointer_args *args; + args = (struct check_zone_write_pointer_args *)data; + + return check_zone_write_pointer(args->sbi, args->fdev, zone); +} + +int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +{ + int i, ret; + struct check_zone_write_pointer_args args; + + for (i = 0; i < sbi->s_ndevs; i++) { + if (!bdev_is_zoned(FDEV(i).bdev)) + continue; + + args.sbi = sbi; + args.fdev = &FDEV(i); + ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES, + check_zone_write_pointer_cb, &args); + if (ret < 0) + return ret; + } + + return 0; +} +#else +int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) +{ + return 0; +} + +int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +{ + return 0; +} +#endif + /* * Update min, max modified time for cost-benefit GC algorithm */ @@ -4492,21 +4791,12 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi) static void destroy_sit_info(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int start; if (!sit_i) return; - if (sit_i->sentries) { - for (start = 0; start < MAIN_SEGS(sbi); start++) { - kvfree(sit_i->sentries[start].cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - kvfree(sit_i->sentries[start].cur_valid_map_mir); -#endif - kvfree(sit_i->sentries[start].ckpt_valid_map); - kvfree(sit_i->sentries[start].discard_map); - } - } + if (sit_i->sentries) + kvfree(sit_i->bitmap); kvfree(sit_i->tmp_map); kvfree(sit_i->sentries); @@ -4517,6 +4807,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kvfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS kvfree(sit_i->sit_bitmap_mir); + kvfree(sit_i->invalid_segmap); #endif kvfree(sit_i); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b74602813a05..459dc3901a57 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -200,18 +200,6 @@ struct segment_allocation { void (*allocate_segment)(struct f2fs_sb_info *, int, bool); }; -/* - * this value is set in page as a private data which indicate that - * the page is atomically written, and it is in inmem_pages list. - */ -#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) -#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) - -#define IS_ATOMIC_WRITTEN_PAGE(page) \ - (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) -#define IS_DUMMY_WRITTEN_PAGE(page) \ - (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) - #define MAX_SKIP_GC_COUNT 16 struct inmem_pages { @@ -226,9 +214,13 @@ struct sit_info { block_t sit_base_addr; /* start block address of SIT area */ block_t sit_blocks; /* # of blocks used by SIT area */ block_t written_valid_blocks; /* # of valid blocks in main area */ + char *bitmap; /* all bitmaps pointer */ char *sit_bitmap; /* SIT bitmap pointer */ #ifdef CONFIG_F2FS_CHECK_FS char *sit_bitmap_mir; /* SIT bitmap mirror */ + + /* bitmap of segments to be ignored by GC in case of errors */ + unsigned long *invalid_segmap; #endif unsigned int bitmap_size; /* SIT bitmap size */ @@ -309,6 +301,8 @@ struct sit_entry_set { */ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) { + if (type == CURSEG_COLD_DATA_PINNED) + type = CURSEG_COLD_DATA; return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } @@ -582,13 +576,13 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, reserved_sections(sbi) + needed); } -static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) +static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) { if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - return 0; + return true; if (likely(!has_not_enough_free_secs(sbi, 0, 0))) - return 0; - return -ENOSPC; + return true; + return false; } static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) @@ -613,8 +607,10 @@ static inline int utilization(struct f2fs_sb_info *sbi) * threashold, * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash * storages. IPU will be triggered only if the # of dirty - * pages over min_fsync_blocks. - * F2FS_IPUT_DISABLE - disable IPU. (=default option) + * pages over min_fsync_blocks. (=default option) + * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. + * F2FS_IPU_NOCACHE - disable IPU bio cache. + * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 @@ -629,6 +625,7 @@ enum { F2FS_IPU_SSR_UTIL, F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, + F2FS_IPU_NOCACHE, }; static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 78a1b873e48a..65a7a432dfee 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -23,6 +23,7 @@ #include <linux/f2fs_fs.h> #include <linux/sysfs.h> #include <linux/quota.h> +#include <linux/unicode.h> #include "f2fs.h" #include "node.h" @@ -140,6 +141,9 @@ enum { Opt_checkpoint_disable_cap, Opt_checkpoint_disable_cap_perc, Opt_checkpoint_enable, + Opt_compress_algorithm, + Opt_compress_log_size, + Opt_compress_extension, Opt_err, }; @@ -202,6 +206,9 @@ static match_table_t f2fs_tokens = { {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, {Opt_checkpoint_enable, "checkpoint=enable"}, + {Opt_compress_algorithm, "compress_algorithm=%s"}, + {Opt_compress_log_size, "compress_log_size=%u"}, + {Opt_compress_extension, "compress_extension=%s"}, {Opt_err, NULL}, }; @@ -222,6 +229,36 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) va_end(args); } +#ifdef CONFIG_UNICODE +static const struct f2fs_sb_encodings { + __u16 magic; + char *name; + char *version; +} f2fs_sb_encoding_map[] = { + {F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"}, +}; + +static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb, + const struct f2fs_sb_encodings **encoding, + __u16 *flags) +{ + __u16 magic = le16_to_cpu(sb->s_encoding); + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++) + if (magic == f2fs_sb_encoding_map[i].magic) + break; + + if (i >= ARRAY_SIZE(f2fs_sb_encoding_map)) + return -EINVAL; + + *encoding = &f2fs_sb_encoding_map[i]; + *flags = le16_to_cpu(sb->s_encoding_flags); + + return 0; +} +#endif + static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { block_t limit = min((sbi->user_block_count << 1) / 1000, @@ -360,8 +397,9 @@ static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; + unsigned char (*ext)[F2FS_EXTENSION_LEN]; char *p, *name; - int arg = 0; + int arg = 0, ext_cnt; kuid_t uid; kgid_t gid; #ifdef CONFIG_QUOTA @@ -779,6 +817,66 @@ static int parse_options(struct super_block *sb, char *options) case Opt_checkpoint_enable: clear_opt(sbi, DISABLE_CHECKPOINT); break; + case Opt_compress_algorithm: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature if off"); + return -EINVAL; + } + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 3 && !strcmp(name, "lzo")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZO; + } else if (strlen(name) == 3 && + !strcmp(name, "lz4")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZ4; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_compress_log_size: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature is off"); + return -EINVAL; + } + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg < MIN_COMPRESS_LOG_SIZE || + arg > MAX_COMPRESS_LOG_SIZE) { + f2fs_err(sbi, + "Compress cluster log size is out of range"); + return -EINVAL; + } + F2FS_OPTION(sbi).compress_log_size = arg; + break; + case Opt_compress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature is off"); + return -EINVAL; + } + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + ext = F2FS_OPTION(sbi).extensions; + ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + ext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, + "invalid extension length/number"); + kfree(name); + return -EINVAL; + } + + strcpy(ext[ext_cnt], name); + F2FS_OPTION(sbi).compress_ext_cnt++; + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -798,6 +896,13 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } #endif +#ifndef CONFIG_UNICODE + if (f2fs_sb_has_casefold(sbi)) { + f2fs_err(sbi, + "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); + return -EINVAL; + } +#endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", @@ -873,7 +978,21 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) static int f2fs_drop_inode(struct inode *inode) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; + + /* + * during filesystem shutdown, if checkpoint is disabled, + * drop useless meta/node dirty pages. + */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) { + trace_f2fs_drop_inode(inode, 1); + return 1; + } + } + /* * This is to avoid a deadlock condition like below. * writeback_single_inode(inode) @@ -913,6 +1032,8 @@ static int f2fs_drop_inode(struct inode *inode) return 0; } ret = generic_drop_inode(inode); + if (!ret) + ret = fscrypt_drop_inode(inode); trace_f2fs_drop_inode(inode, ret); return ret; } @@ -1071,6 +1192,8 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); + f2fs_destroy_post_read_wq(sbi); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); @@ -1089,6 +1212,9 @@ static void f2fs_put_super(struct super_block *sb) destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); +#ifdef CONFIG_UNICODE + utf8_unload(sbi->s_encoding); +#endif kvfree(sbi); } @@ -1112,9 +1238,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) cpc.reason = __get_cp_reason(sbi); - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); } f2fs_trace_ios(NULL, 1); @@ -1156,9 +1282,11 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = (dquot->dq_dqb.dqb_bsoftlimit ? - dquot->dq_dqb.dqb_bsoftlimit : - dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, + dquot->dq_dqb.dqb_bhardlimit); + if (limit) + limit >>= sb->s_blocksize_bits; + if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; buf->f_blocks = limit; @@ -1167,9 +1295,9 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit ? - dquot->dq_dqb.dqb_isoftlimit : - dquot->dq_dqb.dqb_ihardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, + dquot->dq_dqb.dqb_ihardlimit); + if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = @@ -1214,8 +1342,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) else buf->f_bavail = 0; - avail_node_count = sbi->total_node_count - sbi->nquota_files - - F2FS_RESERVED_NODE_NUM; + avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; if (avail_node_count > user_block_count) { buf->f_files = user_block_count; @@ -1276,6 +1403,35 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #endif } +static inline void f2fs_show_compress_options(struct seq_file *seq, + struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + char *algtype = ""; + int i; + + if (!f2fs_sb_has_compression(sbi)) + return; + + switch (F2FS_OPTION(sbi).compress_algorithm) { + case COMPRESS_LZO: + algtype = "lzo"; + break; + case COMPRESS_LZ4: + algtype = "lz4"; + break; + } + seq_printf(seq, ",compress_algorithm=%s", algtype); + + seq_printf(seq, ",compress_log_size=%u", + F2FS_OPTION(sbi).compress_log_size); + + for (i = 0; i < F2FS_OPTION(sbi).compress_ext_cnt; i++) { + seq_printf(seq, ",compress_extension=%s", + F2FS_OPTION(sbi).extensions[i]); + } +} + static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); @@ -1398,6 +1554,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",fsync_mode=%s", "strict"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) seq_printf(seq, ",fsync_mode=%s", "nobarrier"); + + f2fs_show_compress_options(seq, sbi->sb); return 0; } @@ -1412,6 +1570,9 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; + F2FS_OPTION(sbi).compress_ext_cnt = 0; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1460,7 +1621,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; @@ -1482,7 +1643,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); err = f2fs_write_checkpoint(sbi, &cpc); @@ -1494,7 +1655,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); out_unlock: - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); restore_flag: sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ return err; @@ -1502,12 +1663,12 @@ restore_flag: static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); clear_sbi_flag(sbi, SBI_CP_DISABLED); set_sbi_flag(sbi, SBI_IS_DIRTY); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); } @@ -1522,6 +1683,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); + bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool checkpoint_changed; #ifdef CONFIG_QUOTA int i, j; @@ -1601,6 +1763,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { + err = -EINVAL; + f2fs_warn(sbi, "switch io_bits option is not allowed"); + goto restore_opts; + } + if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); @@ -1869,7 +2037,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, /* Don't account quota for quota files to avoid recursion */ qf_inode->i_flags |= S_NOQUOTA; - err = dquot_enable(qf_inode, type, format_id, flags); + err = dquot_load_quota_inode(qf_inode, type, format_id, flags); iput(qf_inode); return err; } @@ -1979,6 +2147,12 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, struct inode *inode; int err; + /* if quota sysfile exists, deny enabling quota with specific file */ + if (f2fs_sb_has_quota_ino(F2FS_SB(sb))) { + f2fs_err(F2FS_SB(sb), "quota sysfile already exists"); + return -EBUSY; + } + err = f2fs_quota_sync(sb, type); if (err) return err; @@ -1998,7 +2172,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, return 0; } -static int f2fs_quota_off(struct super_block *sb, int type) +static int __f2fs_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; int err; @@ -2024,13 +2198,30 @@ out_put: return err; } +static int f2fs_quota_off(struct super_block *sb, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err; + + err = __f2fs_quota_off(sb, type); + + /* + * quotactl can shutdown journalled quota, result in inconsistence + * between quota record and fs data by following updates, tag the + * flag to let fsck be aware of it. + */ + if (is_journalled_quota(sbi)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + return err; +} + void f2fs_quota_off_umount(struct super_block *sb) { int type; int err; for (type = 0; type < MAXQUOTAS; type++) { - err = f2fs_quota_off(sb, type); + err = __f2fs_quota_off(sb, type); if (err) { int ret = dquot_quota_off(sb, type); @@ -2064,7 +2255,7 @@ static int f2fs_dquot_commit(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read(&sbi->quota_sem); + down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); ret = dquot_commit(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); @@ -2088,13 +2279,10 @@ static int f2fs_dquot_acquire(struct dquot *dquot) static int f2fs_dquot_release(struct dquot *dquot) { struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); - int ret; + int ret = dquot_release(dquot); - down_read(&sbi->quota_sem); - ret = dquot_release(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); return ret; } @@ -2102,29 +2290,22 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); - int ret; - - down_read(&sbi->quota_sem); - ret = dquot_mark_dquot_dirty(dquot); + int ret = dquot_mark_dquot_dirty(dquot); /* if we are using journalled quota */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); - up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_commit_info(struct super_block *sb, int type) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - int ret; + int ret = dquot_commit_info(sb, type); - down_read(&sbi->quota_sem); - ret = dquot_commit_info(sb, type); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); return ret; } @@ -2222,13 +2403,27 @@ static bool f2fs_dummy_context(struct inode *inode) return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); } +static bool f2fs_has_stable_inodes(struct super_block *sb) +{ + return true; +} + +static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret) +{ + *ino_bits_ret = 8 * sizeof(nid_t); + *lblk_bits_ret = 8 * sizeof(block_t); +} + static const struct fscrypt_operations f2fs_cryptops = { - .key_prefix = "f2fs:", - .get_context = f2fs_get_context, - .set_context = f2fs_set_context, - .dummy_context = f2fs_dummy_context, - .empty_dir = f2fs_empty_dir, - .max_namelen = F2FS_NAME_LEN, + .key_prefix = "f2fs:", + .get_context = f2fs_get_context, + .set_context = f2fs_set_context, + .dummy_context = f2fs_dummy_context, + .empty_dir = f2fs_empty_dir, + .max_namelen = F2FS_NAME_LEN, + .has_stable_inodes = f2fs_has_stable_inodes, + .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, }; #endif @@ -2518,6 +2713,21 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } + if (RDEV(0).path[0]) { + block_t dev_seg_count = le32_to_cpu(RDEV(0).total_segments); + int i = 1; + + while (i < MAX_DEVICES && RDEV(i).path[0]) { + dev_seg_count += le32_to_cpu(RDEV(i).total_segments); + i++; + } + if (segment_count != dev_seg_count) { + f2fs_info(sbi, "Segment count (%u) mismatch with total segments from devices (%u)", + segment_count, dev_seg_count); + return -EFSCORRUPTED; + } + } + if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); @@ -2615,8 +2825,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) } valid_node_count = le32_to_cpu(ckpt->valid_node_count); - avail_node_count = sbi->total_node_count - sbi->nquota_files - - F2FS_RESERVED_NODE_NUM; + avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; if (valid_node_count > avail_node_count) { f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u", valid_node_count, avail_node_count); @@ -2655,10 +2864,10 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) } } for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { - for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { + for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u", + f2fs_err(sbi, "Node segment (%u) and Data segment (%u) has the same segno: %u", i, j, le32_to_cpu(ckpt->cur_node_segno[i])); return 1; @@ -2753,6 +2962,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) spin_lock_init(&sbi->dev_lock); init_rwsem(&sbi->sb_lock); + init_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2772,15 +2982,21 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) } #ifdef CONFIG_BLK_DEV_ZONED +static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct f2fs_dev_info *dev = data; + + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) + set_bit(idx, dev->blkz_seq); + return 0; +} + static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) { struct block_device *bdev = FDEV(devi).bdev; sector_t nr_sectors = bdev->bd_part->nr_sects; - sector_t sector = 0; - struct blk_zone *zones; - unsigned int i, nr_zones; - unsigned int n = 0; - int err = -EIO; + int ret; if (!f2fs_sb_has_blkzoned(sbi)) return 0; @@ -2805,38 +3021,13 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (!FDEV(devi).blkz_seq) return -ENOMEM; -#define F2FS_REPORT_NR_ZONES 4096 - - zones = f2fs_kzalloc(sbi, - array_size(F2FS_REPORT_NR_ZONES, - sizeof(struct blk_zone)), - GFP_KERNEL); - if (!zones) - return -ENOMEM; - /* Get block zones type */ - while (zones && sector < nr_sectors) { - - nr_zones = F2FS_REPORT_NR_ZONES; - err = blkdev_report_zones(bdev, sector, zones, &nr_zones); - if (err) - break; - if (!nr_zones) { - err = -EIO; - break; - } - - for (i = 0; i < nr_zones; i++) { - if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) - set_bit(n, FDEV(devi).blkz_seq); - sector += zones[i].len; - n++; - } - } - - kvfree(zones); + ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb, + &FDEV(devi)); + if (ret < 0) + return ret; - return err; + return 0; } #endif @@ -2866,6 +3057,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Unable to read %dth superblock", block + 1); err = -EIO; + *recovery = 1; continue; } @@ -2875,6 +3067,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); brelse(bh); + *recovery = 1; continue; } @@ -2887,10 +3080,6 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, brelse(bh); } - /* Fail to read any one of the superblocks*/ - if (err < 0) - *recovery = 1; - /* No valid superblock */ if (!*raw_super) kvfree(super); @@ -3031,6 +3220,53 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) return 0; } +static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_UNICODE + if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) { + const struct f2fs_sb_encodings *encoding_info; + struct unicode_map *encoding; + __u16 encoding_flags; + + if (f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, + "Can't mount with encoding and encryption"); + return -EINVAL; + } + + if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info, + &encoding_flags)) { + f2fs_err(sbi, + "Encoding requested by superblock is unknown"); + return -EINVAL; + } + + encoding = utf8_load(encoding_info->version); + if (IS_ERR(encoding)) { + f2fs_err(sbi, + "can't mount with superblock charset: %s-%s " + "not supported by the kernel. flags: 0x%x.", + encoding_info->name, encoding_info->version, + encoding_flags); + return PTR_ERR(encoding); + } + f2fs_info(sbi, "Using encoding defined by superblock: " + "%s-%s with flags 0x%hx", encoding_info->name, + encoding_info->version?:"\b", encoding_flags); + + sbi->s_encoding = encoding; + sbi->s_encoding_flags = encoding_flags; + sbi->sb->s_d_op = &f2fs_dentry_ops; + } +#else + if (f2fs_sb_has_casefold(sbi)) { + f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); + return -EINVAL; + } +#endif + return 0; +} + static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_i = SM_I(sbi); @@ -3127,6 +3363,10 @@ try_onemore: le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; + err = f2fs_setup_casefold(sbi); + if (err) + goto free_options; + #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; sb->s_qcop = &f2fs_quotactl_ops; @@ -3144,6 +3384,9 @@ try_onemore: #ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; #endif +#ifdef CONFIG_FS_VERITY + sb->s_vop = &f2fs_verityops; +#endif sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; @@ -3155,7 +3398,7 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - mutex_init(&sbi->gc_mutex); + init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); mutex_init(&sbi->resize_mutex); @@ -3190,6 +3433,8 @@ try_onemore: sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); + INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); + init_rwsem(&sbi->write_io[i][j].bio_list_lock); } } @@ -3202,7 +3447,7 @@ try_onemore: if (err) goto free_bio_info; - if (F2FS_IO_SIZE(sbi) > 1) { + if (F2FS_IO_ALIGNED(sbi)) { sbi->write_io_dummy = mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); if (!sbi->write_io_dummy) { @@ -3242,6 +3487,12 @@ try_onemore: goto free_devices; } + err = f2fs_init_post_read_wq(sbi); + if (err) { + f2fs_err(sbi, "Failed to initialize post read workqueue"); + goto free_devices; + } + sbi->total_valid_node_count = le32_to_cpu(sbi->ckpt->valid_node_count); percpu_counter_set(&sbi->total_valid_inode_count, @@ -3386,6 +3637,17 @@ try_onemore: goto free_meta; } } + + /* + * If the f2fs is not readonly and fsync data recovery succeeds, + * check zoned block devices' write pointer consistency. + */ + if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) { + err = f2fs_check_write_pointer(sbi); + if (err) + goto free_meta; + } + reset_checkpoint: /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); @@ -3463,6 +3725,7 @@ free_nm: f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); + f2fs_destroy_post_read_wq(sbi); free_devices: destroy_device_list(sbi); kvfree(sbi->ckpt); @@ -3477,6 +3740,10 @@ free_percpu: free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); + +#ifdef CONFIG_UNICODE + utf8_unload(sbi->s_encoding); +#endif free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -3597,8 +3864,17 @@ static int __init init_f2fs_fs(void) err = f2fs_init_post_read_processing(); if (err) goto free_root_stats; + err = f2fs_init_bio_entry_cache(); + if (err) + goto free_post_read; + err = f2fs_init_bioset(); + if (err) + goto free_bio_enrty_cache; return 0; - +free_bio_enrty_cache: + f2fs_destroy_bio_entry_cache(); +free_post_read: + f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); @@ -3622,6 +3898,8 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_bioset(); + f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 3aeacd0aacfd..91d649790b1b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -10,6 +10,7 @@ #include <linux/proc_fs.h> #include <linux/f2fs_fs.h> #include <linux/seq_file.h> +#include <linux/unicode.h> #include "f2fs.h" #include "segment.h" @@ -24,6 +25,9 @@ enum { DCC_INFO, /* struct discard_cmd_control */ NM_INFO, /* struct f2fs_nm_info */ F2FS_SBI, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_STAT_FS + STAT_INFO, /* struct f2fs_stat_info */ +#endif #ifdef CONFIG_F2FS_FAULT_INJECTION FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ @@ -41,6 +45,9 @@ struct f2fs_attr { int id; }; +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf); + static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) { if (struct_type == GC_THREAD) @@ -58,40 +65,37 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) struct_type == FAULT_INFO_TYPE) return (unsigned char *)&F2FS_OPTION(sbi).fault_info; #endif +#ifdef CONFIG_F2FS_STAT_FS + else if (struct_type == STAT_INFO) + return (unsigned char *)F2FS_STAT(sbi); +#endif return NULL; } static ssize_t dirty_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(dirty_segments(sbi))); + return sprintf(buf, "%llu\n", + (unsigned long long)(dirty_segments(sbi))); } -static ssize_t unusable_show(struct f2fs_attr *a, +static ssize_t free_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - block_t unusable; - - if (test_opt(sbi, DISABLE_CHECKPOINT)) - unusable = sbi->unusable_block_count; - else - unusable = f2fs_get_unusable_blocks(sbi); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)unusable); + return sprintf(buf, "%llu\n", + (unsigned long long)(free_segments(sbi))); } - static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { struct super_block *sb = sbi->sb; if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); + return sprintf(buf, "0\n"); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->kbytes_written + + return sprintf(buf, "%llu\n", + (unsigned long long)(sbi->kbytes_written + BD_PART_WRITTEN(sbi))); } @@ -102,7 +106,7 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); + return sprintf(buf, "0\n"); if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", @@ -131,9 +135,20 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_lost_found(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); + if (f2fs_sb_has_verity(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); + if (f2fs_sb_has_casefold(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "casefold"); + if (f2fs_sb_has_compression(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "compression"); + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "pin_file"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -141,9 +156,66 @@ static ssize_t features_show(struct f2fs_attr *a, static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks); + return sprintf(buf, "%u\n", sbi->current_reserved_blocks); +} + +static ssize_t unusable_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + block_t unusable; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + unusable = sbi->unusable_block_count; + else + unusable = f2fs_get_unusable_blocks(sbi); + return sprintf(buf, "%llu\n", (unsigned long long)unusable); +} + +static ssize_t encoding_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ +#ifdef CONFIG_UNICODE + if (f2fs_sb_has_casefold(sbi)) + return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", + sbi->s_encoding->charset, + (sbi->s_encoding->version >> 16) & 0xff, + (sbi->s_encoding->version >> 8) & 0xff, + sbi->s_encoding->version & 0xff); +#endif + return sprintf(buf, "(none)"); +} + +#ifdef CONFIG_F2FS_STAT_FS +static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sprintf(buf, "%llu\n", + (unsigned long long)(si->tot_blks - + (si->bg_data_blks + si->bg_node_blks))); } +static ssize_t moved_blocks_background_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sprintf(buf, "%llu\n", + (unsigned long long)(si->bg_data_blks + si->bg_node_blks)); +} + +static ssize_t avg_vblocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + si->dirty_count = dirty_segments(sbi); + f2fs_update_sit_info(sbi); + return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); +} +#endif + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -177,7 +249,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, ui = (unsigned int *)(ptr + a->offset); - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); + return sprintf(buf, "%u\n", *ui); } static ssize_t __sbi_store(struct f2fs_attr *a, @@ -364,7 +436,10 @@ enum feat_id { FEAT_QUOTA_INO, FEAT_INODE_CRTIME, FEAT_LOST_FOUND, + FEAT_VERITY, FEAT_SB_CHECKSUM, + FEAT_CASEFOLD, + FEAT_COMPRESSION, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -381,8 +456,11 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: case FEAT_LOST_FOUND: + case FEAT_VERITY: case FEAT_SB_CHECKSUM: - return snprintf(buf, PAGE_SIZE, "supported\n"); + case FEAT_CASEFOLD: + case FEAT_COMPRESSION: + return sprintf(buf, "supported\n"); } return 0; } @@ -411,6 +489,14 @@ static struct f2fs_attr f2fs_attr_##_name = { \ .id = _id, \ } +#define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = f2fs_sbi_show, \ + .struct_type = _struct_type, \ + .offset = offsetof(struct _struct_name, _elname), \ +} + F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, urgent_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); @@ -419,6 +505,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -451,10 +538,21 @@ F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_GENERAL_RO_ATTR(dirty_segments); +F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); +F2FS_GENERAL_RO_ATTR(encoding); +#ifdef CONFIG_F2FS_STAT_FS +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_foreground_calls, call_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_background_calls, bg_gc); +F2FS_GENERAL_RO_ATTR(moved_blocks_background); +F2FS_GENERAL_RO_ATTR(moved_blocks_foreground); +F2FS_GENERAL_RO_ATTR(avg_vblocks); +#endif #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -470,7 +568,12 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +#ifdef CONFIG_FS_VERITY +F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); +#endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); +F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -481,6 +584,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), + ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), ATTR_LIST(discard_granularity), ATTR_LIST(batched_trim_sections), @@ -510,11 +614,22 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(dirty_segments), + ATTR_LIST(free_segments), ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), + ATTR_LIST(encoding), +#ifdef CONFIG_F2FS_STAT_FS + ATTR_LIST(cp_foreground_calls), + ATTR_LIST(cp_background_calls), + ATTR_LIST(gc_foreground_calls), + ATTR_LIST(gc_background_calls), + ATTR_LIST(moved_blocks_foreground), + ATTR_LIST(moved_blocks_background), + ATTR_LIST(avg_vblocks), +#endif NULL, }; ATTRIBUTE_GROUPS(f2fs); @@ -534,7 +649,12 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), ATTR_LIST(lost_found), +#ifdef CONFIG_FS_VERITY + ATTR_LIST(verity), +#endif ATTR_LIST(sb_checksum), + ATTR_LIST(casefold), + ATTR_LIST(compression), NULL, }; ATTRIBUTE_GROUPS(f2fs_feat); @@ -695,10 +815,12 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); - if (ret) + if (ret) { + kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); - else + } else { f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + } return ret; } @@ -719,8 +841,11 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -748,4 +873,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c new file mode 100644 index 000000000000..d7d430a6f130 --- /dev/null +++ b/fs/f2fs/verity.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/f2fs/verity.c: fs-verity support for f2fs + * + * Copyright 2019 Google LLC + */ + +/* + * Implementation of fsverity_operations for f2fs. + * + * Like ext4, f2fs stores the verity metadata (Merkle tree and + * fsverity_descriptor) past the end of the file, starting at the first 64K + * boundary beyond i_size. This approach works because (a) verity files are + * readonly, and (b) pages fully beyond i_size aren't visible to userspace but + * can be read/written internally by f2fs with only some relatively small + * changes to f2fs. Extended attributes cannot be used because (a) f2fs limits + * the total size of an inode's xattr entries to 4096 bytes, which wouldn't be + * enough for even a single Merkle tree block, and (b) f2fs encryption doesn't + * encrypt xattrs, yet the verity metadata *must* be encrypted when the file is + * because it contains hashes of the plaintext data. + * + * Using a 64K boundary rather than a 4K one keeps things ready for + * architectures with 64K pages, and it doesn't necessarily waste space on-disk + * since there can be a hole between i_size and the start of the Merkle tree. + */ + +#include <linux/f2fs_fs.h> + +#include "f2fs.h" +#include "xattr.h" + +static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode) +{ + return round_up(inode->i_size, 65536); +} + +/* + * Read some verity metadata from the inode. __vfs_read() can't be used because + * we need to read beyond i_size. + */ +static int pagecache_read(struct inode *inode, void *buf, size_t count, + loff_t pos) +{ + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *addr; + + page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, + NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + addr = kmap_atomic(page); + memcpy(buf, addr + offset_in_page(pos), n); + kunmap_atomic(addr); + + put_page(page); + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. + * kernel_write() can't be used because the file descriptor is readonly. + */ +static int pagecache_write(struct inode *inode, const void *buf, size_t count, + loff_t pos) +{ + if (pos + count > inode->i_sb->s_maxbytes) + return -EFBIG; + + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *fsdata; + void *addr; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, + &page, &fsdata); + if (res) + return res; + + addr = kmap_atomic(page); + memcpy(addr + offset_in_page(pos), buf, n); + kunmap_atomic(addr); + + res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, + page, fsdata); + if (res < 0) + return res; + if (res != n) + return -EIO; + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Format of f2fs verity xattr. This points to the location of the verity + * descriptor within the file data rather than containing it directly because + * the verity descriptor *must* be encrypted when f2fs encryption is used. But, + * f2fs encryption does not encrypt xattrs. + */ +struct fsverity_descriptor_location { + __le32 version; + __le32 size; + __le64 pos; +}; + +static int f2fs_begin_enable_verity(struct file *filp) +{ + struct inode *inode = file_inode(filp); + int err; + + if (f2fs_verity_in_progress(inode)) + return -EBUSY; + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + return -EOPNOTSUPP; + + /* + * Since the file was opened readonly, we have to initialize the quotas + * here and not rely on ->open() doing it. This must be done before + * evicting the inline data. + */ + err = dquot_initialize(inode); + if (err) + return err; + + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + + set_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return 0; +} + +static int f2fs_end_enable_verity(struct file *filp, const void *desc, + size_t desc_size, u64 merkle_tree_size) +{ + struct inode *inode = file_inode(filp); + u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size; + struct fsverity_descriptor_location dloc = { + .version = cpu_to_le32(1), + .size = cpu_to_le32(desc_size), + .pos = cpu_to_le64(desc_pos), + }; + int err = 0; + + if (desc != NULL) { + /* Succeeded; write the verity descriptor. */ + err = pagecache_write(inode, desc, desc_size, desc_pos); + + /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */ + if (!err) + err = filemap_write_and_wait(inode->i_mapping); + } + + /* If we failed, truncate anything we wrote past i_size. */ + if (desc == NULL || err) + f2fs_truncate(inode); + + clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + + if (desc != NULL && !err) { + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), + NULL, XATTR_CREATE); + if (!err) { + file_set_verity(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, true); + } + } + return err; +} + +static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, + size_t buf_size) +{ + struct fsverity_descriptor_location dloc; + int res; + u32 size; + u64 pos; + + /* Get the descriptor location */ + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL); + if (res < 0 && res != -ERANGE) + return res; + if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) { + f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format"); + return -EINVAL; + } + size = le32_to_cpu(dloc.size); + pos = le64_to_cpu(dloc.pos); + + /* Get the descriptor */ + if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { + f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); + return -EFSCORRUPTED; + } + if (buf_size) { + if (size > buf_size) + return -ERANGE; + res = pagecache_read(inode, buf, size, pos); + if (res) + return res; + } + return size; +} + +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. + */ +static void f2fs_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) +{ + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + page = xa_load(&mapping->i_pages, index); + if (!page || xa_is_value(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + +static struct page *f2fs_read_merkle_tree_page(struct inode *inode, + pgoff_t index, + unsigned long num_ra_pages) +{ + struct page *page; + + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; + + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + f2fs_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; +} + +static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, + u64 index, int log_blocksize) +{ + loff_t pos = f2fs_verity_metadata_pos(inode) + (index << log_blocksize); + + return pagecache_write(inode, buf, 1 << log_blocksize, pos); +} + +const struct fsverity_operations f2fs_verityops = { + .begin_enable_verity = f2fs_begin_enable_verity, + .end_enable_verity = f2fs_end_enable_verity, + .get_verity_descriptor = f2fs_get_verity_descriptor, + .read_merkle_tree_page = f2fs_read_merkle_tree_page, + .write_merkle_tree_block = f2fs_write_merkle_tree_block, +}; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index b32c45621679..296b3189448a 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -21,6 +21,7 @@ #include <linux/posix_acl_xattr.h> #include "f2fs.h" #include "xattr.h" +#include "segment.h" static int f2fs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, @@ -538,8 +539,9 @@ out: ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = d_inode(dentry); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_xattr_entry *entry; - void *base_addr; + void *base_addr, *last_base_addr; int error = 0; size_t rest = buffer_size; @@ -549,6 +551,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (error) return error; + last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = f2fs_xattr_handler(entry->e_name_index); @@ -556,6 +560,15 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) size_t prefix_len; size_t size; + if ((void *)(entry) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto cleanup; + } + if (!handler || (handler->list && !handler->list(dentry))) continue; @@ -729,6 +742,11 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if (!f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; + err = dquot_initialize(inode); if (err) return err; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index a90920e2f949..de0c600b9cab 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -34,8 +34,10 @@ #define F2FS_XATTR_INDEX_ADVISE 7 /* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */ #define F2FS_XATTR_INDEX_ENCRYPTION 9 +#define F2FS_XATTR_INDEX_VERITY 11 #define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" +#define F2FS_XATTR_NAME_VERITY "v" struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */ |