diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/acl.c | 3 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 3 | ||||
-rw-r--r-- | fs/ext4/crypto.c | 61 | ||||
-rw-r--r-- | fs/ext4/dir.c | 13 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 54 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 15 | ||||
-rw-r--r-- | fs/ext4/extents.c | 20 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 2 | ||||
-rw-r--r-- | fs/ext4/file.c | 31 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 59 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 127 | ||||
-rw-r--r-- | fs/ext4/inline.c | 20 | ||||
-rw-r--r-- | fs/ext4/inode.c | 507 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 4 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 52 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 4 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 29 | ||||
-rw-r--r-- | fs/ext4/namei.c | 13 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 20 | ||||
-rw-r--r-- | fs/ext4/readpage.c | 14 | ||||
-rw-r--r-- | fs/ext4/resize.c | 2 | ||||
-rw-r--r-- | fs/ext4/super.c | 80 | ||||
-rw-r--r-- | fs/ext4/symlink.c | 4 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 32 | ||||
-rw-r--r-- | fs/ext4/xattr_security.c | 13 | ||||
-rw-r--r-- | fs/ext4/xattr_trusted.c | 13 | ||||
-rw-r--r-- | fs/ext4/xattr_user.c | 17 |
27 files changed, 666 insertions, 546 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 69b1e73026a5..c6601a476c02 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -172,9 +172,6 @@ ext4_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index fe1f50fe764f..3020fd70c392 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -610,7 +610,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); - return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); + jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); + return 1; } /* diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index edc053a81914..6a6c27373b54 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -32,6 +32,7 @@ #include <linux/random.h> #include <linux/scatterlist.h> #include <linux/spinlock_types.h> +#include <linux/namei.h> #include "ext4_extents.h" #include "xattr.h" @@ -91,7 +92,8 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx) * Return: An allocated and initialized encryption context on success; error * value or NULL otherwise. */ -struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode) +struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode, + gfp_t gfp_flags) { struct ext4_crypto_ctx *ctx = NULL; int res = 0; @@ -118,7 +120,7 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode) list_del(&ctx->free_list); spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags); if (!ctx) { - ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS); + ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, gfp_flags); if (!ctx) { res = -ENOMEM; goto out; @@ -255,7 +257,8 @@ static int ext4_page_crypto(struct inode *inode, ext4_direction_t rw, pgoff_t index, struct page *src_page, - struct page *dest_page) + struct page *dest_page, + gfp_t gfp_flags) { u8 xts_tweak[EXT4_XTS_TWEAK_SIZE]; @@ -266,7 +269,7 @@ static int ext4_page_crypto(struct inode *inode, struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; - req = skcipher_request_alloc(tfm, GFP_NOFS); + req = skcipher_request_alloc(tfm, gfp_flags); if (!req) { printk_ratelimited(KERN_ERR "%s: crypto_request_alloc() failed\n", @@ -283,10 +286,10 @@ static int ext4_page_crypto(struct inode *inode, EXT4_XTS_TWEAK_SIZE - sizeof(index)); sg_init_table(&dst, 1); - sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); + sg_set_page(&dst, dest_page, PAGE_SIZE, 0); sg_init_table(&src, 1); - sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); - skcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, + sg_set_page(&src, src_page, PAGE_SIZE, 0); + skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, xts_tweak); if (rw == EXT4_DECRYPT) res = crypto_skcipher_decrypt(req); @@ -307,9 +310,10 @@ static int ext4_page_crypto(struct inode *inode, return 0; } -static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx) +static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx, + gfp_t gfp_flags) { - ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT); + ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, gfp_flags); if (ctx->w.bounce_page == NULL) return ERR_PTR(-ENOMEM); ctx->flags |= EXT4_WRITE_PATH_FL; @@ -332,7 +336,8 @@ static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx) * error value or NULL. */ struct page *ext4_encrypt(struct inode *inode, - struct page *plaintext_page) + struct page *plaintext_page, + gfp_t gfp_flags) { struct ext4_crypto_ctx *ctx; struct page *ciphertext_page = NULL; @@ -340,17 +345,17 @@ struct page *ext4_encrypt(struct inode *inode, BUG_ON(!PageLocked(plaintext_page)); - ctx = ext4_get_crypto_ctx(inode); + ctx = ext4_get_crypto_ctx(inode, gfp_flags); if (IS_ERR(ctx)) return (struct page *) ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_bounce_page(ctx); + ciphertext_page = alloc_bounce_page(ctx, gfp_flags); if (IS_ERR(ciphertext_page)) goto errout; ctx->w.control_page = plaintext_page; err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index, - plaintext_page, ciphertext_page); + plaintext_page, ciphertext_page, gfp_flags); if (err) { ciphertext_page = ERR_PTR(err); errout: @@ -378,8 +383,8 @@ int ext4_decrypt(struct page *page) { BUG_ON(!PageLocked(page)); - return ext4_page_crypto(page->mapping->host, - EXT4_DECRYPT, page->index, page, page); + return ext4_page_crypto(page->mapping->host, EXT4_DECRYPT, + page->index, page, page, GFP_NOFS); } int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, @@ -396,13 +401,13 @@ int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, (unsigned long) inode->i_ino, lblk, len); #endif - BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); + BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - ctx = ext4_get_crypto_ctx(inode); + ctx = ext4_get_crypto_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ciphertext_page = alloc_bounce_page(ctx); + ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT); if (IS_ERR(ciphertext_page)) { err = PTR_ERR(ciphertext_page); goto errout; @@ -410,11 +415,12 @@ int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, while (len--) { err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page); + ZERO_PAGE(0), ciphertext_page, + GFP_NOFS); if (err) goto errout; - bio = bio_alloc(GFP_KERNEL, 1); + bio = bio_alloc(GFP_NOWAIT, 1); if (!bio) { err = -ENOMEM; goto errout; @@ -473,13 +479,19 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) */ static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct inode *dir = d_inode(dentry->d_parent); - struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info; + struct dentry *dir; + struct ext4_crypt_info *ci; int dir_has_key, cached_with_key; - if (!ext4_encrypted_inode(dir)) - return 0; + if (flags & LOOKUP_RCU) + return -ECHILD; + dir = dget_parent(dentry); + if (!ext4_encrypted_inode(d_inode(dir))) { + dput(dir); + return 0; + } + ci = EXT4_I(d_inode(dir))->i_crypt_info; if (ci && ci->ci_keyring_key && (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED) | @@ -489,6 +501,7 @@ static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) /* this should eventually be an flag in d_flags */ cached_with_key = dentry->d_fsdata != NULL; dir_has_key = (ci != NULL); + dput(dir); /* * If the dentry was cached without the key, and it is a diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 50ba27cbed03..68323e3da3fa 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -150,18 +150,23 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) while (ctx->pos < inode->i_size) { struct ext4_map_blocks map; + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto errout; + } + cond_resched(); map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); map.m_len = 1; err = ext4_map_blocks(NULL, inode, &map, 0); if (err > 0) { pgoff_t index = map.m_pblk >> - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); if (!ra_has_index(&file->f_ra, index)) page_cache_sync_readahead( sb->s_bdev->bd_inode->i_mapping, &file->f_ra, file, index, 1); - file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; + file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT; bh = ext4_bread(NULL, inode, map.m_lblk, 0); if (IS_ERR(bh)) { err = PTR_ERR(bh); @@ -266,7 +271,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) ctx->pos += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); } - if ((ctx->pos < inode->i_size) && !dir_relax(inode)) + if ((ctx->pos < inode->i_size) && !dir_relax_shared(inode)) goto done; brelse(bh); bh = NULL; @@ -644,7 +649,7 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, const struct file_operations ext4_dir_operations = { .llseek = ext4_dir_llseek, .read = generic_read_dir, - .iterate = ext4_readdir, + .iterate_shared = ext4_readdir, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c04743519865..b84aa1ca480a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -33,6 +33,7 @@ #include <linux/ratelimit.h> #include <crypto/hash.h> #include <linux/falloc.h> +#include <linux/percpu-rwsem.h> #ifdef __KERNEL__ #include <linux/compat.h> #endif @@ -581,6 +582,9 @@ enum { #define EXT4_GET_BLOCKS_ZERO 0x0200 #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ EXT4_GET_BLOCKS_ZERO) + /* Caller will submit data before dropping transaction handle. This + * allows jbd2 to avoid submitting data before commit. */ +#define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 /* * The bit position of these flags must not overlap with any of the @@ -912,6 +916,29 @@ do { \ #include "extents_status.h" /* + * Lock subclasses for i_data_sem in the ext4_inode_info structure. + * + * These are needed to avoid lockdep false positives when we need to + * allocate blocks to the quota inode during ext4_map_blocks(), while + * holding i_data_sem for a normal (non-quota) inode. Since we don't + * do quota tracking for the quota inode, this avoids deadlock (as + * well as infinite recursion, since it isn't turtles all the way + * down...) + * + * I_DATA_SEM_NORMAL - Used for most inodes + * I_DATA_SEM_OTHER - Used by move_inode.c for the second normal inode + * where the second inode has larger inode number + * than the first + * I_DATA_SEM_QUOTA - Used for quota inodes only + */ +enum { + I_DATA_SEM_NORMAL = 0, + I_DATA_SEM_OTHER, + I_DATA_SEM_QUOTA, +}; + + +/* * fourth extended file system inode data in memory */ struct ext4_inode_info { @@ -1482,6 +1509,9 @@ struct ext4_sb_info { struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; + + /* Barrier between changing inodes' journal flags and writepages ops. */ + struct percpu_rw_semaphore s_journal_flag_rwsem; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1526,7 +1556,6 @@ enum { EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read nolocking */ EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ - EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ }; @@ -1961,7 +1990,7 @@ ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) { unsigned len = le16_to_cpu(dlen); -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len == EXT4_MAX_REC_LEN || len == 0) return blocksize; return (len & 65532) | ((len & 3) << 16); @@ -1974,7 +2003,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) { if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) BUG(); -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len < 65536) return cpu_to_le16(len); if (len == blocksize) { @@ -2282,11 +2311,13 @@ extern struct kmem_cache *ext4_crypt_info_cachep; bool ext4_valid_contents_enc_mode(uint32_t mode); uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size); extern struct workqueue_struct *ext4_read_workqueue; -struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode); +struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode, + gfp_t gfp_flags); void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx); void ext4_restore_control_page(struct page *data_page); struct page *ext4_encrypt(struct inode *inode, - struct page *plaintext_page); + struct page *plaintext_page, + gfp_t gfp_flags); int ext4_decrypt(struct page *page); int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, ext4_lblk_t len); @@ -2496,8 +2527,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); -int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create); +int ext4_dax_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_dio_get_block(struct inode *inode, sector_t iblock, @@ -2556,8 +2587,6 @@ extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); -extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset); extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); @@ -3305,6 +3334,13 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) } } +static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len) +{ + int blksize = 1 << inode->i_blkbits; + + return IS_ALIGNED(off, blksize) && IS_ALIGNED(len, blksize); +} + #endif /* __KERNEL__ */ #define EFSBADCRC EBADMSG /* Bad CRC detected */ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5f5846211095..09c1ef38cbe6 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -359,10 +359,21 @@ static inline int ext4_journal_force_commit(journal_t *journal) return 0; } -static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) +static inline int ext4_jbd2_inode_add_write(handle_t *handle, + struct inode *inode) { if (ext4_handle_valid(handle)) - return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode); + return jbd2_journal_inode_add_write(handle, + EXT4_I(inode)->jinode); + return 0; +} + +static inline int ext4_jbd2_inode_add_wait(handle_t *handle, + struct inode *inode) +{ + if (ext4_handle_valid(handle)) + return jbd2_journal_inode_add_wait(handle, + EXT4_I(inode)->jinode); return 0; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 95bf4679ac54..2a2eef9c14e4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -120,9 +120,14 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, if (!ext4_handle_valid(handle)) return 0; - if (handle->h_buffer_credits > needed) + if (handle->h_buffer_credits >= needed) return 0; - err = ext4_journal_extend(handle, needed); + /* + * If we need to extend the journal get a few extra blocks + * while we're at it for efficiency's sake. + */ + needed += 3; + err = ext4_journal_extend(handle, needed - handle->h_buffer_credits); if (err <= 0) return err; err = ext4_truncate_restart_trans(handle, inode, needed); @@ -907,13 +912,6 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, eh = ext_block_hdr(bh); ppos++; - if (unlikely(ppos > depth)) { - put_bh(bh); - EXT4_ERROR_INODE(inode, - "ppos %d > depth %d", ppos, depth); - ret = -EFSCORRUPTED; - goto err; - } path[ppos].p_bh = bh; path[ppos].p_hdr = eh; } @@ -2583,7 +2581,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, } } else ext4_error(sbi->s_sb, "strange request: removal(2) " - "%u-%u from %u:%u\n", + "%u-%u from %u:%u", from, to, le32_to_cpu(ex->ee_block), ee_len); return 0; } @@ -3738,7 +3736,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, if (ee_block != map->m_lblk || ee_len > map->m_len) { #ifdef EXT4_DEBUG ext4_warning("Inode (%ld) finished: extent logical block %llu," - " len %u; IO logical block %llu, len %u\n", + " len %u; IO logical block %llu, len %u", inode->i_ino, (unsigned long long)ee_block, ee_len, (unsigned long long)map->m_lblk, map->m_len); #endif diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e38b987ac7f5..37e059202cd2 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -707,7 +707,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, (status & EXTENT_STATUS_WRITTEN)) { ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as " " delayed and written which can potentially " - " cause data loss.\n", lblk, len); + " cause data loss.", lblk, len); WARN_ON(1); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6659e216385e..df44c877892a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -169,13 +169,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = __generic_file_write_iter(iocb, from); inode_unlock(inode); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; - } + if (ret > 0) + ret = generic_write_sync(iocb, ret); if (o_direct) blk_finish_plug(&plug); @@ -207,7 +202,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (IS_ERR(handle)) result = VM_FAULT_SIGBUS; else - result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL); + result = __dax_fault(vma, vmf, ext4_dax_get_block); if (write) { if (!IS_ERR(handle)) @@ -243,7 +238,7 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, result = VM_FAULT_SIGBUS; else result = __dax_pmd_fault(vma, addr, pmd, flags, - ext4_dax_mmap_get_block, NULL); + ext4_dax_get_block); if (write) { if (!IS_ERR(handle)) @@ -329,7 +324,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) struct super_block *sb = inode->i_sb; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct vfsmount *mnt = filp->f_path.mnt; - struct inode *dir = filp->f_path.dentry->d_parent->d_inode; + struct dentry *dir; struct path path; char buf[64], *cp; int ret; @@ -373,14 +368,18 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ext4_encryption_info(inode) == NULL) return -ENOKEY; } - if (ext4_encrypted_inode(dir) && - !ext4_is_child_context_consistent_with_parent(dir, inode)) { + + dir = dget_parent(file_dentry(filp)); + if (ext4_encrypted_inode(d_inode(dir)) && + !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) { ext4_warning(inode->i_sb, - "Inconsistent encryption contexts: %lu/%lu\n", - (unsigned long) dir->i_ino, + "Inconsistent encryption contexts: %lu/%lu", + (unsigned long) d_inode(dir)->i_ino, (unsigned long) inode->i_ino); + dput(dir); return -EPERM; } + dput(dir); /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present @@ -428,8 +427,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = startoff; endoff = (loff_t)end_blk << blkbits; - index = startoff >> PAGE_CACHE_SHIFT; - end = endoff >> PAGE_CACHE_SHIFT; + index = startoff >> PAGE_SHIFT; + end = endoff >> PAGE_SHIFT; pagevec_init(&pvec, 0); do { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 237b877d316d..3da4cf8d18b6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1150,25 +1150,20 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); ext4_group_t block_group; int bit; - struct buffer_head *bitmap_bh; + struct buffer_head *bitmap_bh = NULL; struct inode *inode = NULL; - long err = -EIO; + int err = -EFSCORRUPTED; - /* Error cases - e2fsck has already cleaned up for us */ - if (ino > max_ino) { - ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); - err = -EFSCORRUPTED; - goto error; - } + if (ino < EXT4_FIRST_INO(sb) || ino > max_ino) + goto bad_orphan; block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = ext4_read_inode_bitmap(sb, block_group); if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - ext4_warning(sb, "inode bitmap error %ld for orphan %lu", - ino, err); - goto error; + ext4_error(sb, "inode bitmap error %ld for orphan %lu", + ino, PTR_ERR(bitmap_bh)); + return (struct inode *) bitmap_bh; } /* Having the inode bit set should be a 100% indicator that this @@ -1179,15 +1174,21 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) goto bad_orphan; inode = ext4_iget(sb, ino); - if (IS_ERR(inode)) - goto iget_failed; + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ext4_error(sb, "couldn't read orphan inode %lu (err %d)", + ino, err); + return inode; + } /* - * If the orphans has i_nlinks > 0 then it should be able to be - * truncated, otherwise it won't be removed from the orphan list - * during processing and an infinite loop will result. + * If the orphans has i_nlinks > 0 then it should be able to + * be truncated, otherwise it won't be removed from the orphan + * list during processing and an infinite loop will result. + * Similarly, it must not be a bad inode. */ - if (inode->i_nlink && !ext4_can_truncate(inode)) + if ((inode->i_nlink && !ext4_can_truncate(inode)) || + is_bad_inode(inode)) goto bad_orphan; if (NEXT_ORPHAN(inode) > max_ino) @@ -1195,29 +1196,25 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) brelse(bitmap_bh); return inode; -iget_failed: - err = PTR_ERR(inode); - inode = NULL; bad_orphan: - ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); - printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext4_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_WARNING "inode=%p\n", inode); + ext4_error(sb, "bad orphan inode %lu", ino); + if (bitmap_bh) + printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext4_test_bit(bit, bitmap_bh->b_data)); if (inode) { - printk(KERN_WARNING "is_bad_inode(inode)=%d\n", + printk(KERN_ERR "is_bad_inode(inode)=%d\n", is_bad_inode(inode)); - printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n", + printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n", NEXT_ORPHAN(inode)); - printk(KERN_WARNING "max_ino=%lu\n", max_ino); - printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink); + printk(KERN_ERR "max_ino=%lu\n", max_ino); + printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink); /* Avoid freeing blocks if we got a bad deleted inode */ if (inode->i_nlink == 0) inode->i_blocks = 0; iput(inode); } brelse(bitmap_bh); -error: return ERR_PTR(err); } diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 3027fa681de5..bc15c2c17633 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -649,133 +649,6 @@ out: } /* - * O_DIRECT for ext3 (or indirect map) based files - * - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. But current - * VFS code falls back into buffered path in that case so we are safe. - */ -ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle; - ssize_t ret; - int orphan = 0; - size_t count = iov_iter_count(iter); - int retries = 0; - - if (iov_iter_rw(iter) == WRITE) { - loff_t final_size = offset + count; - - if (final_size > inode->i_size) { - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = ext4_orphan_add(handle, inode); - if (ret) { - ext4_journal_stop(handle); - goto out; - } - orphan = 1; - ei->i_disksize = inode->i_size; - ext4_journal_stop(handle); - } - } - -retry: - if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) { - /* - * Nolock dioread optimization may be dynamically disabled - * via ext4_inode_block_unlocked_dio(). Check inode's state - * while holding extra i_dio_count ref. - */ - inode_dio_begin(inode); - smp_mb(); - if (unlikely(ext4_test_inode_state(inode, - EXT4_STATE_DIOREAD_LOCK))) { - inode_dio_end(inode); - goto locked; - } - if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, - ext4_dio_get_block, NULL, 0); - else - ret = __blockdev_direct_IO(iocb, inode, - inode->i_sb->s_bdev, iter, - offset, ext4_dio_get_block, - NULL, NULL, 0); - inode_dio_end(inode); - } else { -locked: - if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, - ext4_dio_get_block, NULL, DIO_LOCKING); - else - ret = blockdev_direct_IO(iocb, inode, iter, offset, - ext4_dio_get_block); - - if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + count; - - if (end > isize) - ext4_truncate_failed_write(inode); - } - } - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - - if (orphan) { - int err; - - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - - goto out; - } - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - if (ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext4_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext4_mark_inode_dirty(handle, inode); - } - } - err = ext4_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; -} - -/* * Calculate the number of metadata blocks need to reserve * to allocate a new block at @lblocks for non extent file based file */ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 7cbdd3752ba5..ff7538c26992 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -482,7 +482,7 @@ static int ext4_read_inline_page(struct inode *inode, struct page *page) ret = ext4_read_inline_data(inode, kaddr, len, &iloc); flush_dcache_page(page); kunmap_atomic(kaddr); - zero_user_segment(page, len, PAGE_CACHE_SIZE); + zero_user_segment(page, len, PAGE_SIZE); SetPageUptodate(page); brelse(iloc.bh); @@ -507,7 +507,7 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) if (!page->index) ret = ext4_read_inline_page(inode, page); else if (!PageUptodate(page)) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } @@ -595,7 +595,7 @@ retry: if (ret) { unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; ext4_orphan_add(handle, inode); up_write(&EXT4_I(inode)->xattr_sem); @@ -621,7 +621,7 @@ retry: out: if (page) { unlock_page(page); - page_cache_release(page); + put_page(page); } if (sem_held) up_write(&EXT4_I(inode)->xattr_sem); @@ -690,7 +690,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, if (!ext4_has_inline_data(inode)) { ret = 0; unlock_page(page); - page_cache_release(page); + put_page(page); goto out_up_read; } @@ -815,7 +815,7 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, if (ret) { up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page); - page_cache_release(page); + put_page(page); ext4_truncate_failed_write(inode); return ret; } @@ -829,7 +829,7 @@ out: up_read(&EXT4_I(inode)->xattr_sem); if (page) { unlock_page(page); - page_cache_release(page); + put_page(page); } return ret; } @@ -919,7 +919,7 @@ retry_journal: out_release_page: up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page); - page_cache_release(page); + put_page(page); out_journal: ext4_journal_stop(handle); out: @@ -947,7 +947,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, i_size_changed = 1; } unlock_page(page); - page_cache_release(page); + put_page(page); /* * Don't mark the inode dirty under page lock. First, it unnecessarily @@ -1780,7 +1780,7 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - " "inode %u, rec_len %u, name_len %d" - "inline size %d\n", + "inline size %d", dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index dab84a2530ff..f7140ca66e3b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -684,6 +684,24 @@ out_sem: ret = check_block_validity(inode, map); if (ret != 0) return ret; + + /* + * Inodes with freshly allocated blocks where contents will be + * visible after transaction commit must be on transaction's + * ordered data list. + */ + if (map->m_flags & EXT4_MAP_NEW && + !(map->m_flags & EXT4_MAP_UNWRITTEN) && + !(flags & EXT4_GET_BLOCKS_ZERO) && + !IS_NOQUOTA(inode) && + ext4_should_order_data(inode)) { + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) + ret = ext4_jbd2_inode_add_wait(handle, inode); + else + ret = ext4_jbd2_inode_add_write(handle, inode); + if (ret) + return ret; + } } return retval; } @@ -763,39 +781,47 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 -static handle_t *start_dio_trans(struct inode *inode, - struct buffer_head *bh_result) +/* + * Get blocks function for the cases that need to start a transaction - + * generally difference cases of direct IO and DAX IO. It also handles retries + * in case of ENOSPC. + */ +static int ext4_get_block_trans(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int flags) { int dio_credits; + handle_t *handle; + int retries = 0; + int ret; /* Trim mapping request to maximum we can map at once for DIO */ if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS) bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits; dio_credits = ext4_chunk_trans_blocks(inode, bh_result->b_size >> inode->i_blkbits); - return ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits); +retry: + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ret = _ext4_get_block(inode, iblock, bh_result, flags); + ext4_journal_stop(handle); + + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + return ret; } /* Get block function for DIO reads and writes to inodes without extents */ int ext4_dio_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - handle_t *handle; - int ret; - /* We don't expect handle for direct IO */ WARN_ON_ONCE(ext4_journal_current_handle()); - if (create) { - handle = start_dio_trans(inode, bh); - if (IS_ERR(handle)) - return PTR_ERR(handle); - } - ret = _ext4_get_block(inode, iblock, bh, - create ? EXT4_GET_BLOCKS_CREATE : 0); - if (create) - ext4_journal_stop(handle); - return ret; + if (!create) + return _ext4_get_block(inode, iblock, bh, 0); + return ext4_get_block_trans(inode, iblock, bh, EXT4_GET_BLOCKS_CREATE); } /* @@ -806,18 +832,13 @@ int ext4_dio_get_block(struct inode *inode, sector_t iblock, static int ext4_dio_get_block_unwritten_async(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - handle_t *handle; int ret; /* We don't expect handle for direct IO */ WARN_ON_ONCE(ext4_journal_current_handle()); - handle = start_dio_trans(inode, bh_result); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = _ext4_get_block(inode, iblock, bh_result, - EXT4_GET_BLOCKS_IO_CREATE_EXT); - ext4_journal_stop(handle); + ret = ext4_get_block_trans(inode, iblock, bh_result, + EXT4_GET_BLOCKS_IO_CREATE_EXT); /* * When doing DIO using unwritten extents, we need io_end to convert @@ -850,18 +871,13 @@ static int ext4_dio_get_block_unwritten_async(struct inode *inode, static int ext4_dio_get_block_unwritten_sync(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - handle_t *handle; int ret; /* We don't expect handle for direct IO */ WARN_ON_ONCE(ext4_journal_current_handle()); - handle = start_dio_trans(inode, bh_result); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = _ext4_get_block(inode, iblock, bh_result, - EXT4_GET_BLOCKS_IO_CREATE_EXT); - ext4_journal_stop(handle); + ret = ext4_get_block_trans(inode, iblock, bh_result, + EXT4_GET_BLOCKS_IO_CREATE_EXT); /* * Mark inode as having pending DIO writes to unwritten extents. @@ -1057,7 +1073,7 @@ int do_journal_get_write_access(handle_t *handle, static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; struct inode *inode = page->mapping->host; unsigned block_start, block_end; @@ -1069,15 +1085,15 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, bool decrypt = false; BUG_ON(!PageLocked(page)); - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from > PAGE_SIZE); + BUG_ON(to > PAGE_SIZE); BUG_ON(from > to); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); head = page_buffers(page); bbits = ilog2(blocksize); - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + block = (sector_t)page->index << (PAGE_SHIFT - bbits); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start = block_end, bh = bh->b_this_page) { @@ -1159,8 +1175,8 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, * we allocate blocks but write fails for some reason */ needed_blocks = ext4_writepage_trans_blocks(inode) + 1; - index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); + index = pos >> PAGE_SHIFT; + from = pos & (PAGE_SIZE - 1); to = from + len; if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -1188,7 +1204,7 @@ retry_grab: retry_journal: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); if (IS_ERR(handle)) { - page_cache_release(page); + put_page(page); return PTR_ERR(handle); } @@ -1196,7 +1212,7 @@ retry_journal: if (page->mapping != mapping) { /* The page got truncated from under us */ unlock_page(page); - page_cache_release(page); + put_page(page); ext4_journal_stop(handle); goto retry_grab; } @@ -1252,7 +1268,7 @@ retry_journal: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; - page_cache_release(page); + put_page(page); return ret; } *pagep = page; @@ -1291,15 +1307,6 @@ static int ext4_write_end(struct file *file, int i_size_changed = 0; trace_ext4_write_end(inode, pos, len, copied); - if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { - ret = ext4_jbd2_file_inode(handle, inode); - if (ret) { - unlock_page(page); - page_cache_release(page); - goto errout; - } - } - if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -1315,7 +1322,7 @@ static int ext4_write_end(struct file *file, */ i_size_changed = ext4_update_inode_size(inode, pos + copied); unlock_page(page); - page_cache_release(page); + put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -1399,7 +1406,7 @@ static int ext4_journalled_write_end(struct file *file, int size_changed = 0; trace_ext4_journalled_write_end(inode, pos, len, copied); - from = pos & (PAGE_CACHE_SIZE - 1); + from = pos & (PAGE_SIZE - 1); to = from + len; BUG_ON(!ext4_handle_valid(handle)); @@ -1423,7 +1430,7 @@ static int ext4_journalled_write_end(struct file *file, ext4_set_inode_state(inode, EXT4_STATE_JDATA); EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; unlock_page(page); - page_cache_release(page); + put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -1537,7 +1544,7 @@ static void ext4_da_page_release_reservation(struct page *page, int num_clusters; ext4_fsblk_t lblk; - BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); + BUG_ON(stop > PAGE_SIZE || stop < length); head = page_buffers(page); bh = head; @@ -1553,7 +1560,7 @@ static void ext4_da_page_release_reservation(struct page *page, clear_buffer_delay(bh); } else if (contiguous_blks) { lblk = page->index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; ext4_es_remove_extent(inode, lblk, contiguous_blks); @@ -1563,7 +1570,7 @@ static void ext4_da_page_release_reservation(struct page *page, } while ((bh = bh->b_this_page) != head); if (contiguous_blks) { - lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk = page->index << (PAGE_SHIFT - inode->i_blkbits); lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; ext4_es_remove_extent(inode, lblk, contiguous_blks); } @@ -1572,7 +1579,7 @@ static void ext4_da_page_release_reservation(struct page *page, * need to release the reserved space for that cluster. */ num_clusters = EXT4_NUM_B2C(sbi, to_release); while (num_clusters > 0) { - lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + + lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) + ((num_clusters - 1) << sbi->s_cluster_bits); if (sbi->s_cluster_ratio == 1 || !ext4_find_delalloc_cluster(inode, lblk)) @@ -1619,8 +1626,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, end = mpd->next_page - 1; if (invalidate) { ext4_lblk_t start, last; - start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); + start = index << (PAGE_SHIFT - inode->i_blkbits); + last = end << (PAGE_SHIFT - inode->i_blkbits); ext4_es_remove_extent(inode, start, last - start + 1); } @@ -1636,7 +1643,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); if (invalidate) { - block_invalidatepage(page, 0, PAGE_CACHE_SIZE); + block_invalidatepage(page, 0, PAGE_SIZE); ClearPageUptodate(page); } unlock_page(page); @@ -2007,10 +2014,10 @@ static int ext4_writepage(struct page *page, trace_ext4_writepage(page); size = i_size_read(inode); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; page_bufs = page_buffers(page); /* @@ -2034,7 +2041,7 @@ static int ext4_writepage(struct page *page, ext4_bh_delay_or_unwritten)) { redirty_page_for_writepage(wbc, page); if ((current->flags & PF_MEMALLOC) || - (inode->i_sb->s_blocksize == PAGE_CACHE_SIZE)) { + (inode->i_sb->s_blocksize == PAGE_SIZE)) { /* * For memory cleaning there's no point in writing only * some buffers. So just bail out. Warn if we came here @@ -2076,10 +2083,10 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) int err; BUG_ON(page->index != mpd->first_page); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; clear_page_dirty_for_io(page); err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) @@ -2213,7 +2220,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) int nr_pages, i; struct inode *inode = mpd->inode; struct buffer_head *head, *bh; - int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; + int bpp_bits = PAGE_SHIFT - inode->i_blkbits; pgoff_t start, end; ext4_lblk_t lblk; sector_t pblock; @@ -2274,7 +2281,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) * supports blocksize < pagesize as we will try to * convert potentially unmapped parts of inode. */ - mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; + mpd->io_submit.io_end->size += PAGE_SIZE; /* Page fully mapped - let IO run! */ err = mpage_submit_page(mpd, page); if (err < 0) { @@ -2315,7 +2322,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) * the data was copied into the page cache. */ get_blocks_flags = EXT4_GET_BLOCKS_CREATE | - EXT4_GET_BLOCKS_METADATA_NOFAIL; + EXT4_GET_BLOCKS_METADATA_NOFAIL | + EXT4_GET_BLOCKS_IO_SUBMIT; dioread_nolock = ext4_should_dioread_nolock(inode); if (dioread_nolock) get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; @@ -2426,7 +2434,7 @@ update_disksize: * Update on-disk size after IO is submitted. Races with * truncate are avoided by checking i_size under i_data_sem. */ - disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; + disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; if (disksize > EXT4_I(inode)->i_disksize) { int err2; loff_t i_size; @@ -2562,7 +2570,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) mpd->next_page = page->index + 1; /* Add all dirty buffers to mpd */ lblk = ((ext4_lblk_t)page->index) << - (PAGE_CACHE_SHIFT - blkbits); + (PAGE_SHIFT - blkbits); head = page_buffers(page); err = mpage_process_page_bufs(mpd, head, head, lblk); if (err <= 0) @@ -2604,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping, struct blk_plug plug; bool give_up_on_write = false; + percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); - if (dax_mapping(mapping)) - return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, - wbc); + if (dax_mapping(mapping)) { + ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, + wbc); + goto out_writepages; + } /* * No pages to write? This is mainly a kludge to avoid starting @@ -2647,7 +2658,7 @@ static int ext4_writepages(struct address_space *mapping, * We may need to convert up to one extent per block in * the page and we may dirty the inode. */ - rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); + rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits); } /* @@ -2678,8 +2689,8 @@ static int ext4_writepages(struct address_space *mapping, mpd.first_page = writeback_index; mpd.last_page = -1; } else { - mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; - mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; + mpd.first_page = wbc->range_start >> PAGE_SHIFT; + mpd.last_page = wbc->range_end >> PAGE_SHIFT; } mpd.inode = inode; @@ -2778,6 +2789,7 @@ retry: out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); + percpu_up_read(&sbi->s_journal_flag_rwsem); return ret; } @@ -2838,7 +2850,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; handle_t *handle; - index = pos >> PAGE_CACHE_SHIFT; + index = pos >> PAGE_SHIFT; if (ext4_nonda_switch(inode->i_sb)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; @@ -2881,7 +2893,7 @@ retry_journal: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, ext4_da_write_credits(inode, pos, len)); if (IS_ERR(handle)) { - page_cache_release(page); + put_page(page); return PTR_ERR(handle); } @@ -2889,7 +2901,7 @@ retry_journal: if (page->mapping != mapping) { /* The page got truncated from under us */ unlock_page(page); - page_cache_release(page); + put_page(page); ext4_journal_stop(handle); goto retry_grab; } @@ -2917,7 +2929,7 @@ retry_journal: ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; - page_cache_release(page); + put_page(page); return ret; } @@ -2965,7 +2977,7 @@ static int ext4_da_write_end(struct file *file, len, copied, page, fsdata); trace_ext4_da_write_end(inode, pos, len, copied); - start = pos & (PAGE_CACHE_SIZE - 1); + start = pos & (PAGE_SIZE - 1); end = start + copied - 1; /* @@ -3187,7 +3199,7 @@ static int __ext4_journalled_invalidatepage(struct page *page, /* * If it's a full truncate we just forget about the pending dirtying */ - if (offset == 0 && length == PAGE_CACHE_SIZE) + if (offset == 0 && length == PAGE_SIZE) ClearPageChecked(page); return jbd2_journal_invalidatepage(journal, page, offset, length); @@ -3217,75 +3229,52 @@ static int ext4_releasepage(struct page *page, gfp_t wait) } #ifdef CONFIG_FS_DAX -int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +/* + * Get block function for DAX IO and mmap faults. It takes care of converting + * unwritten extents to written ones and initializes new / converted blocks + * to zeros. + */ +int ext4_dax_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) { - int ret, err; - int credits; - struct ext4_map_blocks map; - handle_t *handle = NULL; - int flags = 0; + int ret; - ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n", - inode->i_ino, create); - map.m_lblk = iblock; - map.m_len = bh_result->b_size >> inode->i_blkbits; - credits = ext4_chunk_trans_blocks(inode, map.m_len); - if (create) { - flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO; - handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - return ret; - } - } + ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create); + if (!create) + return _ext4_get_block(inode, iblock, bh_result, 0); - ret = ext4_map_blocks(handle, inode, &map, flags); - if (create) { - err = ext4_journal_stop(handle); - if (ret >= 0 && err < 0) - ret = err; - } - if (ret <= 0) - goto out; - if (map.m_flags & EXT4_MAP_UNWRITTEN) { - int err2; + ret = ext4_get_block_trans(inode, iblock, bh_result, + EXT4_GET_BLOCKS_PRE_IO | + EXT4_GET_BLOCKS_CREATE_ZERO); + if (ret < 0) + return ret; + if (buffer_unwritten(bh_result)) { /* - * We are protected by i_mmap_sem so we know block cannot go - * away from under us even though we dropped i_data_sem. - * Convert extent to written and write zeros there. - * - * Note: We may get here even when create == 0. + * We are protected by i_mmap_sem or i_mutex so we know block + * cannot go away from under us even though we dropped + * i_data_sem. Convert extent to written and write zeros there. */ - handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - - err = ext4_map_blocks(handle, inode, &map, - EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO); - if (err < 0) - ret = err; - err2 = ext4_journal_stop(handle); - if (err2 < 0 && ret > 0) - ret = err2; - } -out: - WARN_ON_ONCE(ret == 0 && create); - if (ret > 0) { - map_bh(bh_result, inode->i_sb, map.m_pblk); - /* - * At least for now we have to clear BH_New so that DAX code - * doesn't attempt to zero blocks again in a racy way. - */ - map.m_flags &= ~EXT4_MAP_NEW; - ext4_update_bh_state(bh_result, map.m_flags); - bh_result->b_size = map.m_len << inode->i_blkbits; - ret = 0; + ret = ext4_get_block_trans(inode, iblock, bh_result, + EXT4_GET_BLOCKS_CONVERT | + EXT4_GET_BLOCKS_CREATE_ZERO); + if (ret < 0) + return ret; } - return ret; + /* + * At least for now we have to clear BH_New so that DAX code + * doesn't attempt to zero blocks again in a racy way. + */ + clear_buffer_new(bh_result); + return 0; +} +#else +/* Just define empty function, it will never get called. */ +int ext4_dax_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + BUG(); + return 0; } #endif @@ -3318,7 +3307,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, } /* - * For ext4 extent files, ext4 will do direct-io write to holes, + * Handling of direct IO writes. + * + * For ext4 extent files, ext4 will do direct-io write even to holes, * preallocated extents, and those write extend the file, no need to * fall back to buffered IO. * @@ -3336,21 +3327,37 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * if the machine crashes during the write. * */ -static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct ext4_inode_info *ei = EXT4_I(inode); ssize_t ret; + loff_t offset = iocb->ki_pos; size_t count = iov_iter_count(iter); int overwrite = 0; get_block_t *get_block_func = NULL; int dio_flags = 0; loff_t final_size = offset + count; + int orphan = 0; + handle_t *handle; - /* Use the old path for reads and writes beyond i_size. */ - if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(iocb, iter, offset); + if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + ret = ext4_orphan_add(handle, inode); + if (ret) { + ext4_journal_stop(handle); + goto out; + } + orphan = 1; + ei->i_disksize = inode->i_size; + ext4_journal_stop(handle); + } BUG_ON(iocb->private == NULL); @@ -3359,8 +3366,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * conversion. This also disallows race between truncate() and * overwrite DIO as i_dio_count needs to be incremented under i_mutex. */ - if (iov_iter_rw(iter) == WRITE) - inode_dio_begin(inode); + inode_dio_begin(inode); /* If we do a overwrite dio, i_mutex locking can be released */ overwrite = *((int *)iocb->private); @@ -3369,7 +3375,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, inode_unlock(inode); /* - * We could direct write to holes and fallocate. + * For extent mapped files we could direct write to holes and fallocate. * * Allocated blocks to fill the hole are marked as unwritten to prevent * parallel buffered read to expose the stale data before DIO complete @@ -3391,7 +3397,23 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, iocb->private = NULL; if (overwrite) get_block_func = ext4_dio_get_block_overwrite; - else if (is_sync_kiocb(iocb)) { + else if (IS_DAX(inode)) { + /* + * We can avoid zeroing for aligned DAX writes beyond EOF. Other + * writes need zeroing either because they can race with page + * faults or because they use partial blocks. + */ + if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size && + ext4_aligned_io(inode, offset, count)) + get_block_func = ext4_dio_get_block; + else + get_block_func = ext4_dax_get_block; + dio_flags = DIO_LOCKING; + } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || + round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) { + get_block_func = ext4_dio_get_block; + dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; + } else if (is_sync_kiocb(iocb)) { get_block_func = ext4_dio_get_block_unwritten_sync; dio_flags = DIO_LOCKING; } else { @@ -3401,12 +3423,12 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, #ifdef CONFIG_EXT4_FS_ENCRYPTION BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); #endif - if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, get_block_func, + if (IS_DAX(inode)) { + ret = dax_do_io(iocb, inode, iter, get_block_func, ext4_end_io_dio, dio_flags); - else + } else ret = __blockdev_direct_IO(iocb, inode, - inode->i_sb->s_bdev, iter, offset, + inode->i_sb->s_bdev, iter, get_block_func, ext4_end_io_dio, NULL, dio_flags); @@ -3424,21 +3446,95 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } - if (iov_iter_rw(iter) == WRITE) - inode_dio_end(inode); + inode_dio_end(inode); /* take i_mutex locking again if we do a ovewrite dio */ if (overwrite) inode_lock(inode); + if (ret < 0 && final_size > inode->i_size) + ext4_truncate_failed_write(inode); + + /* Handle extending of i_size after direct IO write */ + if (orphan) { + int err; + + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... */ + ret = PTR_ERR(handle); + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + + goto out; + } + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + if (ret > 0) { + loff_t end = offset + ret; + if (end > inode->i_size) { + ei->i_disksize = end; + i_size_write(inode, end); + /* + * We're going to return a positive `ret' + * here due to non-zero-length I/O, so there's + * no way of reporting error returns from + * ext4_mark_inode_dirty() to userspace. So + * ignore it. + */ + ext4_mark_inode_dirty(handle, inode); + } + } + err = ext4_journal_stop(handle); + if (ret == 0) + ret = err; + } +out: + return ret; +} + +static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) +{ + int unlocked = 0; + struct inode *inode = iocb->ki_filp->f_mapping->host; + ssize_t ret; + + if (ext4_should_dioread_nolock(inode)) { + /* + * Nolock dioread optimization may be dynamically disabled + * via ext4_inode_block_unlocked_dio(). Check inode's state + * while holding extra i_dio_count ref. + */ + inode_dio_begin(inode); + smp_mb(); + if (unlikely(ext4_test_inode_state(inode, + EXT4_STATE_DIOREAD_LOCK))) + inode_dio_end(inode); + else + unlocked = 1; + } + if (IS_DAX(inode)) { + ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, + NULL, unlocked ? 0 : DIO_LOCKING); + } else { + ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, ext4_dio_get_block, + NULL, NULL, + unlocked ? 0 : DIO_LOCKING); + } + if (unlocked) + inode_dio_end(inode); return ret; } -static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; ssize_t ret; #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -3457,10 +3553,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return 0; trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(iocb, iter, offset); + if (iov_iter_rw(iter) == READ) + ret = ext4_direct_IO_read(iocb, iter); else - ret = ext4_ind_direct_IO(iocb, iter, offset); + ret = ext4_direct_IO_write(iocb, iter); trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } @@ -3536,10 +3632,7 @@ void ext4_set_aops(struct inode *inode) { switch (ext4_inode_journal_mode(inode)) { case EXT4_INODE_ORDERED_DATA_MODE: - ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE); - break; case EXT4_INODE_WRITEBACK_DATA_MODE: - ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE); break; case EXT4_INODE_JOURNAL_DATA_MODE: inode->i_mapping->a_ops = &ext4_journalled_aops; @@ -3556,8 +3649,8 @@ void ext4_set_aops(struct inode *inode) static int __ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length) { - ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + ext4_fsblk_t index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize, pos; ext4_lblk_t iblock; struct inode *inode = mapping->host; @@ -3565,14 +3658,14 @@ static int __ext4_block_zero_page_range(handle_t *handle, struct page *page; int err = 0; - page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, + page = find_or_create_page(mapping, from >> PAGE_SHIFT, mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return -ENOMEM; blocksize = inode->i_sb->s_blocksize; - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -3614,7 +3707,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, ext4_encrypted_inode(inode)) { /* We expect the key to be set. */ BUG_ON(!ext4_has_encryption_key(inode)); - BUG_ON(blocksize != PAGE_CACHE_SIZE); + BUG_ON(blocksize != PAGE_SIZE); WARN_ON_ONCE(ext4_decrypt(page)); } } @@ -3632,13 +3725,13 @@ static int __ext4_block_zero_page_range(handle_t *handle, } else { err = 0; mark_buffer_dirty(bh); - if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) - err = ext4_jbd2_file_inode(handle, inode); + if (ext4_should_order_data(inode)) + err = ext4_jbd2_inode_add_write(handle, inode); } unlock: unlock_page(page); - page_cache_release(page); + put_page(page); return err; } @@ -3653,7 +3746,7 @@ static int ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length) { struct inode *inode = mapping->host; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize = inode->i_sb->s_blocksize; unsigned max = blocksize - (offset & (blocksize - 1)); @@ -3678,7 +3771,7 @@ static int ext4_block_zero_page_range(handle_t *handle, static int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from) { - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned offset = from & (PAGE_SIZE-1); unsigned length; unsigned blocksize; struct inode *inode = mapping->host; @@ -3816,7 +3909,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) */ if (offset + length > inode->i_size) { length = inode->i_size + - PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - + PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) - offset; } @@ -4891,23 +4984,23 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) tid_t commit_tid = 0; int ret; - offset = inode->i_size & (PAGE_CACHE_SIZE - 1); + offset = inode->i_size & (PAGE_SIZE - 1); /* * All buffers in the last page remain valid? Then there's nothing to - * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == + * do. We do the check mainly to optimize the common PAGE_SIZE == * blocksize case */ - if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) + if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) return; while (1) { page = find_lock_page(inode->i_mapping, - inode->i_size >> PAGE_CACHE_SHIFT); + inode->i_size >> PAGE_SHIFT); if (!page) return; ret = __ext4_journalled_invalidatepage(page, offset, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); unlock_page(page); - page_cache_release(page); + put_page(page); if (ret != -EBUSY) return; commit_tid = 0; @@ -5431,6 +5524,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) journal_t *journal; handle_t *handle; int err; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); /* * We have to be very careful here: changing a data block's @@ -5447,22 +5541,30 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return 0; if (is_journal_aborted(journal)) return -EROFS; - /* We have to allocate physical blocks for delalloc blocks - * before flushing journal. otherwise delalloc blocks can not - * be allocated any more. even more truncate on delalloc blocks - * could trigger BUG by flushing delalloc blocks in journal. - * There is no delalloc block in non-journal data mode. - */ - if (val && test_opt(inode->i_sb, DELALLOC)) { - err = ext4_alloc_da_blocks(inode); - if (err < 0) - return err; - } /* Wait for all existing dio workers */ ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + /* + * Before flushing the journal and switching inode's aops, we have + * to flush all dirty data the inode has. There can be outstanding + * delayed allocations, there can be unwritten extents created by + * fallocate or buffered writes in dioread_nolock mode covered by + * dirty data which can be converted only after flushing the dirty + * data (and journalled aops don't know how to handle these cases). + */ + if (val) { + down_write(&EXT4_I(inode)->i_mmap_sem); + err = filemap_write_and_wait(inode->i_mapping); + if (err < 0) { + up_write(&EXT4_I(inode)->i_mmap_sem); + ext4_inode_resume_unlocked_dio(inode); + return err; + } + } + + percpu_down_write(&sbi->s_journal_flag_rwsem); jbd2_journal_lock_updates(journal); /* @@ -5479,6 +5581,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); + percpu_up_write(&sbi->s_journal_flag_rwsem); ext4_inode_resume_unlocked_dio(inode); return err; } @@ -5487,6 +5590,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); + percpu_up_write(&sbi->s_journal_flag_rwsem); + + if (val) + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); /* Finally we can mark the inode as dirty. */ @@ -5546,10 +5653,10 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; } - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; /* * Return if we have all the buffers mapped. This avoids the need to do * journal_start/journal_stop which can block and take a long time @@ -5580,7 +5687,7 @@ retry_alloc: ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { + PAGE_SIZE, NULL, do_journal_get_write_access)) { unlock_page(page); ret = VM_FAULT_SIGBUS; ext4_journal_stop(handle); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index eae5917c534e..28cc412852af 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -13,8 +13,8 @@ #include <linux/compat.h> #include <linux/mount.h> #include <linux/file.h> -#include <linux/random.h> #include <linux/quotaops.h> +#include <linux/uuid.h> #include <asm/uaccess.h> #include "ext4_jbd2.h" #include "ext4.h" @@ -365,7 +365,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) struct dquot *transfer_to[MAXQUOTAS] = { }; transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); - if (transfer_to[PRJQUOTA]) { + if (!IS_ERR(transfer_to[PRJQUOTA])) { err = __dquot_transfer(inode, transfer_to); dqput(transfer_to[PRJQUOTA]); if (err) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 50e05df28f66..c1ab3ec30423 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -119,7 +119,7 @@ MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc"); * * * one block each for bitmap and buddy information. So for each group we - * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE / + * take up 2 blocks. A page can contain blocks_per_page (PAGE_SIZE / * blocksize) blocks. So it can have information regarding groups_per_page * which is blocks_per_page/2 * @@ -807,7 +807,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b) * * one block each for bitmap and buddy information. * So for each group we take up 2 blocks. A page can - * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. + * contain blocks_per_page (PAGE_SIZE / blocksize) blocks. * So it can have information regarding groups_per_page which * is blocks_per_page/2 * @@ -839,7 +839,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) sb = inode->i_sb; ngroups = ext4_get_groups_count(sb); blocksize = 1 << inode->i_blkbits; - blocks_per_page = PAGE_CACHE_SIZE / blocksize; + blocks_per_page = PAGE_SIZE / blocksize; groups_per_page = blocks_per_page >> 1; if (groups_per_page == 0) @@ -993,7 +993,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, e4b->bd_buddy_page = NULL; e4b->bd_bitmap_page = NULL; - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; + blocks_per_page = PAGE_SIZE / sb->s_blocksize; /* * the buddy cache inode stores the block bitmap * and buddy information in consecutive blocks. @@ -1028,11 +1028,11 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_page) { unlock_page(e4b->bd_bitmap_page); - page_cache_release(e4b->bd_bitmap_page); + put_page(e4b->bd_bitmap_page); } if (e4b->bd_buddy_page) { unlock_page(e4b->bd_buddy_page); - page_cache_release(e4b->bd_buddy_page); + put_page(e4b->bd_buddy_page); } } @@ -1125,7 +1125,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, might_sleep(); mb_debug(1, "load group %u\n", group); - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; + blocks_per_page = PAGE_SIZE / sb->s_blocksize; grp = ext4_get_group_info(sb, group); e4b->bd_blkbits = sb->s_blocksize_bits; @@ -1167,7 +1167,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, * is yet to initialize the same. So * wait for it to initialize. */ - page_cache_release(page); + put_page(page); page = find_or_create_page(inode->i_mapping, pnum, gfp); if (page) { BUG_ON(page->mapping != inode->i_mapping); @@ -1203,7 +1203,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) - page_cache_release(page); + put_page(page); page = find_or_create_page(inode->i_mapping, pnum, gfp); if (page) { BUG_ON(page->mapping != inode->i_mapping); @@ -1238,11 +1238,11 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, err: if (page) - page_cache_release(page); + put_page(page); if (e4b->bd_bitmap_page) - page_cache_release(e4b->bd_bitmap_page); + put_page(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) - page_cache_release(e4b->bd_buddy_page); + put_page(e4b->bd_buddy_page); e4b->bd_buddy = NULL; e4b->bd_bitmap = NULL; return ret; @@ -1257,15 +1257,16 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_page) - page_cache_release(e4b->bd_bitmap_page); + put_page(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) - page_cache_release(e4b->bd_buddy_page); + put_page(e4b->bd_buddy_page); } static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) { int order = 1; + int bb_incr = 1 << (e4b->bd_blkbits - 1); void *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); @@ -1278,7 +1279,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) /* this block is part of buddy of order 'order' */ return order; } - bb += 1 << (e4b->bd_blkbits - order); + bb += bb_incr; + bb_incr >>= 1; order++; } return 0; @@ -2583,7 +2585,7 @@ int ext4_mb_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned i, j; - unsigned offset; + unsigned offset, offset_incr; unsigned max; int ret; @@ -2612,11 +2614,13 @@ int ext4_mb_init(struct super_block *sb) i = 1; offset = 0; + offset_incr = 1 << (sb->s_blocksize_bits - 1); max = sb->s_blocksize << 2; do { sbi->s_mb_offsets[i] = offset; sbi->s_mb_maxs[i] = max; - offset += 1 << (sb->s_blocksize_bits - i); + offset += offset_incr; + offset_incr = offset_incr >> 1; max = max >> 1; i++; } while (i <= sb->s_blocksize_bits + 1); @@ -2833,8 +2837,8 @@ static void ext4_free_data_callback(struct super_block *sb, /* No more items in the per group rb tree * balance refcounts from ext4_mb_free_metadata() */ - page_cache_release(e4b.bd_buddy_page); - page_cache_release(e4b.bd_bitmap_page); + put_page(e4b.bd_buddy_page); + put_page(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->efd_group); kmem_cache_free(ext4_free_data_cachep, entry); @@ -4385,9 +4389,9 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) ext4_mb_put_pa(ac, ac->ac_sb, pa); } if (ac->ac_bitmap_page) - page_cache_release(ac->ac_bitmap_page); + put_page(ac->ac_bitmap_page); if (ac->ac_buddy_page) - page_cache_release(ac->ac_buddy_page); + put_page(ac->ac_buddy_page); if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); @@ -4599,8 +4603,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * otherwise we'll refresh it from * on-disk bitmap and lose not-yet-available * blocks */ - page_cache_get(e4b->bd_buddy_page); - page_cache_get(e4b->bd_bitmap_page); + get_page(e4b->bd_buddy_page); + get_page(e4b->bd_bitmap_page); } while (*n) { parent = *n; @@ -4935,7 +4939,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, * boundary. */ if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { - ext4_warning(sb, "too much blocks added to group %u\n", + ext4_warning(sb, "too much blocks added to group %u", block_group); err = -EINVAL; goto error_return; diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 24445275d330..23d436d6f8b8 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -121,7 +121,7 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, "MMP failure info: last update time: %llu, last update " - "node: %s, last update device: %s\n", + "node: %s, last update device: %s", (long long unsigned int) le64_to_cpu(mmp->mmp_time), mmp->mmp_nodename, mmp->mmp_bdevname); } @@ -353,7 +353,7 @@ skip: * wait for MMP interval and check mmp_seq. */ if (schedule_timeout_interruptible(HZ * wait_time) != 0) { - ext4_warning(sb, "MMP startup interrupted, failing mount\n"); + ext4_warning(sb, "MMP startup interrupted, failing mount"); goto failed; } diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 4098acc701c3..a920c5d29fac 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -60,10 +60,10 @@ ext4_double_down_write_data_sem(struct inode *first, struct inode *second) { if (first < second) { down_write(&EXT4_I(first)->i_data_sem); - down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); + down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER); } else { down_write(&EXT4_I(second)->i_data_sem); - down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); + down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER); } } @@ -156,7 +156,7 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, page[1] = grab_cache_page_write_begin(mapping[1], index2, fl); if (!page[1]) { unlock_page(page[0]); - page_cache_release(page[0]); + put_page(page[0]); return -ENOMEM; } /* @@ -192,7 +192,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) create_empty_buffers(page, blocksize, 0); head = page_buffers(page); - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start = block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; @@ -268,7 +268,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; - int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; struct buffer_head *bh = NULL; @@ -400,13 +400,13 @@ data_copy: /* Even in case of data=writeback it is reasonable to pin * inode to transaction, to prevent unexpected data loss */ - *err = ext4_jbd2_file_inode(handle, orig_inode); + *err = ext4_jbd2_inode_add_write(handle, orig_inode); unlock_pages: unlock_page(pagep[0]); - page_cache_release(pagep[0]); + put_page(pagep[0]); unlock_page(pagep[1]); - page_cache_release(pagep[1]); + put_page(pagep[1]); stop_journal: ext4_journal_stop(handle); if (*err == -ENOSPC && @@ -484,6 +484,13 @@ mext_check_arguments(struct inode *orig_inode, return -EBUSY; } + if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) { + ext4_debug("ext4 move extent: The argument files should " + "not be quota files [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EBUSY; + } + /* Ext4 move extent supports only extent based file */ if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " @@ -554,7 +561,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, struct inode *orig_inode = file_inode(o_filp); struct inode *donor_inode = file_inode(d_filp); struct ext4_ext_path *path = NULL; - int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits; ext4_lblk_t o_end, o_start = orig_blk; ext4_lblk_t d_start = donor_blk; int ret; @@ -648,9 +655,9 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, if (o_end - o_start < cur_len) cur_len = o_end - o_start; - orig_page_index = o_start >> (PAGE_CACHE_SHIFT - + orig_page_index = o_start >> (PAGE_SHIFT - orig_inode->i_blkbits); - donor_page_index = d_start >> (PAGE_CACHE_SHIFT - + donor_page_index = d_start >> (PAGE_SHIFT - donor_inode->i_blkbits); offset_in_page = o_start % blocks_per_page; if (cur_len > blocks_per_page- offset_in_page) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 48e4b8907826..ec4c39952e84 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1107,6 +1107,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, } while (1) { + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto errout; + } + cond_resched(); block = dx_get_block(frame->at); ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, start_hash, start_minor_hash); @@ -1613,7 +1618,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi if (nokey) return ERR_PTR(-ENOKEY); ext4_warning(inode->i_sb, - "Inconsistent encryption contexts: %lu/%lu\n", + "Inconsistent encryption contexts: %lu/%lu", (unsigned long) dir->i_ino, (unsigned long) inode->i_ino); return ERR_PTR(-EPERM); @@ -1638,13 +1643,13 @@ struct dentry *ext4_get_parent(struct dentry *child) ino = le32_to_cpu(de->inode); brelse(bh); - if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) { + if (!ext4_valid_inum(child->d_sb, ino)) { EXT4_ERROR_INODE(d_inode(child), "bad parent inode number: %u", ino); return ERR_PTR(-EFSCORRUPTED); } - return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino)); + return d_obtain_alias(ext4_iget_normal(child->d_sb, ino)); } /* @@ -2828,7 +2833,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * list entries can cause panics at unmount time. */ mutex_lock(&sbi->s_orphan_lock); - list_del(&EXT4_I(inode)->i_orphan); + list_del_init(&EXT4_I(inode)->i_orphan); mutex_unlock(&sbi->s_orphan_lock); } } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index d77d15f4b674..2a01df9cc1c3 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/backing-dev.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -341,9 +342,7 @@ void ext4_io_submit(struct ext4_io_submit *io) if (bio) { int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE; - bio_get(io->io_bio); submit_bio(io_op, io->io_bio); - bio_put(io->io_bio); } io->io_bio = NULL; } @@ -432,8 +431,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - if (len < PAGE_CACHE_SIZE) - zero_user_segment(page, len, PAGE_CACHE_SIZE); + if (len < PAGE_SIZE) + zero_user_segment(page, len, PAGE_SIZE); /* * In the first loop we prepare and mark buffers to submit. We have to * mark all buffers in the page before submitting so that @@ -470,9 +469,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io, if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && nr_to_submit) { - data_page = ext4_encrypt(inode, page); + gfp_t gfp_flags = GFP_NOFS; + + retry_encrypt: + data_page = ext4_encrypt(inode, page, gfp_flags); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); + if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { + if (io->io_bio) { + ext4_io_submit(io); + congestion_wait(BLK_RW_ASYNC, HZ/50); + } + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } data_page = NULL; goto out; } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 5dc5e95063de..dc54a4b60eba 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -23,7 +23,7 @@ * * then this code just gives up and calls the buffer_head-based read function. * It does handle a page which has holes at the end - that is a common case: - * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. + * the end-of-file on blocksize < PAGE_SIZE setups. * */ @@ -140,7 +140,7 @@ int ext4_mpage_readpages(struct address_space *mapping, struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; - const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits; + const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; sector_t block_in_file; sector_t last_block; @@ -173,7 +173,7 @@ int ext4_mpage_readpages(struct address_space *mapping, if (page_has_buffers(page)) goto confused; - block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) @@ -217,7 +217,7 @@ int ext4_mpage_readpages(struct address_space *mapping, set_error_page: SetPageError(page); zero_user_segment(page, 0, - PAGE_CACHE_SIZE); + PAGE_SIZE); unlock_page(page); goto next_page; } @@ -250,7 +250,7 @@ int ext4_mpage_readpages(struct address_space *mapping, } if (first_hole != blocks_per_page) { zero_user_segment(page, first_hole << blkbits, - PAGE_CACHE_SIZE); + PAGE_SIZE); if (first_hole == 0) { SetPageUptodate(page); unlock_page(page); @@ -279,7 +279,7 @@ int ext4_mpage_readpages(struct address_space *mapping, if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - ctx = ext4_get_crypto_ctx(inode); + ctx = ext4_get_crypto_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) goto set_error_page; } @@ -319,7 +319,7 @@ int ext4_mpage_readpages(struct address_space *mapping, unlock_page(page); next_page: if (pages) - page_cache_release(page); + put_page(page); } BUG_ON(pages && !list_empty(pages)); if (bio) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 34038e3598d5..cf681004b196 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -41,7 +41,7 @@ int ext4_resize_begin(struct super_block *sb) */ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { ext4_warning(sb, "There are errors in the filesystem, " - "so online resizing is not allowed\n"); + "so online resizing is not allowed"); return -EPERM; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 539297515896..3822a5aedc61 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_free_rwsem(&sbi->s_journal_flag_rwsem); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) @@ -1113,6 +1114,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, static int ext4_quota_enable(struct super_block *sb, int type, int format_id, unsigned int flags); static int ext4_enable_quotas(struct super_block *sb); +static int ext4_get_next_id(struct super_block *sb, struct kqid *qid); static struct dquot **ext4_get_dquots(struct inode *inode) { @@ -1129,7 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = { .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_projid = ext4_get_projid, - .get_next_id = dquot_get_next_id, + .get_next_id = ext4_get_next_id, }; static const struct quotactl_ops ext4_qctl_operations = { @@ -1323,9 +1325,9 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) return -1; } if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " - "when QUOTA feature is enabled"); - return -1; + ext4_msg(sb, KERN_INFO, "Journaled quota options " + "ignored when QUOTA feature is enabled"); + return 1; } qname = match_strdup(args); if (!qname) { @@ -1688,10 +1690,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, return -1; } if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_ERR, - "Cannot set journaled quota options " + ext4_msg(sb, KERN_INFO, + "Quota format mount options ignored " "when QUOTA feature is enabled"); - return -1; + return 1; } sbi->s_jquota_fmt = m->mount_opt; #endif @@ -1756,11 +1758,11 @@ static int parse_options(char *options, struct super_block *sb, #ifdef CONFIG_QUOTA if (ext4_has_feature_quota(sb) && (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { - ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " - "feature is enabled"); - return 0; - } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota " + "mount options ignored."); + clear_opt(sb, USRQUOTA); + clear_opt(sb, GRPQUOTA); + } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sb, USRQUOTA); @@ -1784,7 +1786,7 @@ static int parse_options(char *options, struct super_block *sb, int blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (blocksize < PAGE_CACHE_SIZE) { + if (blocksize < PAGE_SIZE) { ext4_msg(sb, KERN_ERR, "can't mount with " "dioread_nolock if block size != PAGE_SIZE"); return 0; @@ -3415,16 +3417,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { - if (blocksize != PAGE_SIZE) { - ext4_msg(sb, KERN_ERR, - "error: unsupported blocksize for dax"); - goto failed_mount; - } - if (!sb->s_bdev->bd_disk->fops->direct_access) { - ext4_msg(sb, KERN_ERR, - "error: device does not support dax"); + err = bdev_dax_supported(sb, blocksize); + if (err) goto failed_mount; - } } if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { @@ -3808,7 +3803,7 @@ no_journal: } if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && - (blocksize != PAGE_CACHE_SIZE)) { + (blocksize != PAGE_SIZE)) { ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs encryption"); goto failed_mount_wq; @@ -3929,6 +3924,9 @@ no_journal: if (!err) err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); + if (!err) + err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount6; @@ -5028,6 +5026,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) EXT4_SB(sb)->s_jquota_fmt, type); } +static void lockdep_set_quota_inode(struct inode *inode, int subclass) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + /* The first argument of lockdep_set_subclass has to be + * *exactly* the same as the argument to init_rwsem() --- in + * this case, in init_once() --- or lockdep gets unhappy + * because the name of the lock is set using the + * stringification of the argument to init_rwsem(). + */ + (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */ + lockdep_set_subclass(&ei->i_data_sem, subclass); +} + /* * Standard function to be called on quota_on */ @@ -5067,8 +5079,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; } - - return dquot_quota_on(sb, type, format_id, path); + lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); + err = dquot_quota_on(sb, type, format_id, path); + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); + return err; } static int ext4_quota_enable(struct super_block *sb, int type, int format_id, @@ -5095,8 +5111,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, /* Don't account quota for quota files to avoid recursion */ qf_inode->i_flags |= S_NOQUOTA; + lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA); err = dquot_enable(qf_inode, type, format_id, flags); iput(qf_inode); + if (err) + lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL); return err; } @@ -5253,6 +5272,17 @@ out: return len; } +static int ext4_get_next_id(struct super_block *sb, struct kqid *qid) +{ + const struct quota_format_ops *ops; + + if (!sb_has_quota_loaded(sb, qid->type)) + return -ESRCH; + ops = sb_dqopt(sb)->ops[qid->type]; + if (!ops || !ops->get_next_id) + return -ENOSYS; + return dquot_get_next_id(sb, qid); +} #endif static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 6f7ee30a89ce..75ed5c2f0c16 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -80,12 +80,12 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, if (res <= plen) paddr[res] = '\0'; if (cpage) - page_cache_release(cpage); + put_page(cpage); set_delayed_call(done, kfree_link, paddr); return paddr; errout: if (cpage) - page_cache_release(cpage); + put_page(cpage); kfree(paddr); return ERR_PTR(res); } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 0441e055c8e8..e79bd32b9b79 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -230,6 +230,27 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) return error; } +static int +__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, + void *end, const char *function, unsigned int line) +{ + struct ext4_xattr_entry *entry = IFIRST(header); + int error = -EFSCORRUPTED; + + if (((void *) header >= end) || + (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC))) + goto errout; + error = ext4_xattr_check_names(entry, end, entry); +errout: + if (error) + __ext4_error_inode(inode, function, line, 0, + "corrupted in-inode xattr"); + return error; +} + +#define xattr_check_inode(inode, header, end) \ + __xattr_check_inode((inode), (header), (end), __func__, __LINE__) + static inline int ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) { @@ -341,7 +362,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, header = IHDR(inode, raw_inode); entry = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(entry, end, entry); + error = xattr_check_inode(inode, header, end); if (error) goto cleanup; error = ext4_xattr_find_entry(&entry, name_index, name, @@ -477,7 +498,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); + error = xattr_check_inode(inode, header, end); if (error) goto cleanup; error = ext4_xattr_list_entries(dentry, IFIRST(header), @@ -1040,8 +1061,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { - error = ext4_xattr_check_names(IFIRST(header), is->s.end, - IFIRST(header)); + error = xattr_check_inode(inode, header, is->s.end); if (error) return error; /* Find the named attribute. */ @@ -1356,6 +1376,10 @@ retry: last = entry; total_ino = sizeof(struct ext4_xattr_ibody_header); + error = xattr_check_inode(inode, header, end); + if (error) + goto cleanup; + free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); if (free >= new_extra_isize) { entry = IFIRST(header); diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 3e81bdca071a..a8921112030d 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -13,19 +13,20 @@ static int ext4_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, buffer, size); } static int ext4_xattr_security_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, value, size, flags); } diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 2a3c6f9b8cb8..c7765c735714 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -20,19 +20,20 @@ ext4_xattr_trusted_list(struct dentry *dentry) static int ext4_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, void *buffer, - size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, buffer, size); } static int ext4_xattr_trusted_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, value, size, flags); } diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index d152f431e432..ca20e423034b 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -19,23 +19,24 @@ ext4_xattr_user_list(struct dentry *dentry) static int ext4_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); } static int ext4_xattr_user_set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_USER, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, value, size, flags); } |