diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 244 |
1 files changed, 68 insertions, 176 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d7b7462a0e13..367a60c07cf0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -148,6 +148,9 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) int ea_blocks = EXT4_I(inode)->i_file_acl ? EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; + if (ext4_has_inline_data(inode)) + return 0; + return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } @@ -322,18 +325,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) #endif /* - * Calculate the number of metadata blocks need to reserve - * to allocate a block located at @lblock - */ -static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) -{ - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - return ext4_ext_calc_metadata_amount(inode, lblock); - - return ext4_ind_calc_metadata_amount(inode, lblock); -} - -/* * Called with i_data_sem down, which is important since we can call * ext4_discard_preallocations() from here. */ @@ -354,35 +345,10 @@ void ext4_da_update_reserve_space(struct inode *inode, used = ei->i_reserved_data_blocks; } - if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { - ext4_warning(inode->i_sb, "ino %lu, allocated %d " - "with only %d reserved metadata blocks " - "(releasing %d blocks with reserved %d data blocks)", - inode->i_ino, ei->i_allocated_meta_blocks, - ei->i_reserved_meta_blocks, used, - ei->i_reserved_data_blocks); - WARN_ON(1); - ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; - } - /* Update per-inode reservations */ ei->i_reserved_data_blocks -= used; - ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; - percpu_counter_sub(&sbi->s_dirtyclusters_counter, - used + ei->i_allocated_meta_blocks); - ei->i_allocated_meta_blocks = 0; + percpu_counter_sub(&sbi->s_dirtyclusters_counter, used); - if (ei->i_reserved_data_blocks == 0) { - /* - * We can release all of the reserved metadata blocks - * only when we have written all of the delayed - * allocation blocks. - */ - percpu_counter_sub(&sbi->s_dirtyclusters_counter, - ei->i_reserved_meta_blocks); - ei->i_reserved_meta_blocks = 0; - ei->i_da_metadata_calc_len = 0; - } spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); /* Update quota subsystem for data blocks */ @@ -443,7 +409,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, * could be converted. */ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -489,8 +455,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping * based files * - * On success, it returns the number of blocks being mapped or allocate. - * if create==0 and the blocks are pre-allocated and uninitialized block, + * On success, it returns the number of blocks being mapped or allocated. + * if create==0 and the blocks are pre-allocated and unwritten block, * the result buffer head is unmapped. If the create ==1, it will make sure * the buffer head is mapped. * @@ -555,7 +521,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * file system block. */ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -622,12 +588,12 @@ found: map->m_flags &= ~EXT4_MAP_FLAGS; /* - * New blocks allocate and/or writing to uninitialized extent + * New blocks allocate and/or writing to unwritten extent * will possibly result in updating i_data, so we take * the write lock of i_data_sem, and call get_blocks() * with create == 1 flag. */ - down_write((&EXT4_I(inode)->i_data_sem)); + down_write(&EXT4_I(inode)->i_data_sem); /* * if the caller is from delayed allocation writeout path @@ -922,6 +888,7 @@ int do_journal_get_write_access(handle_t *handle, */ if (dirty) clear_buffer_dirty(bh); + BUFFER_TRACE(bh, "get write access"); ret = ext4_journal_get_write_access(handle, bh); if (!ret && dirty) ret = ext4_handle_dirty_metadata(handle, NULL, bh); @@ -1218,49 +1185,6 @@ static int ext4_journalled_write_end(struct file *file, } /* - * Reserve a metadata for a single block located at lblock - */ -static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) -{ - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct ext4_inode_info *ei = EXT4_I(inode); - unsigned int md_needed; - ext4_lblk_t save_last_lblock; - int save_len; - - /* - * recalculate the amount of metadata blocks to reserve - * in order to allocate nrblocks - * worse case is one extent per block - */ - spin_lock(&ei->i_block_reservation_lock); - /* - * ext4_calc_metadata_amount() has side effects, which we have - * to be prepared undo if we fail to claim space. - */ - save_len = ei->i_da_metadata_calc_len; - save_last_lblock = ei->i_da_metadata_calc_last_lblock; - md_needed = EXT4_NUM_B2C(sbi, - ext4_calc_metadata_amount(inode, lblock)); - trace_ext4_da_reserve_space(inode, md_needed); - - /* - * We do still charge estimated metadata to the sb though; - * we cannot afford to run out of free blocks. - */ - if (ext4_claim_free_clusters(sbi, md_needed, 0)) { - ei->i_da_metadata_calc_len = save_len; - ei->i_da_metadata_calc_last_lblock = save_last_lblock; - spin_unlock(&ei->i_block_reservation_lock); - return -ENOSPC; - } - ei->i_reserved_meta_blocks += md_needed; - spin_unlock(&ei->i_block_reservation_lock); - - return 0; /* success */ -} - -/* * Reserve a single cluster located at lblock */ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) @@ -1269,8 +1193,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; int ret; - ext4_lblk_t save_last_lblock; - int save_len; /* * We will charge metadata quota at writeout time; this saves @@ -1291,25 +1213,15 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) * ext4_calc_metadata_amount() has side effects, which we have * to be prepared undo if we fail to claim space. */ - save_len = ei->i_da_metadata_calc_len; - save_last_lblock = ei->i_da_metadata_calc_last_lblock; - md_needed = EXT4_NUM_B2C(sbi, - ext4_calc_metadata_amount(inode, lblock)); - trace_ext4_da_reserve_space(inode, md_needed); + md_needed = 0; + trace_ext4_da_reserve_space(inode, 0); - /* - * We do still charge estimated metadata to the sb though; - * we cannot afford to run out of free blocks. - */ - if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { - ei->i_da_metadata_calc_len = save_len; - ei->i_da_metadata_calc_last_lblock = save_last_lblock; + if (ext4_claim_free_clusters(sbi, 1, 0)) { spin_unlock(&ei->i_block_reservation_lock); dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } ei->i_reserved_data_blocks++; - ei->i_reserved_meta_blocks += md_needed; spin_unlock(&ei->i_block_reservation_lock); return 0; /* success */ @@ -1342,20 +1254,6 @@ static void ext4_da_release_space(struct inode *inode, int to_free) } ei->i_reserved_data_blocks -= to_free; - if (ei->i_reserved_data_blocks == 0) { - /* - * We can release all of the reserved metadata blocks - * only when we have written all of the delayed - * allocation blocks. - * Note that in case of bigalloc, i_reserved_meta_blocks, - * i_reserved_data_blocks, etc. refer to number of clusters. - */ - percpu_counter_sub(&sbi->s_dirtyclusters_counter, - ei->i_reserved_meta_blocks); - ei->i_reserved_meta_blocks = 0; - ei->i_da_metadata_calc_len = 0; - } - /* update fs dirty data blocks counter */ percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); @@ -1496,10 +1394,6 @@ static void ext4_print_free_blocks(struct inode *inode) ext4_msg(sb, KERN_CRIT, "Block reservation details"); ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", ei->i_reserved_data_blocks); - ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u", - ei->i_reserved_meta_blocks); - ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u", - ei->i_allocated_meta_blocks); return; } @@ -1540,7 +1434,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ext4_es_lru_add(inode); if (ext4_es_is_hole(&es)) { retval = 0; - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); goto add_delayed; } @@ -1577,7 +1471,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, * Try to see if we can get the block without requesting a new * file system block. */ - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_has_inline_data(inode)) { /* * We will soon create blocks for this page, and let @@ -1616,13 +1510,6 @@ add_delayed: retval = ret; goto out_unlock; } - } else { - ret = ext4_da_reserve_metadata(inode, iblock); - if (ret) { - /* not enough space to reserve */ - retval = ret; - goto out_unlock; - } } ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, @@ -1769,6 +1656,7 @@ static int __ext4_journalled_writepage(struct page *page, BUG_ON(!ext4_handle_valid(handle)); if (inline_data) { + BUFFER_TRACE(inode_bh, "get write access"); ret = ext4_journal_get_write_access(handle, inode_bh); err = ext4_handle_dirty_metadata(handle, inode, inode_bh); @@ -1846,6 +1734,7 @@ static int ext4_writepage(struct page *page, struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; struct ext4_io_submit io_submit; + bool keep_towrite = false; trace_ext4_writepage(page); size = i_size_read(inode); @@ -1876,6 +1765,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return 0; } + keep_towrite = true; } if (PageChecked(page) && ext4_should_journal_data(inode)) @@ -1892,7 +1782,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, wbc); + ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -1911,7 +1801,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) else len = PAGE_CACHE_SIZE; clear_page_dirty_for_io(page); - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); + err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; @@ -2032,7 +1922,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, * Scan buffers corresponding to changed extent (we expect corresponding pages * to be already locked) and update buffer state according to new extent state. * We map delalloc buffers to their physical location, clear unwritten bits, - * and mark buffers as uninit when we perform writes to uninitialized extents + * and mark buffers as uninit when we perform writes to unwritten extents * and do extent conversion after IO is finished. If the last page is not fully * mapped, we update @map to the next extent in the last page that needs * mapping. Otherwise we submit the page for IO. @@ -2126,12 +2016,12 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) struct inode *inode = mpd->inode; struct ext4_map_blocks *map = &mpd->map; int get_blocks_flags; - int err; + int err, dioread_nolock; trace_ext4_da_write_pages_extent(inode, map); /* * Call ext4_map_blocks() to allocate any delayed allocation blocks, or - * to convert an uninitialized extent to be initialized (in the case + * to convert an unwritten extent to be initialized (in the case * where we have written into one or more preallocated blocks). It is * possible that we're going to need more metadata blocks than * previously reserved. However we must not fail because we're in @@ -2148,7 +2038,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) */ get_blocks_flags = EXT4_GET_BLOCKS_CREATE | EXT4_GET_BLOCKS_METADATA_NOFAIL; - if (ext4_should_dioread_nolock(inode)) + dioread_nolock = ext4_should_dioread_nolock(inode); + if (dioread_nolock) get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; if (map->m_flags & (1 << BH_Delay)) get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; @@ -2156,7 +2047,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) err = ext4_map_blocks(handle, inode, map, get_blocks_flags); if (err < 0) return err; - if (map->m_flags & EXT4_MAP_UNINIT) { + if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) { if (!mpd->io_submit.io_end->handle && ext4_handle_valid(handle)) { mpd->io_submit.io_end->handle = handle->h_rsv_handle; @@ -2835,8 +2726,7 @@ int ext4_alloc_da_blocks(struct inode *inode) { trace_ext4_alloc_da_blocks(inode); - if (!EXT4_I(inode)->i_reserved_data_blocks && - !EXT4_I(inode)->i_reserved_meta_blocks) + if (!EXT4_I(inode)->i_reserved_data_blocks) return 0; /* @@ -3070,9 +2960,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * preallocated extents, and those write extend the file, no need to * fall back to buffered IO. * - * For holes, we fallocate those blocks, mark them as uninitialized + * For holes, we fallocate those blocks, mark them as unwritten * If those blocks were preallocated, we mark sure they are split, but - * still keep the range to write as uninitialized. + * still keep the range to write as unwritten. * * The unwritten extents will be converted to written when DIO is completed. * For async direct IO, since the IO may still pending when return, we @@ -3085,13 +2975,12 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * */ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); int overwrite = 0; get_block_t *get_block_func = NULL; int dio_flags = 0; @@ -3100,7 +2989,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, /* Use the old path for reads and writes beyond i_size. */ if (rw != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); + return ext4_ind_direct_IO(rw, iocb, iter, offset); BUG_ON(iocb->private == NULL); @@ -3124,12 +3013,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * We could direct write to holes and fallocate. * * Allocated blocks to fill the hole are marked as - * uninitialized to prevent parallel buffered read to expose + * unwritten to prevent parallel buffered read to expose * the stale data before DIO complete the data IO. * * As to previously fallocated extents, ext4 get_block will * just simply mark the buffer mapped but still keep the - * extents uninitialized. + * extents unwritten. * * For non AIO case, we will convert those unwritten extents * to written after return back from blockdev_direct_IO. @@ -3167,8 +3056,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, dio_flags = DIO_LOCKING; } ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, + inode->i_sb->s_bdev, iter, + offset, get_block_func, ext4_end_io_dio, NULL, @@ -3222,11 +3111,11 @@ retake_lock: } static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; /* @@ -3239,13 +3128,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (ext4_has_inline_data(inode)) return 0; - trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); + trace_ext4_direct_IO_enter(inode, offset, count, rw); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); + ret = ext4_ext_direct_IO(rw, iocb, iter, offset); else - ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); - trace_ext4_direct_IO_exit(inode, offset, - iov_length(iov, nr_segs), rw, ret); + ret = ext4_ind_direct_IO(rw, iocb, iter, offset); + trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); return ret; } @@ -3440,7 +3328,7 @@ unlock: * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. */ -int ext4_block_truncate_page(handle_t *handle, +static int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from) { unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -3618,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) ret = ext4_ext_remove_space(inode, first_block, stop_block - 1); else - ret = ext4_free_hole_blocks(handle, inode, first_block, + ret = ext4_ind_remove_space(handle, inode, first_block, stop_block); up_write(&EXT4_I(inode)->i_data_sem); @@ -4304,12 +4192,15 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode *raw_inode = ext4_raw_inode(iloc); struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; + struct super_block *sb = inode->i_sb; int err = 0, rc, block; - int need_datasync = 0; + int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; - /* For fields not not tracking in the in-memory inode, + spin_lock(&ei->i_raw_lock); + + /* For fields not tracked in the in-memory inode, * initialise them to zero for new inodes. */ if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); @@ -4347,8 +4238,10 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - if (ext4_inode_blocks_set(handle, raw_inode, ei)) + if (ext4_inode_blocks_set(handle, raw_inode, ei)) { + spin_unlock(&ei->i_raw_lock); goto out_brelse; + } raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) @@ -4360,24 +4253,11 @@ static int ext4_do_update_inode(handle_t *handle, need_datasync = 1; } if (ei->i_disksize > 0x7fffffffULL) { - struct super_block *sb = inode->i_sb; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || EXT4_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT4_GOOD_OLD_REV)) { - /* If this is the first large file - * created, add a flag to the superblock. - */ - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext4_update_dynamic_rev(sb); - EXT4_SET_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE); - ext4_handle_sync(handle); - err = ext4_handle_dirty_super(handle, sb); - } + cpu_to_le32(EXT4_GOOD_OLD_REV)) + set_large_file = 1; } raw_inode->i_generation = cpu_to_le32(inode->i_generation); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { @@ -4409,12 +4289,24 @@ static int ext4_do_update_inode(handle_t *handle, ext4_inode_csum_set(inode, raw_inode, ei); + spin_unlock(&ei->i_raw_lock); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); rc = ext4_handle_dirty_metadata(handle, NULL, bh); if (!err) err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); - + if (set_large_file) { + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) + goto out_brelse; + ext4_update_dynamic_rev(sb); + EXT4_SET_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE); + ext4_handle_sync(handle); + err = ext4_handle_dirty_super(handle, sb); + } ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: brelse(bh); |