From 07d45f126712fea3a9f44068bf65e0a26a162286 Mon Sep 17 00:00:00 2001 From: Julia Lawall <julia@diku.dk> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: Use BUG_ON() instead of BUG() if (...) BUG(); should be replaced with BUG_ON(...) when the test has no side-effects to allow a definition of BUG_ON that drops the code completely. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // <smpl> @ disable unlikely @ expression E,f; @@ ( if (<... f(...) ...>) { BUG(); } | - if (unlikely(E)) { BUG(); } + BUG_ON(E); ) @@ expression E,f; @@ ( if (<... f(...) ...>) { BUG(); } | - if (E) { BUG(); } + BUG_ON(E); ) // </smpl> Signed-off-by: Julia Lawall <julia@diku.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9cc80b9cc8d8..c29d774abe55 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -409,8 +409,7 @@ restart: prev = rsv; } printk("Window map complete.\n"); - if (bad) - BUG(); + BUG_ON(bad); } #define rsv_window_dump(root, verbose) \ __rsv_window_dump((root), (verbose), __func__) -- cgit v1.2.1 From 574ca174c97f790086e3e6f2251381420ad38fd0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o <tytso@mit.edu> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: Rename read_block_bitmap() to ext4_read_block_bitmap() Since this a non-static function, make it be ext4 specific to avoid conflicts with potentially other filesystems. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index c29d774abe55..ba411233cc25 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -295,7 +295,7 @@ err_out: return 0; } /** - * read_block_bitmap() + * ext4_read_block_bitmap() * @sb: super block * @block_group: given block group * @@ -305,7 +305,7 @@ err_out: * Return buffer_head on success or NULL in case of failure. */ struct buffer_head * -read_block_bitmap(struct super_block *sb, ext4_group_t block_group) +ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc * desc; struct buffer_head * bh = NULL; @@ -693,7 +693,7 @@ do_more: count -= overflow; } brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, block_group); + bitmap_bh = ext4_read_block_bitmap(sb, block_group); if (!bitmap_bh) goto error_return; desc = ext4_get_group_desc (sb, block_group, &gd_bh); @@ -1733,7 +1733,7 @@ retry_alloc: my_rsv = NULL; if (free_blocks > 0) { - bitmap_bh = read_block_bitmap(sb, group_no); + bitmap_bh = ext4_read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, @@ -1769,7 +1769,7 @@ retry_alloc: continue; brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, group_no); + bitmap_bh = ext4_read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; /* @@ -1985,7 +1985,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) continue; desc_count += le16_to_cpu(gdp->bg_free_blocks_count); brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, i); + bitmap_bh = ext4_read_block_bitmap(sb, i); if (bitmap_bh == NULL) continue; -- cgit v1.2.1 From 772cb7c83ba256a11c7bf99a11bef3858d23767c Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" <jrs@us.ibm.com> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: New inode allocation for FLEX_BG meta-data groups. This patch mostly controls the way inode are allocated in order to make ialloc aware of flex_bg block group grouping. It achieves this by bypassing the Orlov allocator when block group meta-data are packed toghether through mke2fs. Since the impact on the block allocator is minimal, this patch should have little or no effect on other block allocation algorithms. By controlling the inode allocation, it can basically control where the initial search for new block begins and thus indirectly manipulate the block allocator. This allocator favors data and meta-data locality so the disk will gradually be filled from block group zero upward. This helps improve performance by reducing seek time. Since the group of inode tables within one flex_bg are treated as one giant inode table, uninitialized block groups would not need to partially initialize as many inode table as with Orlov which would help fsck time as the filesystem usage goes up. Signed-off-by: Jose R. Santos <jrs@us.ibm.com> Signed-off-by: Valerie Clement <valerie.clement@bull.net> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ba411233cc25..0b2b7549ac63 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -809,6 +809,13 @@ do_more: spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, block_group); + spin_lock(sb_bgl_lock(sbi, flex_group)); + sbi->s_flex_groups[flex_group].free_blocks += count; + spin_unlock(sb_bgl_lock(sbi, flex_group)); + } + /* We dirtied the bitmap block */ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); err = ext4_journal_dirty_metadata(handle, bitmap_bh); @@ -1883,6 +1890,13 @@ allocated: spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_sub(&sbi->s_freeblocks_counter, num); + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group_no); + spin_lock(sb_bgl_lock(sbi, flex_group)); + sbi->s_flex_groups[flex_group].free_blocks -= num; + spin_unlock(sb_bgl_lock(sbi, flex_group)); + } + BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); err = ext4_journal_dirty_metadata(handle, gdp_bh); if (!fatal) -- cgit v1.2.1 From 7477827f6686ce29b6216d9eb26f6b9027742dcc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: Fix sparse warning Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0b2b7549ac63..6dcbec9b2562 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -47,7 +47,7 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, ext4_group_t block_group) { ext4_group_t actual_group; - ext4_get_group_no_and_offset(sb, block, &actual_group, 0); + ext4_get_group_no_and_offset(sb, block, &actual_group, NULL); if (actual_group == block_group) return 1; return 0; -- cgit v1.2.1 From 6afd670713c9e7d5c5550e379dfedca8ffab4c90 Mon Sep 17 00:00:00 2001 From: Akinobu Mita <akinobu.mita@gmail.com> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: fix ext4_init_block_bitmap() for metablock block group When meta_bg feature is enabled and s_first_meta_bg != 0, ext4_init_block_bitmap() miscalculates the number of block used by the group descriptor table (0 or 1 for metablock block group) This patch fixes this by using ext4_bg_num_gdb() Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Stephen Tweedie <sct@redhat.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Acked-by: Andreas Dilger <adilger@sun.com> --- fs/ext4/balloc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 6dcbec9b2562..1327ac3a04de 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -121,12 +121,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); } } else { /* For META_BG_BLOCK_GROUPS */ - int group_rel = (block_group - - le32_to_cpu(sbi->s_es->s_first_meta_bg)) % - EXT4_DESC_PER_BLOCK(sb); - if (group_rel == 0 || group_rel == 1 || - (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1)) - bit_max += 1; + bit_max += ext4_bg_num_gdb(sb, block_group); } if (block_group == sbi->s_groups_count - 1) { -- cgit v1.2.1 From 7061eba75ceb0835ba61e7cbd757a6f9c1e4af92 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: Use inode preallocation with -o noextents When mballoc is enabled, block allocation for old block-based files are allocated using mballoc allocator instead of old block-based allocator. The old ext3 block reservation is turned off when mballoc is turned on. However, the in-core preallocation is not enabled for block-based/ non-extent based file block allocation. This result in performance regression, as now we don't have "reservation" ore in-core preallocation to prevent interleaved fragmentation in multiple writes workload. This patch fix this by enable per inode in-core preallocation for non extent files when mballoc is used. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1327ac3a04de..816f1dbaeb3c 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1923,7 +1923,7 @@ out: return 0; } -ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, +ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, ext4_fsblk_t goal, int *errp) { struct ext4_allocation_request ar; @@ -1942,9 +1942,29 @@ ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, ret = ext4_mb_new_blocks(handle, &ar, errp); return ret; } +ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp) +{ + struct ext4_allocation_request ar; + ext4_fsblk_t ret; + + if (!test_opt(inode->i_sb, MBALLOC)) { + ret = ext4_new_blocks_old(handle, inode, goal, count, errp); + return ret; + } + + memset(&ar, 0, sizeof(ar)); + ar.inode = inode; + ar.goal = goal; + ar.len = *count; + ret = ext4_mb_new_blocks(handle, &ar, errp); + *count = ar.len; + return ret; +} ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) + ext4_lblk_t iblock, ext4_fsblk_t goal, + unsigned long *count, int *errp) { struct ext4_allocation_request ar; ext4_fsblk_t ret; @@ -1955,9 +1975,21 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, } memset(&ar, 0, sizeof(ar)); + /* Fill with neighbour allocated blocks */ + ar.lleft = 0; + ar.pleft = 0; + ar.lright = 0; + ar.pright = 0; + ar.inode = inode; ar.goal = goal; ar.len = *count; + ar.logical = iblock; + if (S_ISREG(inode->i_mode)) + ar.flags = EXT4_MB_HINT_DATA; + else + /* disable in-core preallocation for non-regular files */ + ar.flags = 0; ret = ext4_mb_new_blocks(handle, &ar, errp); *count = ar.len; return ret; -- cgit v1.2.1 From 654b4908bc17a6318d18f3036fecc5155de92f55 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: cleanup block allocator Move the code related to block allocation to a single function and add helper funtions to differient allocation for data and meta data blocks Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 127 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 53 deletions(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 816f1dbaeb3c..48787e86d438 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1640,20 +1640,24 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) } /** - * ext4_new_blocks_old() -- core block(s) allocation function + * ext4_old_new_blocks() -- core block bitmap based block allocation function + * * @handle: handle to this transaction * @inode: file inode * @goal: given target block(filesystem wide) * @count: target number of blocks to allocate * @errp: error code * - * ext4_new_blocks uses a goal block to assist allocation. It tries to - * allocate block(s) from the block group contains the goal block first. If that - * fails, it will try to allocate block(s) from other block groups without - * any specific goal block. + * ext4_old_new_blocks uses a goal block to assist allocation and look up + * the block bitmap directly to do block allocation. It tries to + * allocate block(s) from the block group contains the goal block first. If + * that fails, it will try to allocate block(s) from other block groups + * without any specific goal block. + * + * This function is called when -o nomballoc mount option is enabled * */ -ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, +ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t goal, unsigned long *count, int *errp) { struct buffer_head *bitmap_bh = NULL; @@ -1923,78 +1927,95 @@ out: return 0; } -ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int *errp) -{ - struct ext4_allocation_request ar; - ext4_fsblk_t ret; - - if (!test_opt(inode->i_sb, MBALLOC)) { - unsigned long count = 1; - ret = ext4_new_blocks_old(handle, inode, goal, &count, errp); - return ret; - } +#define EXT4_META_BLOCK 0x1 - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.goal = goal; - ar.len = 1; - ret = ext4_mb_new_blocks(handle, &ar, errp); - return ret; -} -ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) -{ - struct ext4_allocation_request ar; - ext4_fsblk_t ret; - - if (!test_opt(inode->i_sb, MBALLOC)) { - ret = ext4_new_blocks_old(handle, inode, goal, count, errp); - return ret; - } - - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.goal = goal; - ar.len = *count; - ret = ext4_mb_new_blocks(handle, &ar, errp); - *count = ar.len; - return ret; -} - -ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, +static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, ext4_fsblk_t goal, - unsigned long *count, int *errp) + unsigned long *count, int *errp, int flags) { struct ext4_allocation_request ar; ext4_fsblk_t ret; if (!test_opt(inode->i_sb, MBALLOC)) { - ret = ext4_new_blocks_old(handle, inode, goal, count, errp); - return ret; + return ext4_old_new_blocks(handle, inode, goal, count, errp); } memset(&ar, 0, sizeof(ar)); /* Fill with neighbour allocated blocks */ - ar.lleft = 0; - ar.pleft = 0; - ar.lright = 0; - ar.pright = 0; ar.inode = inode; ar.goal = goal; ar.len = *count; ar.logical = iblock; - if (S_ISREG(inode->i_mode)) + + if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK)) + /* enable in-core preallocation for data block allocation */ ar.flags = EXT4_MB_HINT_DATA; else /* disable in-core preallocation for non-regular files */ ar.flags = 0; + ret = ext4_mb_new_blocks(handle, &ar, errp); *count = ar.len; return ret; } +/* + * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks + * + * @handle: handle to this transaction + * @inode: file inode + * @goal: given target block(filesystem wide) + * @errp: error code + * + * Return allocated block number on success + */ +ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp) +{ + unsigned long count = 1; + return do_blk_alloc(handle, inode, 0, goal, + &count, errp, EXT4_META_BLOCK); +} + +/* + * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks + * + * @handle: handle to this transaction + * @inode: file inode + * @goal: given target block(filesystem wide) + * @count: total number of blocks need + * @errp: error code + * + * Return 1st allocated block numberon success, *count stores total account + * error stores in errp pointer + */ +ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp) +{ + return do_blk_alloc(handle, inode, 0, goal, + count, errp, EXT4_META_BLOCK); +} + +/* + * ext4_new_blocks() -- allocate data blocks + * + * @handle: handle to this transaction + * @inode: file inode + * @goal: given target block(filesystem wide) + * @count: total number of blocks need + * @errp: error code + * + * Return 1st allocated block numberon success, *count stores total account + * error stores in errp pointer + */ + +ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, ext4_fsblk_t goal, + unsigned long *count, int *errp) +{ + return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0); +} /** * ext4_count_free_blocks() -- count filesystem free blocks -- cgit v1.2.1 From 07031431072ece801d53d2c03d5e5bb21f4f64a4 Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: ext4: mballoc avoid use root reserved blocks for non root allocation mballoc allocation missed check for blocks reserved for root users. Add ext4_has_free_blocks() check before allocation. Also modified ext4_has_free_blocks() to support multiple block allocation request. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 51 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 18 deletions(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 48787e86d438..25f63d8c1b3d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1599,23 +1599,35 @@ out: /** * ext4_has_free_blocks() - * @sbi: in-core super block structure. + * @sbi: in-core super block structure. + * @nblocks: number of neeed blocks * - * Check if filesystem has at least 1 free block available for allocation. + * Check if filesystem has free blocks available for allocation. + * Return the number of blocks avaible for allocation for this request + * On success, return nblocks */ -static int ext4_has_free_blocks(struct ext4_sb_info *sbi) +ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, + ext4_fsblk_t nblocks) { - ext4_fsblk_t free_blocks, root_blocks; + ext4_fsblk_t free_blocks; + ext4_fsblk_t root_blocks = 0; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - root_blocks = ext4_r_blocks_count(sbi->s_es); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + + if (!capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; -} + (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) + root_blocks = ext4_r_blocks_count(sbi->s_es); +#ifdef CONFIG_SMP + if (free_blocks - root_blocks < FBC_BATCH) + free_blocks = + percpu_counter_sum_positive(&sbi->s_freeblocks_counter); +#endif + if (free_blocks - root_blocks < nblocks) + return free_blocks - root_blocks; + return nblocks; + } + /** * ext4_should_retry_alloc() @@ -1631,7 +1643,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi) */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3) + if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); @@ -1681,13 +1693,21 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, ext4_group_t ngroups; unsigned long num = *count; - *errp = -ENOSPC; sb = inode->i_sb; if (!sb) { + *errp = -ENODEV; printk("ext4_new_block: nonexistent device"); return 0; } + sbi = EXT4_SB(sb); + *count = ext4_has_free_blocks(sbi, *count); + if (*count == 0) { + *errp = -ENOSPC; + return 0; /*return with ENOSPC error */ + } + num = *count; + /* * Check quota for allocation of this block. */ @@ -1711,11 +1731,6 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) my_rsv = &block_i->rsv_window_node; - if (!ext4_has_free_blocks(sbi)) { - *errp = -ENOSPC; - goto out; - } - /* * First, test whether the goal block is free. */ -- cgit v1.2.1 From e8ced39d5e8911c662d4d69a342b9d053eaaac4e Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: percpu_counter: new function percpu_counter_sum_and_set Delayed allocation need to check free blocks at every write time. percpu_counter_read_positive() is not quit accurate. delayed allocation need a more accurate accounting, but using percpu_counter_sum_positive() is frequently is quite expensive. This patch added a new function to update center counter when sum per-cpu counter, to increase the accurate rate for next percpu_counter_read() and require less calling expensive percpu_counter_sum(). Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/balloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 25f63d8c1b3d..6369bacf0dcb 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1621,7 +1621,7 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, #ifdef CONFIG_SMP if (free_blocks - root_blocks < FBC_BATCH) free_blocks = - percpu_counter_sum_positive(&sbi->s_freeblocks_counter); + percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); #endif if (free_blocks - root_blocks < nblocks) return free_blocks - root_blocks; -- cgit v1.2.1 From d2a1763791a634e315ec926b62829c1e88842c86 Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Mon, 14 Jul 2008 17:52:37 -0400 Subject: ext4: delayed allocation ENOSPC handling This patch does block reservation for delayed allocation, to avoid ENOSPC later at page flush time. Blocks(data and metadata) are reserved at da_write_begin() time, the freeblocks counter is updated by then, and the number of reserved blocks is store in per inode counter. At the writepage time, the unused reserved meta blocks are returned back. At unlink/truncate time, reserved blocks are properly released. Updated fix from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> to fix the oldallocator block reservation accounting with delalloc, added lock to guard the counters and also fix the reservation for meta blocks. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> --- fs/ext4/balloc.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'fs/ext4/balloc.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 6369bacf0dcb..495ab21b9832 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1701,7 +1701,12 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, } sbi = EXT4_SB(sb); - *count = ext4_has_free_blocks(sbi, *count); + if (!EXT4_I(inode)->i_delalloc_reserved_flag) { + /* + * With delalloc we already reserved the blocks + */ + *count = ext4_has_free_blocks(sbi, *count); + } if (*count == 0) { *errp = -ENOSPC; return 0; /*return with ENOSPC error */ @@ -1902,7 +1907,8 @@ allocated: le16_add_cpu(&gdp->bg_free_blocks_count, -num); gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_sub(&sbi->s_freeblocks_counter, num); + if (!EXT4_I(inode)->i_delalloc_reserved_flag) + percpu_counter_sub(&sbi->s_freeblocks_counter, num); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, group_no); @@ -1976,40 +1982,49 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, } /* - * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks + * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks * * @handle: handle to this transaction * @inode: file inode * @goal: given target block(filesystem wide) + * @count: total number of blocks need * @errp: error code * - * Return allocated block number on success + * Return 1st allocated block numberon success, *count stores total account + * error stores in errp pointer */ -ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int *errp) +ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp) { - unsigned long count = 1; - return do_blk_alloc(handle, inode, 0, goal, - &count, errp, EXT4_META_BLOCK); + ext4_fsblk_t ret; + ret = do_blk_alloc(handle, inode, 0, goal, + count, errp, EXT4_META_BLOCK); + /* + * Account for the allocated meta blocks + */ + if (!(*errp)) { + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + EXT4_I(inode)->i_allocated_meta_blocks += *count; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + } + return ret; } /* - * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks + * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks * * @handle: handle to this transaction * @inode: file inode * @goal: given target block(filesystem wide) - * @count: total number of blocks need * @errp: error code * - * Return 1st allocated block numberon success, *count stores total account - * error stores in errp pointer + * Return allocated block number on success */ -ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) +ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp) { - return do_blk_alloc(handle, inode, 0, goal, - count, errp, EXT4_META_BLOCK); + unsigned long count = 1; + return ext4_new_meta_blocks(handle, inode, goal, &count, errp); } /* -- cgit v1.2.1