diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2006-05-12 12:09:15 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-05-12 12:09:15 -0400 |
commit | e90c01e148b967d30caf59e76accb3a58ca6b74b (patch) | |
tree | 92f9b45febbdbc52174307e2e73dbb26aa893465 | |
parent | 7d63b54a65ce902f9aaa8efe8192aa3b983264d4 (diff) | |
download | talos-op-linux-e90c01e148b967d30caf59e76accb3a58ca6b74b.tar.gz talos-op-linux-e90c01e148b967d30caf59e76accb3a58ca6b74b.zip |
[GFS2] Reverse block order in build_height
The original code ordered the blocks allocated in the build_height
routine backwards, causing excessive disk seeks during a read of the
metadata. This patch reverses the order to try to reduce disk seeks.
Example: A five level metadata tree, I = Inode, P = Pointers, D = Data
You need to read the blocks in the order:
I P5 P4 P3 P2 P1 D
in order to read a single data block. The new code now orders the blocks
in this way. The old code used to order them as:
I P1 P2 P3 P4 P5 D
requiring two extra seeks on average. Note that for files which are
grown by gradual extension rather than by truncate or by llseek/write
at a large offset, this doesn't apply. In the case of writing to a
file linearly, this routine will only be called upon to extend the
height of the tree by one block at a time, so the ordering is
determined by when it's called rather than by the internals of the
routine itself. Optimising that part of the ordering is a much
harder problem.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- | fs/gfs2/bmap.c | 102 |
1 files changed, 46 insertions, 56 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 474b9a16f0f5..31c3e92820e4 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -164,72 +164,62 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) * @ip: The GFS2 inode * @height: The height to build to * - * This routine makes sure that the metadata tree is tall enough to hold - * "size" bytes of data. * * Returns: errno */ -static int build_height(struct gfs2_inode *ip, int height) +static int build_height(struct inode *inode, unsigned height) { - struct gfs2_sbd *sdp = ip->i_sbd; - struct buffer_head *bh, *dibh; - uint64_t block = 0, *bp; - unsigned int x; - int new_block; + struct gfs2_inode *ip = inode->u.generic_ip; + unsigned new_height = height - ip->i_di.di_height; + struct buffer_head *dibh; + struct buffer_head *blocks[GFS2_MAX_META_HEIGHT]; int error; + u64 *bp; + u64 bn; + unsigned n; - while (ip->i_di.di_height < height) { - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - new_block = 0; - bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); - for (x = 0; x < sdp->sd_diptrs; x++, bp++) - if (*bp) { - new_block = 1; - break; - } - - if (new_block) { - /* Get a new block, fill it with the old direct - pointers, and write it out */ + if (height <= ip->i_di.di_height) + return 0; - block = gfs2_alloc_meta(ip); + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; - bh = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_metatype_set(bh, - GFS2_METATYPE_IN, + for(n = 0; n < new_height; n++) { + bn = gfs2_alloc_meta(ip); + blocks[n] = gfs2_meta_new(ip->i_gl, bn); + gfs2_trans_add_bh(ip->i_gl, blocks[n], 1); + } + + n = 0; + bn = blocks[0]->b_blocknr; + if (new_height > 1) { + for(; n < new_height-1; n++) { + gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN); - gfs2_buffer_copy_tail(bh, - sizeof(struct gfs2_meta_header), - dibh, sizeof(struct gfs2_dinode)); - - brelse(bh); - } - - /* Set 
up the new direct pointer and write it out to disk */ - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); - - if (new_block) { - *(uint64_t *)(dibh->b_data + - sizeof(struct gfs2_dinode)) = - cpu_to_be64(block); - ip->i_di.di_blocks++; + gfs2_buffer_clear_tail(blocks[n], + sizeof(struct gfs2_meta_header)); + bp = (u64 *)(blocks[n]->b_data + + sizeof(struct gfs2_meta_header)); + *bp = cpu_to_be64(blocks[n+1]->b_blocknr); + brelse(blocks[n]); + blocks[n] = NULL; } - - ip->i_di.di_height++; - - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); } - - return 0; + gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN); + gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header), + dibh, sizeof(struct gfs2_dinode)); + brelse(blocks[n]); + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + bp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); + *bp = cpu_to_be64(bn); + ip->i_di.di_height += new_height; + ip->i_di.di_blocks += new_height; + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + return error; } /** @@ -416,7 +406,7 @@ static struct buffer_head *gfs2_block_pointers(struct inode *inode, u64 lblock, if (!create) goto out; - error = build_height(ip, height); + error = build_height(inode, height); if (error) goto out; } @@ -806,7 +796,7 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size) h = calc_tree_height(ip, size); if (ip->i_di.di_height < h) { down_write(&ip->i_rw_mutex); - error = build_height(ip, h); + error = build_height(ip->i_vnode, h); up_write(&ip->i_rw_mutex); if (error) goto out_end_trans; |