diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-03-07 16:46:57 -0800 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-04-26 15:02:37 -0700 |
commit | e48edee2d8eab812f31f0ff62c6ba635ca2e1e21 (patch) | |
tree | 6afb9fe59a06ce621cb11d570e432e7d739376ff /fs/ocfs2/alloc.c | |
parent | 6af67d8205cf65fbaaa743edc7ebb46e486e34ff (diff) | |
download | blackbird-op-linux-e48edee2d8eab812f31f0ff62c6ba635ca2e1e21.tar.gz blackbird-op-linux-e48edee2d8eab812f31f0ff62c6ba635ca2e1e21.zip |
ocfs2: make room for unwritten extents flag
Due to the size of our group bitmaps, we'll never have a leaf node extent
record with more than 16 bits worth of clusters. Split e_clusters up so that
leaf nodes can get a flags field where we can mark unwritten extents.
Interior nodes whose length references all the child nodes beneath it can't
split their e_clusters field, so we use a union to preserve sizing there.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/alloc.c')
-rw-r--r-- | fs/ocfs2/alloc.c | 155 |
1 files changed, 96 insertions, 59 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 027cf5d05ffb..0eab0d328289 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -218,20 +218,32 @@ enum ocfs2_contig_type { CONTIG_RIGHT }; + +/* + * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and + * ocfs2_extent_contig only work properly against leaf nodes! + */ static int ocfs2_block_extent_contig(struct super_block *sb, struct ocfs2_extent_rec *ext, u64 blkno) { - return blkno == (le64_to_cpu(ext->e_blkno) + - ocfs2_clusters_to_blocks(sb, - le32_to_cpu(ext->e_clusters))); + u64 blk_end = le64_to_cpu(ext->e_blkno); + + blk_end += ocfs2_clusters_to_blocks(sb, + le16_to_cpu(ext->e_leaf_clusters)); + + return blkno == blk_end; } static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left, struct ocfs2_extent_rec *right) { - return (le32_to_cpu(left->e_cpos) + le32_to_cpu(left->e_clusters) == - le32_to_cpu(right->e_cpos)); + u32 left_range; + + left_range = le32_to_cpu(left->e_cpos) + + le16_to_cpu(left->e_leaf_clusters); + + return (left_range == le32_to_cpu(right->e_cpos)); } static enum ocfs2_contig_type @@ -430,7 +442,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) i = le16_to_cpu(el->l_next_free_rec) - 1; return le32_to_cpu(el->l_recs[i].e_cpos) + - le32_to_cpu(el->l_recs[i].e_clusters); + ocfs2_rec_clusters(el, &el->l_recs[i]); } /* @@ -442,7 +454,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) * for the new last extent block. * * the new branch will be 'empty' in the sense that every block will - * contain a single record with e_clusters == 0. + * contain a single record with cluster count == 0. */ static int ocfs2_add_branch(struct ocfs2_super *osb, handle_t *handle, @@ -532,7 +544,12 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, */ eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos); eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno); - eb_el->l_recs[0].e_clusters = cpu_to_le32(0); + /* + * eb_el isn't always an interior node, but even leaf + * nodes want a zero'd flags and reserved field so + * this gets the whole 32 bits regardless of use. + */ + eb_el->l_recs[0].e_int_clusters = cpu_to_le32(0); if (!eb_el->l_tree_depth) new_last_eb_blk = le64_to_cpu(eb->h_blkno); @@ -577,7 +594,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, i = le16_to_cpu(el->l_next_free_rec); el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); - el->l_recs[i].e_clusters = 0; + el->l_recs[i].e_int_clusters = 0; le16_add_cpu(&el->l_next_free_rec, 1); /* fe needs a new last extent block pointer, as does the @@ -662,11 +679,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, /* copy the fe data into the new extent block */ eb_el->l_tree_depth = fe_el->l_tree_depth; eb_el->l_next_free_rec = fe_el->l_next_free_rec; - for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { - eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos; - eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters; - eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno; - } + for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) + eb_el->l_recs[i] = fe_el->l_recs[i]; status = ocfs2_journal_dirty(handle, new_eb_bh); if (status < 0) { @@ -687,12 +701,9 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, le16_add_cpu(&fe_el->l_tree_depth, 1); fe_el->l_recs[0].e_cpos = 0; fe_el->l_recs[0].e_blkno = eb->h_blkno; - fe_el->l_recs[0].e_clusters = cpu_to_le32(new_clusters); - for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { - fe_el->l_recs[i].e_cpos = 0; - fe_el->l_recs[i].e_clusters = 0; - fe_el->l_recs[i].e_blkno = 0; - } + fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); + for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) + memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); fe_el->l_next_free_rec = cpu_to_le16(1); /* If this is our 1st tree depth shift, then last_eb_blk @@ -817,9 +828,13 @@ bail: return status; } +/* + * This is only valid for leaf nodes, which are the only ones that can + * have empty extents anyway. + */ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) { - return !rec->e_clusters; + return !rec->e_leaf_clusters; } /* @@ -930,6 +945,8 @@ static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el) { int next_free = le16_to_cpu(el->l_next_free_rec); + BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); + if (next_free == 0) goto set_and_inc; @@ -1034,7 +1051,7 @@ static int __ocfs2_find_path(struct inode *inode, * rightmost record. */ range = le32_to_cpu(rec->e_cpos) + - le32_to_cpu(rec->e_clusters); + ocfs2_rec_clusters(el, rec); if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) break; } @@ -1195,21 +1212,21 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, */ left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); left_clusters -= le32_to_cpu(left_rec->e_cpos); - left_rec->e_clusters = cpu_to_le32(left_clusters); + left_rec->e_int_clusters = cpu_to_le32(left_clusters); /* * Calculate the rightmost cluster count boundary before - * moving cpos - we will need to adjust e_clusters after + * moving cpos - we will need to adjust clusters after * updating e_cpos to keep the same highest cluster count. */ right_end = le32_to_cpu(right_rec->e_cpos); - right_end += le32_to_cpu(right_rec->e_clusters); + right_end += le32_to_cpu(right_rec->e_int_clusters); right_rec->e_cpos = left_rec->e_cpos; le32_add_cpu(&right_rec->e_cpos, left_clusters); right_end -= le32_to_cpu(right_rec->e_cpos); - right_rec->e_clusters = cpu_to_le32(right_end); + right_rec->e_int_clusters = cpu_to_le32(right_end); } /* @@ -1452,6 +1469,8 @@ static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, u64 blkno; struct ocfs2_extent_list *el; + BUG_ON(path->p_tree_depth == 0); + *cpos = 0; blkno = path_leaf_bh(path)->b_blocknr; @@ -1486,7 +1505,9 @@ static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, } *cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos); - *cpos = *cpos + le32_to_cpu(el->l_recs[j - 1].e_clusters) - 1; + *cpos = *cpos + ocfs2_rec_clusters(el, + &el->l_recs[j - 1]); + *cpos = *cpos - 1; goto out; } } @@ -1715,7 +1736,7 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, unsigned int range; struct ocfs2_extent_rec *rec; - BUG_ON(el->l_tree_depth); + BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); /* * Contiguous insert - either left or right. @@ -1726,8 +1747,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, rec->e_blkno = insert_rec->e_blkno; rec->e_cpos = insert_rec->e_cpos; } - le32_add_cpu(&rec->e_clusters, - le32_to_cpu(insert_rec->e_clusters)); + le16_add_cpu(&rec->e_leaf_clusters, + le16_to_cpu(insert_rec->e_leaf_clusters)); return; } @@ -1748,7 +1769,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, if (insert->ins_appending == APPEND_TAIL) { i = le16_to_cpu(el->l_next_free_rec) - 1; rec = &el->l_recs[i]; - range = le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters); + range = le32_to_cpu(rec->e_cpos) + + le16_to_cpu(rec->e_leaf_clusters); BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= @@ -1761,9 +1783,9 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, le16_to_cpu(el->l_count), le16_to_cpu(el->l_next_free_rec), le32_to_cpu(el->l_recs[i].e_cpos), - le32_to_cpu(el->l_recs[i].e_clusters), + le16_to_cpu(el->l_recs[i].e_leaf_clusters), le32_to_cpu(insert_rec->e_cpos), - le32_to_cpu(insert_rec->e_clusters)); + le16_to_cpu(insert_rec->e_leaf_clusters)); i++; el->l_recs[i] = *insert_rec; le16_add_cpu(&el->l_next_free_rec, 1); @@ -1806,6 +1828,12 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, *ret_left_path = NULL; /* + * This shouldn't happen for non-trees. The extent rec cluster + * count manipulation below only works for interior nodes. + */ + BUG_ON(right_path->p_tree_depth == 0); + + /* * If our appending insert is at the leftmost edge of a leaf, * then we might need to update the rightmost records of the * neighboring path. @@ -1863,6 +1891,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, bh = path_root_bh(right_path); i = 0; while (1) { + struct ocfs2_extent_rec *rec; + next_free = le16_to_cpu(el->l_next_free_rec); if (next_free == 0) { ocfs2_error(inode->i_sb, @@ -1872,16 +1902,19 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, goto out; } - el->l_recs[next_free - 1].e_clusters = insert_rec->e_cpos; - le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, - le32_to_cpu(insert_rec->e_clusters)); - le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, - -le32_to_cpu(el->l_recs[next_free - 1].e_cpos)); + rec = &el->l_recs[next_free - 1]; + + rec->e_int_clusters = insert_rec->e_cpos; + le32_add_cpu(&rec->e_int_clusters, + le16_to_cpu(insert_rec->e_leaf_clusters)); + le32_add_cpu(&rec->e_int_clusters, + -le32_to_cpu(rec->e_cpos)); ret = ocfs2_journal_dirty(handle, bh); if (ret) mlog_errno(ret); + /* Don't touch the leaf node */ if (++i >= right_path->p_tree_depth) break; @@ -2068,7 +2101,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, out_update_clusters: ocfs2_update_dinode_clusters(inode, di, - le32_to_cpu(insert_rec->e_clusters)); + le16_to_cpu(insert_rec->e_leaf_clusters)); ret = ocfs2_journal_dirty(handle, di_bh); if (ret) @@ -2089,6 +2122,8 @@ static void ocfs2_figure_contig_type(struct inode *inode, int i; enum ocfs2_contig_type contig_type = CONTIG_NONE; + BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); + for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], insert_rec); @@ -2120,7 +2155,7 @@ static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, insert->ins_appending = APPEND_NONE; - BUG_ON(el->l_tree_depth); + BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); if (!el->l_next_free_rec) goto set_tail_append; @@ -2134,7 +2169,8 @@ static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, i = le16_to_cpu(el->l_next_free_rec) - 1; rec = &el->l_recs[i]; - if (cpos >= (le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters))) + if (cpos >= + (le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters))) goto set_tail_append; return; @@ -2242,7 +2278,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, * The insert code isn't quite ready to deal with all cases of * left contiguousness. Specifically, if it's an insert into * the 1st record in a leaf, it will require the adjustment of - * e_clusters on the last record of the path directly to it's + * cluster count on the last record of the path directly to it's * left. For now, just catch that case and fool the layers * above us. This works just fine for tree_depth == 0, which * is why we allow that above. @@ -2310,9 +2346,10 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, OCFS2_I(inode)->ip_clusters); + memset(&rec, 0, sizeof(rec)); rec.e_cpos = cpu_to_le32(cpos); rec.e_blkno = cpu_to_le64(start_blk); - rec.e_clusters = cpu_to_le32(new_clusters); + rec.e_leaf_clusters = cpu_to_le16(new_clusters); status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, &insert); @@ -2981,7 +3018,7 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode, * Check it we'll only be trimming off the end of this * cluster. */ - if (le16_to_cpu(rec->e_clusters) > clusters_to_del) + if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del) goto out; } @@ -3061,11 +3098,11 @@ find_tail_record: mlog(0, "Extent list before: record %d: (%u, %u, %llu), " "next = %u\n", i, le32_to_cpu(rec->e_cpos), - le32_to_cpu(rec->e_clusters), + ocfs2_rec_clusters(el, rec), (unsigned long long)le64_to_cpu(rec->e_blkno), le16_to_cpu(el->l_next_free_rec)); - BUG_ON(le32_to_cpu(rec->e_clusters) < clusters_to_del); + BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del); if (le16_to_cpu(el->l_tree_depth) == 0) { /* @@ -3107,13 +3144,13 @@ find_tail_record: goto find_tail_record; } - le32_add_cpu(&rec->e_clusters, -clusters_to_del); + le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del); /* * We'll use "new_edge" on our way back up the * tree to know what our rightmost cpos is. */ - new_edge = le32_to_cpu(rec->e_clusters); + new_edge = le16_to_cpu(rec->e_leaf_clusters); new_edge += le32_to_cpu(rec->e_cpos); /* @@ -3121,12 +3158,12 @@ find_tail_record: */ *delete_start = le64_to_cpu(rec->e_blkno) + ocfs2_clusters_to_blocks(inode->i_sb, - le32_to_cpu(rec->e_clusters)); + le16_to_cpu(rec->e_leaf_clusters)); /* * If it's now empty, remove this record. */ - if (le32_to_cpu(rec->e_clusters) == 0) { + if (le16_to_cpu(rec->e_leaf_clusters) == 0) { memset(rec, 0, sizeof(struct ocfs2_extent_rec)); le16_add_cpu(&el->l_next_free_rec, -1); @@ -3152,15 +3189,15 @@ find_tail_record: if (new_edge == 0) goto delete; - rec->e_clusters = cpu_to_le32(new_edge); - le32_add_cpu(&rec->e_clusters, + rec->e_int_clusters = cpu_to_le32(new_edge); + le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos)); /* * A deleted child record should have been * caught above. */ - BUG_ON(le32_to_cpu(rec->e_clusters) == 0); + BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0); } delete: @@ -3173,7 +3210,7 @@ delete: mlog(0, "extent list container %llu, after: record %d: " "(%u, %u, %llu), next = %u.\n", (unsigned long long)bh->b_blocknr, i, - le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters), + le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec), (unsigned long long)le64_to_cpu(rec->e_blkno), le16_to_cpu(el->l_next_free_rec)); @@ -3195,7 +3232,7 @@ delete: ocfs2_remove_from_cache(inode, bh); - BUG_ON(le32_to_cpu(el->l_recs[0].e_clusters)); + BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); @@ -3283,7 +3320,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, * Lower levels depend on this never happening, but it's best * to check it up here before changing the tree. */ - if (el->l_tree_depth && ocfs2_is_empty_extent(&el->l_recs[0])) { + if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) { ocfs2_error(inode->i_sb, "Inode %lu has an empty extent record, depth %u\n", inode->i_ino, le16_to_cpu(el->l_tree_depth)); @@ -3644,13 +3681,13 @@ start: i = le16_to_cpu(el->l_next_free_rec) - 1; range = le32_to_cpu(el->l_recs[i].e_cpos) + - le32_to_cpu(el->l_recs[i].e_clusters); + ocfs2_rec_clusters(el, &el->l_recs[i]); if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { clusters_to_del = 0; } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { - clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters); + clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); } else if (range > new_highest_cpos) { - clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) + + clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + le32_to_cpu(el->l_recs[i].e_cpos)) - new_highest_cpos; } else { |