diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-06-18 11:22:56 -0700 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-10 17:32:03 -0700 |
commit | b27b7cbcf12a1bfff1ed68a73ddd7d11edc20daf (patch) | |
tree | a25a8ca272e7f0ef01987db3b3795a49e0ccb51f /fs | |
parent | 0d172baa5586071ae0ae0c07356a378fdbedecdb (diff) | |
download | talos-op-linux-b27b7cbcf12a1bfff1ed68a73ddd7d11edc20daf.tar.gz talos-op-linux-b27b7cbcf12a1bfff1ed68a73ddd7d11edc20daf.zip |
ocfs2: support writing of unwritten extents
Update the write code to detect when the user is asking to write to an
unwritten extent. Like writing to a hole, we must zero the region between
the write and the cluster boundaries. Most of the existing cluster zeroing
logic can be re-used with some additional checks for the unwritten flag on
extent records.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/aops.c | 94 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 14 | ||||
-rw-r--r-- | fs/ocfs2/file.h | 2 |
3 files changed, 84 insertions, 26 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 077583b50391..8af923316d22 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -782,8 +782,14 @@ struct ocfs2_write_cluster_desc { * filled. */ unsigned c_new; + unsigned c_unwritten; }; +static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) +{ + return d->c_new || d->c_unwritten; +} + struct ocfs2_write_ctxt { /* Logical cluster position / len of write */ u32 w_cpos; @@ -829,6 +835,8 @@ struct ocfs2_write_ctxt { handle_t *w_handle; struct buffer_head *w_di_bh; + + struct ocfs2_cached_dealloc_ctxt w_dealloc; }; static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) @@ -868,6 +876,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, else wc->w_large_pages = 0; + ocfs2_init_dealloc_ctxt(&wc->w_dealloc); + *wcp = wc; return 0; @@ -1103,16 +1113,19 @@ out: * Prepare a single cluster for write one cluster into the file. */ static int ocfs2_write_cluster(struct address_space *mapping, - u32 phys, struct ocfs2_alloc_context *data_ac, + u32 phys, unsigned int unwritten, + struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *meta_ac, struct ocfs2_write_ctxt *wc, u32 cpos, loff_t user_pos, unsigned user_len) { - int ret, i, new; + int ret, i, new, should_zero = 0; u64 v_blkno, p_blkno; struct inode *inode = mapping->host; new = phys == 0 ? 1 : 0; + if (new || unwritten) + should_zero = 1; if (new) { u32 tmp_pos; @@ -1142,11 +1155,20 @@ static int ocfs2_write_cluster(struct address_space *mapping, mlog_errno(ret); goto out; } + } else if (unwritten) { + ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, + wc->w_handle, cpos, 1, phys, + meta_ac, &wc->w_dealloc); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + if (should_zero) v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); - } else { + else v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; - } /* * The only reason this should fail is due to an inability to @@ -1169,7 +1191,8 @@ static int ocfs2_write_cluster(struct address_space *mapping, tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, wc->w_pages[i], cpos, - user_pos, user_len, new); + user_pos, user_len, + should_zero); if (tmpret) { mlog_errno(tmpret); if (ret == 0) @@ -1200,8 +1223,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping, for (i = 0; i < wc->w_clen; i++) { desc = &wc->w_desc[i]; - ret = ocfs2_write_cluster(mapping, desc->c_phys, data_ac, - meta_ac, wc, desc->c_cpos, pos, len); + ret = ocfs2_write_cluster(mapping, desc->c_phys, + desc->c_unwritten, data_ac, meta_ac, + wc, desc->c_cpos, pos, len); if (ret) { mlog_errno(ret); goto out; @@ -1242,19 +1266,19 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, if (wc->w_large_pages) { /* * We only care about the 1st and last cluster within - * our range and whether they are holes or not. Either + * our range and whether they should be zero'd or not. Either * value may be extended out to the start/end of a * newly allocated cluster. */ desc = &wc->w_desc[0]; - if (desc->c_new) + if (ocfs2_should_zero_cluster(desc)) ocfs2_figure_cluster_boundaries(osb, desc->c_cpos, &wc->w_target_from, NULL); desc = &wc->w_desc[wc->w_clen - 1]; - if (desc->c_new) + if (ocfs2_should_zero_cluster(desc)) ocfs2_figure_cluster_boundaries(osb, desc->c_cpos, NULL, @@ -1268,28 +1292,52 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, /* * Populate each single-cluster write descriptor in the write context * with information about the i/o to be done. + * + * Returns the number of clusters that will have to be allocated, as + * well as a worst case estimate of the number of extent records that + * would have to be created during a write to an unwritten region. */ static int ocfs2_populate_write_desc(struct inode *inode, struct ocfs2_write_ctxt *wc, - unsigned int *clusters_to_alloc) + unsigned int *clusters_to_alloc, + unsigned int *extents_to_split) { int ret; struct ocfs2_write_cluster_desc *desc; unsigned int num_clusters = 0; + unsigned int ext_flags = 0; u32 phys = 0; int i; + *clusters_to_alloc = 0; + *extents_to_split = 0; + for (i = 0; i < wc->w_clen; i++) { desc = &wc->w_desc[i]; desc->c_cpos = wc->w_cpos + i; if (num_clusters == 0) { + /* + * Need to look up the next extent record. + */ ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, - &num_clusters, NULL); + &num_clusters, &ext_flags); if (ret) { mlog_errno(ret); goto out; } + + /* + * Assume worst case - that we're writing in + * the middle of the extent. + * + * We can assume that the write proceeds from + * left to right, in which case the extent + * insert code is smart enough to coalesce the + * next splits into the previous records created. + */ + if (ext_flags & OCFS2_EXT_UNWRITTEN) + *extents_to_split = *extents_to_split + 2; } else if (phys) { /* * Only increment phys if it doesn't describe @@ -1303,6 +1351,8 @@ static int ocfs2_populate_write_desc(struct inode *inode, desc->c_new = 1; *clusters_to_alloc = *clusters_to_alloc + 1; } + if (ext_flags & OCFS2_EXT_UNWRITTEN) + desc->c_unwritten = 1; num_clusters--; } @@ -1318,7 +1368,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, struct buffer_head *di_bh, struct page *mmap_page) { int ret, credits = OCFS2_INODE_UPDATE_CREDITS; - unsigned int clusters_to_alloc = 0; + unsigned int clusters_to_alloc, extents_to_split; struct ocfs2_write_ctxt *wc; struct inode *inode = mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -1333,7 +1383,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, return ret; } - ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc); + ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, + &extents_to_split); if (ret) { mlog_errno(ret); goto out; @@ -1347,14 +1398,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * write out. An allocation requires that we write the entire * cluster range. */ - if (clusters_to_alloc > 0) { + if (clusters_to_alloc || extents_to_split) { /* * XXX: We are stretching the limits of - * ocfs2_lock_allocators(). It greately over-estimates + * ocfs2_lock_allocators(). It greatly over-estimates * the work to be done. */ ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, - &data_ac, &meta_ac); + extents_to_split, &data_ac, &meta_ac); if (ret) { mlog_errno(ret); goto out; @@ -1365,7 +1416,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, } - ocfs2_set_target_boundaries(osb, wc, pos, len, clusters_to_alloc); + ocfs2_set_target_boundaries(osb, wc, pos, len, + clusters_to_alloc + extents_to_split); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { @@ -1393,7 +1445,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * extent. */ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, - clusters_to_alloc, mmap_page); + clusters_to_alloc + extents_to_split, + mmap_page); if (ret) { mlog_errno(ret); goto out_commit; @@ -1538,11 +1591,12 @@ int ocfs2_write_end_nolock(struct address_space *mapping, inode->i_mtime = inode->i_ctime = CURRENT_TIME; di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ocfs2_journal_dirty(handle, wc->w_di_bh); ocfs2_commit_trans(osb, handle); + ocfs2_run_deallocs(osb, &wc->w_dealloc); + ocfs2_free_write_ctxt(wc); return copied; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a80f31776d94..6745086da6fd 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -527,20 +527,21 @@ leave: * understand sparse inodes. */ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, - u32 clusters_to_add, + u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **meta_ac) { int ret, num_free_extents; + unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); *meta_ac = NULL; *data_ac = NULL; mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " - "clusters_to_add = %u\n", + "clusters_to_add = %u, extents_to_split = %u\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), - le32_to_cpu(di->i_clusters), clusters_to_add); + le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); num_free_extents = ocfs2_num_free_extents(osb, inode, di); if (num_free_extents < 0) { @@ -558,9 +559,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, * * Most of the time we'll only be seeing this 1 cluster at a time * anyway. + * + * Always lock for any unwritten extents - we might want to + * add blocks during a split. */ if (!num_free_extents || - (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); if (ret < 0) { if (ret != -ENOSPC) @@ -641,7 +645,7 @@ restart_all: down_write(&OCFS2_I(inode)->ip_alloc_sem); drop_alloc_sem = 1; - status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac, + status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, &meta_ac); if (status) { mlog_errno(status); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index a4dd1fa1822b..54df3c4bd2fd 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -47,7 +47,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, struct ocfs2_alloc_context *meta_ac, enum ocfs2_alloc_restarted *reason); int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, - u32 clusters_to_add, + u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **meta_ac); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |