diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 371 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 7 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_format.c | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.c | 67 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.h | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rtbitmap.c | 49 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 77 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 50 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 205 | ||||
-rw-r--r-- | fs/xfs/xfs_globals.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 24 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 30 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 47 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 93 | ||||
-rw-r--r-- | fs/xfs/xfs_mru_cache.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 55 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 38 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_sysctl.h | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.c | 74 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.h | 1 |
28 files changed, 744 insertions, 484 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 4bffffe038a1..eff34218f405 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2209,6 +2209,10 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) return false; + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) + return false; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index de2d26d32844..79c981984dca 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5404,39 +5404,237 @@ error0: } /* + * Determine whether an extent shift can be accomplished by a merge with the + * extent that precedes the target hole of the shift. + */ +STATIC bool +xfs_bmse_can_merge( + struct xfs_bmbt_irec *left, /* preceding extent */ + struct xfs_bmbt_irec *got, /* current extent to shift */ + xfs_fileoff_t shift) /* shift fsb */ +{ + xfs_fileoff_t startoff; + + startoff = got->br_startoff - shift; + + /* + * The extent, once shifted, must be adjacent in-file and on-disk with + * the preceding extent. + */ + if ((left->br_startoff + left->br_blockcount != startoff) || + (left->br_startblock + left->br_blockcount != got->br_startblock) || + (left->br_state != got->br_state) || + (left->br_blockcount + got->br_blockcount > MAXEXTLEN)) + return false; + + return true; +} + +/* + * A bmap extent shift adjusts the file offset of an extent to fill a preceding + * hole in the file. If an extent shift would result in the extent being fully + * adjacent to the extent that currently precedes the hole, we can merge with + * the preceding extent rather than do the shift. + * + * This function assumes the caller has verified a shift-by-merge is possible + * with the provided extents via xfs_bmse_can_merge(). + */ +STATIC int +xfs_bmse_merge( + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t shift, /* shift fsb */ + int current_ext, /* idx of gotp */ + struct xfs_bmbt_rec_host *gotp, /* extent to shift */ + struct xfs_bmbt_rec_host *leftp, /* preceding extent */ + struct xfs_btree_cur *cur, + int *logflags) /* output */ +{ + struct xfs_ifork *ifp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + xfs_filblks_t blockcount; + int error, i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_bmbt_get_all(gotp, &got); + xfs_bmbt_get_all(leftp, &left); + blockcount = left.br_blockcount + got.br_blockcount; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(xfs_bmse_can_merge(&left, &got, shift)); + + /* + * Merge the in-core extents. Note that the host record pointers and + * current_ext index are invalid once the extent has been removed via + * xfs_iext_remove(). + */ + xfs_bmbt_set_blockcount(leftp, blockcount); + xfs_iext_remove(ip, current_ext, 1, 0); + + /* + * Update the on-disk extent count, the btree if necessary and log the + * inode. + */ + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + *logflags |= XFS_ILOG_CORE; + if (!cur) { + *logflags |= XFS_ILOG_DEXT; + return 0; + } + + /* lookup and remove the extent to merge */ + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + error = xfs_btree_delete(cur, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + /* lookup and update size of the previous extent */ + error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, + left.br_blockcount, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + left.br_blockcount = blockcount; + + error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, + left.br_blockcount, left.br_state); + if (error) + goto out_error; + + return 0; + +out_error: + return error; +} + +/* + * Shift a single extent. + */ +STATIC int +xfs_bmse_shift_one( + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t offset_shift_fsb, + int *current_ext, + struct xfs_bmbt_rec_host *gotp, + struct xfs_btree_cur *cur, + int *logflags) +{ + struct xfs_ifork *ifp; + xfs_fileoff_t startoff; + struct xfs_bmbt_rec_host *leftp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + int error; + int i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + + xfs_bmbt_get_all(gotp, &got); + startoff = got.br_startoff - offset_shift_fsb; + + /* delalloc extents should be prevented by caller */ + XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock), + out_error); + + /* + * If this is the first extent in the file, make sure there's enough + * room at the start of the file and jump right to the shift as there's + * no left extent to merge. + */ + if (*current_ext == 0) { + if (got.br_startoff < offset_shift_fsb) + return -EINVAL; + goto shift_extent; + } + + /* grab the left extent and check for a large enough hole */ + leftp = xfs_iext_get_ext(ifp, *current_ext - 1); + xfs_bmbt_get_all(leftp, &left); + + if (startoff < left.br_startoff + left.br_blockcount) + return -EINVAL; + + /* check whether to merge the extent or shift it down */ + if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) + goto shift_extent; + + return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext, + gotp, leftp, cur, logflags); + +shift_extent: + /* + * Increment the extent index for the next iteration, update the start + * offset of the in-core extent and update the btree if applicable. + */ + (*current_ext)++; + xfs_bmbt_set_startoff(gotp, startoff); + *logflags |= XFS_ILOG_CORE; + if (!cur) { + *logflags |= XFS_ILOG_DEXT; + return 0; + } + + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + got.br_startoff = startoff; + error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, got.br_state); + if (error) + return error; + + return 0; + +out_error: + return error; +} + +/* * Shift extent records to the left to cover a hole. * - * The maximum number of extents to be shifted in a single operation - * is @num_exts, and @current_ext keeps track of the current extent - * index we have shifted. @offset_shift_fsb is the length by which each - * extent is shifted. If there is no hole to shift the extents - * into, this will be considered invalid operation and we abort immediately. + * The maximum number of extents to be shifted in a single operation is + * @num_exts. @start_fsb specifies the file offset to start the shift and the + * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb + * is the length by which each extent is shifted. If there is no hole to shift + * the extents into, this will be considered invalid operation and we abort + * immediately. */ int xfs_bmap_shift_extents( struct xfs_trans *tp, struct xfs_inode *ip, - int *done, xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, - xfs_extnum_t *current_ext, + int *done, + xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, int num_exts) { - struct xfs_btree_cur *cur; + struct xfs_btree_cur *cur = NULL; struct xfs_bmbt_rec_host *gotp; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec left; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; xfs_extnum_t nexts = 0; - xfs_fileoff_t startoff; + xfs_extnum_t current_ext; int error = 0; - int i; int whichfork = XFS_DATA_FORK; - int logflags; - xfs_filblks_t blockcount = 0; + int logflags = 0; int total_extents; if (unlikely(XFS_TEST_ERROR( @@ -5451,7 +5649,8 @@ xfs_bmap_shift_extents( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - ASSERT(current_ext != NULL); + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ifp = XFS_IFORK_PTR(ip, whichfork); if (!(ifp->if_flags & XFS_IFEXTENTS)) { @@ -5461,142 +5660,62 @@ xfs_bmap_shift_extents( return error; } - /* - * If *current_ext is 0, we would need to lookup the extent - * from where we would start shifting and store it in gotp. - */ - if (!*current_ext) { - gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); - /* - * gotp can be null in 2 cases: 1) if there are no extents - * or 2) start_fsb lies in a hole beyond which there are - * no extents. Either way, we are done. - */ - if (!gotp) { - *done = 1; - return 0; - } - } - - /* We are going to change core inode */ - logflags = XFS_ILOG_CORE; if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = 0; - } else { - cur = NULL; - logflags |= XFS_ILOG_DEXT; + } + + /* + * Look up the extent index for the fsb where we start shifting. We can + * henceforth iterate with current_ext as extent list changes are locked + * out via ilock. + * + * gotp can be null in 2 cases: 1) if there are no extents or 2) + * start_fsb lies in a hole beyond which there are no extents. Either + * way, we are done. + */ + gotp = xfs_iext_bno_to_ext(ifp, start_fsb, ¤t_ext); + if (!gotp) { + *done = 1; + goto del_cursor; } /* * There may be delalloc extents in the data fork before the range we - * are collapsing out, so we cannot - * use the count of real extents here. Instead we have to calculate it - * from the incore fork. + * are collapsing out, so we cannot use the count of real extents here. + * Instead we have to calculate it from the incore fork. */ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - while (nexts++ < num_exts && *current_ext < total_extents) { - - gotp = xfs_iext_get_ext(ifp, *current_ext); - xfs_bmbt_get_all(gotp, &got); - startoff = got.br_startoff - offset_shift_fsb; - - /* - * Before shifting extent into hole, make sure that the hole - * is large enough to accomodate the shift. - */ - if (*current_ext) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - *current_ext - 1), &left); - - if (startoff < left.br_startoff + left.br_blockcount) - error = -EINVAL; - } else if (offset_shift_fsb > got.br_startoff) { - /* - * When first extent is shifted, offset_shift_fsb - * should be less than the stating offset of - * the first extent. - */ - error = -EINVAL; - } - + while (nexts++ < num_exts && current_ext < total_extents) { + error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, + ¤t_ext, gotp, cur, &logflags); if (error) goto del_cursor; - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - /* Check if we can merge 2 adjacent extents */ - if (*current_ext && - left.br_startoff + left.br_blockcount == startoff && - left.br_startblock + left.br_blockcount == - got.br_startblock && - left.br_state == got.br_state && - left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { - blockcount = left.br_blockcount + - got.br_blockcount; - xfs_iext_remove(ip, *current_ext, 1, 0); - if (cur) { - error = xfs_btree_delete(cur, &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); - gotp = xfs_iext_get_ext(ifp, --*current_ext); - xfs_bmbt_get_all(gotp, &got); - - /* Make cursor point to the extent we will update */ - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - xfs_bmbt_set_blockcount(gotp, blockcount); - got.br_blockcount = blockcount; - } else { - /* We have to update the startoff */ - xfs_bmbt_set_startoff(gotp, startoff); - got.br_startoff = startoff; - } - - if (cur) { - error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state); - if (error) - goto del_cursor; - } - - (*current_ext)++; + /* update total extent count and grab the next record */ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + if (current_ext >= total_extents) + break; + gotp = xfs_iext_get_ext(ifp, current_ext); } /* Check if we are done */ - if (*current_ext == total_extents) + if (current_ext == total_extents) { *done = 1; + } else if (next_fsb) { + xfs_bmbt_get_all(gotp, &got); + *next_fsb = got.br_startoff; + } del_cursor: if (cur) xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_trans_log_inode(tp, ip, logflags); + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index b879ca56a64c..44db6db86402 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -178,9 +178,8 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, - int *done, xfs_fileoff_t start_fsb, - xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, - xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, - int num_exts); + xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, + int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, + struct xfs_bmap_free *flist, int num_exts); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index c9aee52a37e2..7e42fdfd2f1d 100644 --- a/fs/xfs/libxfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c @@ -270,7 +270,6 @@ xfs_dir3_data_get_ftype( { __uint8_t ftype = dep->name[dep->namelen]; - ASSERT(ftype < XFS_DIR3_FT_MAX); if (ftype >= XFS_DIR3_FT_MAX) return XFS_DIR3_FT_UNKNOWN; return ftype; diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 6cef22152fd6..7075aaf131f4 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -237,7 +237,8 @@ xfs_dir_init( } /* - Enter a name in a directory. + * Enter a name in a directory, or check for available space. + * If inum is 0, only the available space test is performed. */ int xfs_dir_createname( @@ -254,10 +255,12 @@ xfs_dir_createname( int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - rval = xfs_dir_ino_validate(tp->t_mountp, inum); - if (rval) - return rval; - XFS_STATS_INC(xs_dir_create); + if (inum) { + rval = xfs_dir_ino_validate(tp->t_mountp, inum); + if (rval) + return rval; + XFS_STATS_INC(xs_dir_create); + } args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) @@ -276,6 +279,8 @@ xfs_dir_createname( args->whichfork = XFS_DATA_FORK; args->trans = tp; args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + if (!inum) + args->op_flags |= XFS_DA_OP_JUSTCHECK; if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { rval = xfs_dir2_sf_addname(args); @@ -535,62 +540,14 @@ out_free: /* * See if this entry can be added to the directory without allocating space. - * First checks that the caller couldn't reserve enough space (resblks = 0). */ int xfs_dir_canenter( xfs_trans_t *tp, xfs_inode_t *dp, - struct xfs_name *name, /* name of entry to add */ - uint resblks) + struct xfs_name *name) /* name of entry to add */ { - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ - - if (resblks) - return 0; - - ASSERT(S_ISDIR(dp->i_d.di_mode)); - - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); - if (!args) - return -ENOMEM; - - args->geo = dp->i_mount->m_dir_geo; - args->name = name->name; - args->namelen = name->len; - args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); - args->dp = dp; - args->whichfork = XFS_DATA_FORK; - args->trans = tp; - args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | - XFS_DA_OP_OKNOENT; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - rval = xfs_dir2_sf_addname(args); - goto out_free; - } - - rval = xfs_dir2_isblock(args, &v); - if (rval) - goto out_free; - if (v) { - rval = xfs_dir2_block_addname(args); - goto out_free; - } - - rval = xfs_dir2_isleaf(args, &v); - if (rval) - goto out_free; - if (v) - rval = xfs_dir2_leaf_addname(args); - else - rval = xfs_dir2_node_addname(args); -out_free: - kmem_free(args); - return rval; + return xfs_dir_createname(tp, dp, name, 0, NULL, NULL, 0); } /* diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index c8e86b0b5e99..4dff261e6ed5 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -136,7 +136,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, xfs_fsblock_t *first, struct xfs_bmap_free *flist, xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, uint resblks); + struct xfs_name *name); /* * Direct call from the bmap code, bypassing the generic directory layer. diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index b62771f1f4b5..d213a2eae95e 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2051,6 +2051,8 @@ xfs_agi_verify( if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) return false; + if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) + return false; /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index f4dd697cac08..7c818f1e4484 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -424,20 +424,24 @@ xfs_rtfind_forw( } /* - * Read and modify the summary information for a given extent size, + * Read and/or modify the summary information for a given extent size, * bitmap block combination. * Keeps track of a current summary block, so we don't keep reading * it from the buffer cache. + * + * Summary information is returned in *sum if specified. + * If no delta is specified, returns summary only. */ int -xfs_rtmodify_summary( - xfs_mount_t *mp, /* file system mount point */ +xfs_rtmodify_summary_int( + xfs_mount_t *mp, /* file system mount structure */ xfs_trans_t *tp, /* transaction pointer */ int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + xfs_fsblock_t *rsb, /* in/out: summary block number */ + xfs_suminfo_t *sum) /* out: summary info for this block */ { xfs_buf_t *bp; /* buffer for the summary block */ int error; /* error value */ @@ -456,7 +460,7 @@ xfs_rtmodify_summary( /* * If we have an old buffer, and the block number matches, use that. */ - if (rbpp && *rbpp && *rsb == sb) + if (*rbpp && *rsb == sb) bp = *rbpp; /* * Otherwise we have to get the buffer. @@ -465,7 +469,7 @@ xfs_rtmodify_summary( /* * If there was an old one, get rid of it first. */ - if (rbpp && *rbpp) + if (*rbpp) xfs_trans_brelse(tp, *rbpp); error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); if (error) { @@ -474,21 +478,38 @@ xfs_rtmodify_summary( /* * Remember this buffer and block for the next call. */ - if (rbpp) { - *rbpp = bp; - *rsb = sb; - } + *rbpp = bp; + *rsb = sb; } /* - * Point to the summary information, modify and log it. + * Point to the summary information, modify/log it, and/or copy it out. */ sp = XFS_SUMPTR(mp, bp, so); - *sp += delta; - xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), - (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); + if (delta) { + uint first = (uint)((char *)sp - (char *)bp->b_addr); + + *sp += delta; + xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1); + } + if (sum) + *sum = *sp; return 0; } +int +xfs_rtmodify_summary( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + int log, /* log2 of extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + int delta, /* change to make to summary info */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + return xfs_rtmodify_summary_int(mp, tp, log, bbno, + delta, rbpp, rsb, NULL); +} + /* * Set the given range of bitmap bits to the given value. * Do whatever I/O and logging is required. diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index ad525a5623a4..8426e5e2682e 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -279,11 +279,13 @@ xfs_mount_validate_sb( sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || + sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || + sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11e9b4caa54f..2f502537a39c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -434,10 +434,22 @@ xfs_start_page_writeback( { ASSERT(PageLocked(page)); ASSERT(!PageWriteback(page)); - if (clear_dirty) + + /* + * if the page was not fully cleaned, we need to ensure that the higher + * layers come back to it correctly. That means we need to keep the page + * dirty, and for WB_SYNC_ALL writeback we need to ensure the + * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to + * write this page in this writeback sweep will be made. + */ + if (clear_dirty) { clear_page_dirty_for_io(page); - set_page_writeback(page); + set_page_writeback(page); + } else + set_page_writeback_keepwrite(page); + unlock_page(page); + /* If no buffers on the page are to be written, finish it here */ if (!buffers) end_page_writeback(page); @@ -1753,11 +1765,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2f1e30d39a35..d8b77b5bf4d9 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1205,6 +1205,7 @@ xfs_free_file_space( xfs_bmap_free_t free_list; xfs_bmbt_irec_t imap; xfs_off_t ioffset; + xfs_off_t iendoffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; @@ -1233,12 +1234,13 @@ xfs_free_file_space( inode_dio_wait(VFS_I(ip)); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); - ioffset = offset & ~(rounding - 1); - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - ioffset, -1); + ioffset = round_down(offset, rounding); + iendoffset = round_up(offset + len, rounding) - 1; + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, + iendoffset); if (error) goto out; - truncate_pagecache_range(VFS_I(ip), ioffset, -1); + truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset); /* * Need to zero the stuff we're not freeing, on disk. @@ -1456,24 +1458,50 @@ xfs_collapse_file_space( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error; - xfs_extnum_t current_ext = 0; struct xfs_bmap_free free_list; xfs_fsblock_t first_block; int committed; xfs_fileoff_t start_fsb; + xfs_fileoff_t next_fsb; xfs_fileoff_t shift_fsb; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); trace_xfs_collapse_file_space(ip); - start_fsb = XFS_B_TO_FSB(mp, offset + len); + next_fsb = XFS_B_TO_FSB(mp, offset + len); shift_fsb = XFS_B_TO_FSB(mp, len); error = xfs_free_file_space(ip, offset, len); if (error) return error; + /* + * Trim eofblocks to avoid shifting uninitialized post-eof preallocation + * into the accessible region of the file. + */ + if (xfs_can_free_eofblocks(ip, true)) { + error = xfs_free_eofblocks(mp, ip, false); + if (error) + return error; + } + + /* + * Writeback and invalidate cache for the remainder of the file as we're + * about to shift down every extent from the collapse range to EOF. The + * free of the collapse range above might have already done some of + * this, but we shouldn't rely on it to do anything outside of the range + * that was freed. + */ + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + offset + len, -1); + if (error) + return error; + error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + (offset + len) >> PAGE_CACHE_SHIFT, -1); + if (error) + return error; + while (!error && !done) { tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); /* @@ -1505,10 +1533,10 @@ xfs_collapse_file_space( * We are using the write transaction in which max 2 bmbt * updates are allowed */ - error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, - shift_fsb, ¤t_ext, - &first_block, &free_list, - XFS_BMAP_MAX_SHIFT_EXTENTS); + start_fsb = next_fsb; + error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, + &done, &next_fsb, &first_block, &free_list, + XFS_BMAP_MAX_SHIFT_EXTENTS); if (error) goto out; @@ -1618,7 +1646,7 @@ xfs_swap_extents_check_format( return 0; } -int +static int xfs_swap_extent_flush( struct xfs_inode *ip) { diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cd7b8ca9b064..ec6505056b2c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1884,7 +1884,7 @@ xfs_buf_init(void) goto out; xfslogd_workqueue = alloc_workqueue("xfslogd", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1); if (!xfslogd_workqueue) goto out_free_buf_zone; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 76007deed31f..30fa5db9aea8 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -501,7 +501,7 @@ xfs_buf_item_unpin( * buffer being bad.. */ -DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); +static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); STATIC uint xfs_buf_item_push( diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 076b1708d134..eb596b419942 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -291,12 +291,22 @@ xfs_file_read_iter( if (inode->i_mapping->nrpages) { ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + size - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } @@ -632,10 +642,19 @@ xfs_file_dio_aio_write( if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + count - 1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + count - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } /* @@ -964,7 +983,7 @@ xfs_vm_page_mkwrite( /* * This type is designed to indicate the type of offset we would like - * to search from page cache for either xfs_seek_data() or xfs_seek_hole(). + * to search from page cache for xfs_seek_hole_data(). */ enum { HOLE_OFF = 0, @@ -1021,7 +1040,7 @@ xfs_lookup_buffer_offset( /* * This routine is called to find out and return a data or hole offset * from the page cache for unwritten extents according to the desired - * type for xfs_seek_data() or xfs_seek_hole(). + * type for xfs_seek_hole_data(). * * The argument offset is used to tell where we start to search from the * page cache. Map is used to figure out the end points of the range to @@ -1181,9 +1200,10 @@ out: } STATIC loff_t -xfs_seek_data( +xfs_seek_hole_data( struct file *file, - loff_t start) + loff_t start, + int whence) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); @@ -1195,6 +1215,9 @@ xfs_seek_data( uint lock; int error; + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); @@ -1209,6 +1232,7 @@ xfs_seek_data( */ fsbno = XFS_B_TO_FSBT(mp, start); end = XFS_B_TO_FSB(mp, isize); + for (;;) { struct xfs_bmbt_irec map[2]; int nmap = 2; @@ -1229,29 +1253,48 @@ xfs_seek_data( offset = max_t(loff_t, start, XFS_FSB_TO_B(mp, map[i].br_startoff)); - /* Landed in a data extent */ - if (map[i].br_startblock == DELAYSTARTBLOCK || - (map[i].br_state == XFS_EXT_NORM && - !isnullstartblock(map[i].br_startblock))) + /* Landed in the hole we wanted? */ + if (whence == SEEK_HOLE && + map[i].br_startblock == HOLESTARTBLOCK) + goto out; + + /* Landed in the data extent we wanted? */ + if (whence == SEEK_DATA && + (map[i].br_startblock == DELAYSTARTBLOCK || + (map[i].br_state == XFS_EXT_NORM && + !isnullstartblock(map[i].br_startblock)))) goto out; /* - * Landed in an unwritten extent, try to search data - * from page cache. + * Landed in an unwritten extent, try to search + * for hole or data from page cache. */ if (map[i].br_state == XFS_EXT_UNWRITTEN) { if (xfs_find_get_desired_pgoff(inode, &map[i], - DATA_OFF, &offset)) + whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF, + &offset)) goto out; } } /* - * map[0] is hole or its an unwritten extent but - * without data in page cache. Probably means that - * we are reading after EOF if nothing in map[1]. + * We only received one extent out of the two requested. This + * means we've hit EOF and didn't find what we are looking for. */ if (nmap == 1) { + /* + * If we were looking for a hole, set offset to + * the end of the file (i.e., there is an implicit + * hole at the end of any file). + */ + if (whence == SEEK_HOLE) { + offset = isize; + break; + } + /* + * If we were looking for data, it's nowhere to be found + */ + ASSERT(whence == SEEK_DATA); error = -ENXIO; goto out_unlock; } @@ -1260,125 +1303,30 @@ xfs_seek_data( /* * Nothing was found, proceed to the next round of search - * if reading offset not beyond or hit EOF. + * if the next reading offset is not at or beyond EOF. */ fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); if (start >= isize) { + if (whence == SEEK_HOLE) { + offset = isize; + break; + } + ASSERT(whence == SEEK_DATA); error = -ENXIO; goto out_unlock; } } out: - offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); - -out_unlock: - xfs_iunlock(ip, lock); - - if (error) - return error; - return offset; -} - -STATIC loff_t -xfs_seek_hole( - struct file *file, - loff_t start) -{ - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - loff_t uninitialized_var(offset); - xfs_fsize_t isize; - xfs_fileoff_t fsbno; - xfs_filblks_t end; - uint lock; - int error; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - lock = xfs_ilock_data_map_shared(ip); - - isize = i_size_read(inode); - if (start >= isize) { - error = -ENXIO; - goto out_unlock; - } - - fsbno = XFS_B_TO_FSBT(mp, start); - end = XFS_B_TO_FSB(mp, isize); - - for (;;) { - struct xfs_bmbt_irec map[2]; - int nmap = 2; - unsigned int i; - - error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, - XFS_BMAPI_ENTIRE); - if (error) - goto out_unlock; - - /* No extents at given offset, must be beyond EOF */ - if (nmap == 0) { - error = -ENXIO; - goto out_unlock; - } - - for (i = 0; i < nmap; i++) { - offset = max_t(loff_t, start, - XFS_FSB_TO_B(mp, map[i].br_startoff)); - - /* Landed in a hole */ - if (map[i].br_startblock == HOLESTARTBLOCK) - goto out; - - /* - * Landed in an unwritten extent, try to search hole - * from page cache. - */ - if (map[i].br_state == XFS_EXT_UNWRITTEN) { - if (xfs_find_get_desired_pgoff(inode, &map[i], - HOLE_OFF, &offset)) - goto out; - } - } - - /* - * map[0] contains data or its unwritten but contains - * data in page cache, probably means that we are - * reading after EOF. We should fix offset to point - * to the end of the file(i.e., there is an implicit - * hole at the end of any file). - */ - if (nmap == 1) { - offset = isize; - break; - } - - ASSERT(i > 1); - - /* - * Both mappings contains data, proceed to the next round of - * search if the current reading offset not beyond or hit EOF. - */ - fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; - start = XFS_FSB_TO_B(mp, fsbno); - if (start >= isize) { - offset = isize; - break; - } - } - -out: /* - * At this point, we must have found a hole. However, the returned + * If at this point we have found the hole we wanted, the returned * offset may be bigger than the file size as it may be aligned to - * page boundary for unwritten extents, we need to deal with this + * page boundary for unwritten extents. We need to deal with this * situation in particular. */ - offset = min_t(loff_t, offset, isize); + if (whence == SEEK_HOLE) + offset = min_t(loff_t, offset, isize); offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: @@ -1393,17 +1341,16 @@ STATIC loff_t xfs_file_llseek( struct file *file, loff_t offset, - int origin) + int whence) { - switch (origin) { + switch (whence) { case SEEK_END: case SEEK_CUR: case SEEK_SET: - return generic_file_llseek(file, offset, origin); - case SEEK_DATA: - return xfs_seek_data(file, offset); + return generic_file_llseek(file, offset, whence); case SEEK_HOLE: - return xfs_seek_hole(file, offset); + case SEEK_DATA: + return xfs_seek_hole_data(file, offset, whence); default: return -EINVAL; } diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 5399ef222dd7..4d41b241298f 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -43,3 +43,7 @@ xfs_param_t xfs_params = { .fstrm_timer = { 1, 30*100, 3600*100}, .eofb_timer = { 1, 300, 3600*24}, }; + +struct xfs_globals xfs_globals = { + .log_recovery_delay = 0, /* no delay by default */ +}; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 981b2cf51985..b45f7b27b5df 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -33,7 +33,6 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_bmap_util.h" -#include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index fea3c92fb3f0..c92cb48617d1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1153,9 +1153,11 @@ xfs_create( if (error) goto out_trans_cancel; - error = xfs_dir_canenter(tp, dp, name, resblks); - if (error) - goto out_trans_cancel; + if (!resblks) { + error = xfs_dir_canenter(tp, dp, name); + if (error) + goto out_trans_cancel; + } /* * A newly created regular or special file just has one directory @@ -1421,9 +1423,11 @@ xfs_link( goto error_return; } - error = xfs_dir_canenter(tp, tdp, target_name, resblks); - if (error) - goto error_return; + if (!resblks) { + error = xfs_dir_canenter(tp, tdp, target_name); + if (error) + goto error_return; + } xfs_bmap_init(&free_list, &first_block); @@ -2759,9 +2763,11 @@ xfs_rename( * If there's no space reservation, check the entry will * fit before actually inserting it. */ - error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); - if (error) - goto error_return; + if (!spaceres) { + error = xfs_dir_canenter(tp, target_dp, target_name); + if (error) + goto error_return; + } /* * If target does not exist and the rename crosses * directories, adjust the target directory link count diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 72129493e9d3..ec6dcdc181ee 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -849,6 +849,36 @@ xfs_setattr_size( return error; truncate_setsize(inode, newsize); + /* + * The "we can't serialise against page faults" pain gets worse. + * + * If the file is mapped then we have to clean the page at the old EOF + * when extending the file. Extending the file can expose changes the + * underlying page mapping (e.g. from beyond EOF to a hole or + * unwritten), and so on the next attempt to write to that page we need + * to remap it for write. i.e. we need .page_mkwrite() to be called. + * Hence we need to clean the page to clean the pte and so a new write + * fault will be triggered appropriately. + * + * If we do it before we change the inode size, then we can race with a + * page fault that maps the page with exactly the same problem. If we do + * it after we change the file size, then a new page fault can come in + * and allocate space before we've run the rest of the truncate + * transaction. That's kinda grotesque, but it's better than have data + * over a hole, and so that's the lesser evil that has been chosen here. + * + * The real solution, however, is to have some mechanism for locking out + * page faults while a truncate is in progress. + */ + if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { + error = filemap_write_and_wait_range( + VFS_I(ip)->i_mapping, + round_down(oldsize, PAGE_CACHE_SIZE), + round_up(oldsize, PAGE_CACHE_SIZE) - 1); + if (error) + return error; + } + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f6b79e5325dd..f506c457011e 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -463,12 +463,40 @@ xlog_cil_push( spin_unlock(&cil->xc_push_lock); goto out_skip; } - spin_unlock(&cil->xc_push_lock); /* check for a previously pushed seqeunce */ - if (push_seq < cil->xc_ctx->sequence) + if (push_seq < cil->xc_ctx->sequence) { + spin_unlock(&cil->xc_push_lock); goto out_skip; + } + + /* + * We are now going to push this context, so add it to the committing + * list before we do anything else. This ensures that anyone waiting on + * this push can easily detect the difference between a "push in + * progress" and "CIL is empty, nothing to do". + * + * IOWs, a wait loop can now check for: + * the current sequence not being found on the committing list; + * an empty CIL; and + * an unchanged sequence number + * to detect a push that had nothing to do and therefore does not need + * waiting on. If the CIL is not empty, we get put on the committing + * list before emptying the CIL and bumping the sequence number. Hence + * an empty CIL and an unchanged sequence number means we jumped out + * above after doing nothing. + * + * Hence the waiter will either find the commit sequence on the + * committing list or the sequence number will be unchanged and the CIL + * still dirty. In that latter case, the push has not yet started, and + * so the waiter will have to continue trying to check the CIL + * committing list until it is found. In extreme cases of delay, the + * sequence may fully commit between the attempts the wait makes to wait + * on the commit sequence. + */ + list_add(&ctx->committing, &cil->xc_committing); + spin_unlock(&cil->xc_push_lock); /* * pull all the log vectors off the items in the CIL, and @@ -532,7 +560,6 @@ xlog_cil_push( */ spin_lock(&cil->xc_push_lock); cil->xc_current_sequence = new_ctx->sequence; - list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); @@ -855,13 +882,15 @@ restart: * Hence by the time we have got here it our sequence may not have been * pushed yet. This is true if the current sequence still matches the * push sequence after the above wait loop and the CIL still contains - * dirty objects. + * dirty objects. This is guaranteed by the push code first adding the + * context to the committing list before emptying the CIL. * - * When the push occurs, it will empty the CIL and atomically increment - * the currect sequence past the push sequence and move it into the - * committing list. Of course, if the CIL is clean at the time of the - * push, it won't have pushed the CIL at all, so in that case we should - * try the push for this sequence again from the start just in case. + * Hence if we don't find the context in the committing list and the + * current sequence number is unchanged then the CIL contents are + * significant. If the CIL is empty, if means there was nothing to push + * and that means there is nothing to wait for. If the CIL is not empty, + * it means we haven't yet started the push, because if it had started + * we would have found the context on the committing list. */ if (sequence == cil->xc_current_sequence && !list_empty(&cil->xc_cil)) { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 5019f52e4cc2..79cfe7e6ec7a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4184,41 +4184,13 @@ xlog_do_recovery_pass( } memset(rhash, 0, sizeof(rhash)); - if (tail_blk <= head_blk) { - for (blk_no = tail_blk; blk_no < head_blk; ) { - error = xlog_bread(log, blk_no, hblks, hbp, &offset); - if (error) - goto bread_err2; - - rhead = (xlog_rec_header_t *)offset; - error = xlog_valid_rec_header(log, rhead, blk_no); - if (error) - goto bread_err2; - - /* blocks in data section */ - bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); - error = xlog_bread(log, blk_no + hblks, bblks, dbp, - &offset); - if (error) - goto bread_err2; - - error = xlog_unpack_data(rhead, offset, log); - if (error) - goto bread_err2; - - error = xlog_recover_process_data(log, - rhash, rhead, offset, pass); - if (error) - goto bread_err2; - blk_no += bblks + hblks; - } - } else { + blk_no = tail_blk; + if (tail_blk > head_blk) { /* * Perform recovery around the end of the physical log. * When the head is not on the same cycle number as the tail, - * we can't do a sequential recovery as above. + * we can't do a sequential recovery. */ - blk_no = tail_blk; while (blk_no < log->l_logBBsize) { /* * Check for header wrapping around physical end-of-log @@ -4332,34 +4304,35 @@ xlog_do_recovery_pass( ASSERT(blk_no >= log->l_logBBsize); blk_no -= log->l_logBBsize; + } - /* read first part of physical log */ - while (blk_no < head_blk) { - error = xlog_bread(log, blk_no, hblks, hbp, &offset); - if (error) - goto bread_err2; + /* read first part of physical log */ + while (blk_no < head_blk) { + error = xlog_bread(log, blk_no, hblks, hbp, &offset); + if (error) + goto bread_err2; - rhead = (xlog_rec_header_t *)offset; - error = xlog_valid_rec_header(log, rhead, blk_no); - if (error) - goto bread_err2; + rhead = (xlog_rec_header_t *)offset; + error = xlog_valid_rec_header(log, rhead, blk_no); + if (error) + goto bread_err2; - bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); - error = xlog_bread(log, blk_no+hblks, bblks, dbp, - &offset); - if (error) - goto bread_err2; + /* blocks in data section */ + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); + error = xlog_bread(log, blk_no+hblks, bblks, dbp, + &offset); + if (error) + goto bread_err2; - error = xlog_unpack_data(rhead, offset, log); - if (error) - goto bread_err2; + error = xlog_unpack_data(rhead, offset, log); + if (error) + goto bread_err2; - error = xlog_recover_process_data(log, rhash, - rhead, offset, pass); - if (error) - goto bread_err2; - blk_no += bblks + hblks; - } + error = xlog_recover_process_data(log, rhash, + rhead, offset, pass); + if (error) + goto bread_err2; + blk_no += bblks + hblks; } bread_err2: @@ -4561,6 +4534,18 @@ xlog_recover( return -EINVAL; } + /* + * Delay log recovery if the debug hook is set. This is debug + * instrumention to coordinate simulation of I/O failures with + * log recovery. + */ + if (xfs_globals.log_recovery_delay) { + xfs_notice(log->l_mp, + "Delaying log recovery for %d seconds.", + xfs_globals.log_recovery_delay); + msleep(xfs_globals.log_recovery_delay * 1000); + } + xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", log->l_mp->m_logname ? log->l_mp->m_logname : "internal"); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 1eb6f3df698c..30ecca3037e3 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -304,7 +304,8 @@ _xfs_mru_cache_reap( int xfs_mru_cache_init(void) { - xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); + xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", + WQ_MEM_RECLAIM|WQ_FREEZABLE, 1); if (!xfs_mru_reap_wq) return -ENOMEM; return 0; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 909e143b87ae..d45aebe04dde 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -46,7 +46,7 @@ * Keeps track of a current summary block, so we don't keep reading * it from the buffer cache. */ -STATIC int /* error */ +static int xfs_rtget_summary( xfs_mount_t *mp, /* file system mount structure */ xfs_trans_t *tp, /* transaction pointer */ @@ -56,60 +56,9 @@ xfs_rtget_summary( xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { - xfs_buf_t *bp; /* buffer for summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ - - /* - * Compute entry number in the summary file. - */ - so = XFS_SUMOFFS(mp, log, bbno); - /* - * Compute the block number in the summary file. - */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (rbpp && *rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (rbpp && *rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - if (rbpp) { - *rbpp = bp; - *rsb = sb; - } - } - /* - * Point to the summary information & copy it out. - */ - sp = XFS_SUMPTR(mp, bp, so); - *sum = *sp; - /* - * Drop the buffer if we're not asked to remember it. - */ - if (!rbpp) - xfs_trans_brelse(tp, bp); - return 0; + return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum); } - /* * Return whether there are any free extents in the size range given * by low and high, for the bitmap block bbno. diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index c642795324af..76c0a4a9bb17 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -111,6 +111,10 @@ int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t *rtblock); int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, int val); +int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, + int log, xfs_rtblock_t bbno, int delta, + xfs_buf_t **rbpp, xfs_fsblock_t *rsb, + xfs_suminfo_t *sum); int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, xfs_fsblock_t *rsb); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b194652033cd..dcd4b93dccdc 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -47,6 +47,7 @@ #include "xfs_dinode.h" #include "xfs_filestream.h" #include "xfs_quota.h" +#include "xfs_sysfs.h" #include <linux/namei.h> #include <linux/init.h> @@ -61,7 +62,11 @@ static const struct super_operations xfs_super_operations; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; -struct kset *xfs_kset; + +struct kset *xfs_kset; /* top-level xfs sysfs dir */ +#ifdef DEBUG +static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ +#endif #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ @@ -838,32 +843,32 @@ xfs_init_mount_workqueues( struct xfs_mount *mp) { mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", - WQ_MEM_RECLAIM, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_data_workqueue) goto out; mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", - WQ_MEM_RECLAIM, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_unwritten_workqueue) goto out_destroy_data_iodone_queue; mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", - WQ_MEM_RECLAIM, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_cil_workqueue) goto out_destroy_unwritten; mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", - 0, 0, mp->m_fsname); + WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_reclaim_workqueue) goto out_destroy_cil; mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - 0, 0, mp->m_fsname); + WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_log_workqueue) goto out_destroy_reclaim; mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", - 0, 0, mp->m_fsname); + WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_eofblocks_workqueue) goto out_destroy_log; @@ -1715,7 +1720,8 @@ xfs_init_workqueues(void) * AGs in all the filesystems mounted. Hence use the default large * max_active value for this workqueue. */ - xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); + xfs_alloc_wq = alloc_workqueue("xfsalloc", + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0); if (!xfs_alloc_wq) return -ENOMEM; @@ -1768,9 +1774,16 @@ init_xfs_fs(void) goto out_sysctl_unregister;; } - error = xfs_qm_init(); +#ifdef DEBUG + xfs_dbg_kobj.kobject.kset = xfs_kset; + error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); if (error) goto out_kset_unregister; +#endif + + error = xfs_qm_init(); + if (error) + goto out_remove_kobj; error = register_filesystem(&xfs_fs_type); if (error) @@ -1779,7 +1792,11 @@ init_xfs_fs(void) out_qm_exit: xfs_qm_exit(); + out_remove_kobj: +#ifdef DEBUG + xfs_sysfs_del(&xfs_dbg_kobj); out_kset_unregister: +#endif kset_unregister(xfs_kset); out_sysctl_unregister: xfs_sysctl_unregister(); @@ -1802,6 +1819,9 @@ exit_xfs_fs(void) { xfs_qm_exit(); unregister_filesystem(&xfs_fs_type); +#ifdef DEBUG + xfs_sysfs_del(&xfs_dbg_kobj); +#endif kset_unregister(xfs_kset); xfs_sysctl_unregister(); xfs_cleanup_procfs(); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 6a944a2cd36f..02ae62a998e0 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -269,9 +269,11 @@ xfs_symlink( /* * Check for ability to enter directory entry, if no space reserved. */ - error = xfs_dir_canenter(tp, dp, link_name, resblks); - if (error) - goto error_return; + if (!resblks) { + error = xfs_dir_canenter(tp, dp, link_name); + if (error) + goto error_return; + } /* * Initialize the bmap freelist prior to calling either * bmapi or the directory create code. diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index bd8e157c20ef..ffef45375754 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -92,6 +92,11 @@ enum { extern xfs_param_t xfs_params; +struct xfs_globals { + int log_recovery_delay; /* log recovery delay (secs) */ +}; +extern struct xfs_globals xfs_globals; + #ifdef CONFIG_SYSCTL extern int xfs_sysctl_register(void); extern void xfs_sysctl_unregister(void); diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 9835139ce1ec..aa03670851d8 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -51,6 +51,80 @@ struct kobj_type xfs_mp_ktype = { .release = xfs_sysfs_release, }; +#ifdef DEBUG +/* debug */ + +STATIC ssize_t +log_recovery_delay_store( + const char *buf, + size_t count, + void *data) +{ + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val < 0 || val > 60) + return -EINVAL; + + xfs_globals.log_recovery_delay = val; + + return count; +} + +STATIC ssize_t +log_recovery_delay_show( + char *buf, + void *data) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); +} +XFS_SYSFS_ATTR_RW(log_recovery_delay); + +static struct attribute *xfs_dbg_attrs[] = { + ATTR_LIST(log_recovery_delay), + NULL, +}; + +STATIC ssize_t +xfs_dbg_show( + struct kobject *kobject, + struct attribute *attr, + char *buf) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; +} + +STATIC ssize_t +xfs_dbg_store( + struct kobject *kobject, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0; +} + +static struct sysfs_ops xfs_dbg_ops = { + .show = xfs_dbg_show, + .store = xfs_dbg_store, +}; + +struct kobj_type xfs_dbg_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_dbg_ops, + .default_attrs = xfs_dbg_attrs, +}; + +#endif /* DEBUG */ + /* xlog */ STATIC ssize_t diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 54a2091183c0..240eee35f342 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -20,6 +20,7 @@ #define __XFS_SYSFS_H__ extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ +extern struct kobj_type xfs_dbg_ktype; /* debug */ extern struct kobj_type xfs_log_ktype; /* xlog */ static inline struct xfs_kobj * |