summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c168
1 files changed, 99 insertions, 69 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6467d5e1df2d..c5077e6326c7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,6 +55,12 @@ xfs_extlen_t
xfs_get_extsz_hint(
struct xfs_inode *ip)
{
+ /*
+ * No point in aligning allocations if we need to COW to actually
+ * write to them.
+ */
+ if (xfs_is_always_cow_inode(ip))
+ return 0;
if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
return ip->i_d.di_extsize;
if (XFS_IS_REALTIME_INODE(ip))
@@ -809,7 +815,7 @@ xfs_ialloc(
ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
inode->i_rdev = rdev;
- xfs_set_projid(ip, prid);
+ ip->i_d.di_projid = prid;
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
@@ -845,8 +851,7 @@ xfs_ialloc(
inode_set_iversion(inode, 1);
ip->i_d.di_flags2 = 0;
ip->i_d.di_cowextsize = 0;
- ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
- ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
+ ip->i_d.di_crtime = tv;
}
@@ -1418,7 +1423,7 @@ xfs_link(
* the tree quota mechanism could be circumvented.
*/
if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
- (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
+ tdp->i_d.di_projid != sip->i_d.di_projid)) {
error = -EXDEV;
goto error_return;
}
@@ -1513,10 +1518,8 @@ xfs_itruncate_extents_flags(
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp;
xfs_fileoff_t first_unmap_block;
- xfs_fileoff_t last_block;
xfs_filblks_t unmap_len;
int error = 0;
- int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
@@ -1536,21 +1539,22 @@ xfs_itruncate_extents_flags(
* the end of the file (in a crash where the space is allocated
* but the inode size is not yet updated), simply remove any
* blocks which show up between the new EOF and the maximum
- * possible file size. If the first block to be removed is
- * beyond the maximum file size (ie it is the same as last_block),
- * then there is nothing to do.
+ * possible file size.
+ *
+ * We have to free all the blocks to the bmbt maximum offset, even if
+ * the page cache can't scale that far.
*/
first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
- if (first_unmap_block == last_block)
+ if (first_unmap_block >= XFS_MAX_FILEOFF) {
+ WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
return 0;
+ }
- ASSERT(first_unmap_block < last_block);
- unmap_len = last_block - first_unmap_block + 1;
- while (!done) {
+ unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
+ while (unmap_len > 0) {
ASSERT(tp->t_firstblock == NULLFSBLOCK);
- error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
- XFS_ITRUNC_MAX_EXTENTS, &done);
+ error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
+ flags, XFS_ITRUNC_MAX_EXTENTS);
if (error)
goto out;
@@ -1570,7 +1574,7 @@ xfs_itruncate_extents_flags(
if (whichfork == XFS_DATA_FORK) {
/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp,
- first_unmap_block, last_block, true);
+ first_unmap_block, XFS_MAX_FILEOFF, true);
if (error)
goto out;
@@ -2018,7 +2022,7 @@ xfs_iunlink_add_backref(
if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK))
return 0;
- iu = kmem_zalloc(sizeof(*iu), KM_SLEEP | KM_NOFS);
+ iu = kmem_zalloc(sizeof(*iu), KM_NOFS);
iu->iu_agino = prev_agino;
iu->iu_next_unlinked = this_agino;
@@ -2130,8 +2134,10 @@ xfs_iunlink_update_bucket(
* passed in because either we're adding or removing ourselves from the
* head of the list.
*/
- if (old_value == new_agino)
+ if (old_value == new_agino) {
+ xfs_buf_corruption_error(agibp);
return -EFSCORRUPTED;
+ }
agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
offset = offsetof(struct xfs_agi, agi_unlinked) +
@@ -2194,6 +2200,8 @@ xfs_iunlink_update_inode(
/* Make sure the old pointer isn't garbage. */
old_value = be32_to_cpu(dip->di_next_unlinked);
if (!xfs_verify_agino_or_null(mp, agno, old_value)) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+ sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
goto out;
}
@@ -2205,8 +2213,11 @@ xfs_iunlink_update_inode(
*/
*old_next_agino = old_value;
if (old_value == next_agino) {
- if (next_agino != NULLAGINO)
+ if (next_agino != NULLAGINO) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
+ dip, sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
+ }
goto out;
}
@@ -2257,8 +2268,10 @@ xfs_iunlink(
*/
next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
if (next_agino == agino ||
- !xfs_verify_agino_or_null(mp, agno, next_agino))
+ !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+ xfs_buf_corruption_error(agibp);
return -EFSCORRUPTED;
+ }
if (next_agino != NULLAGINO) {
struct xfs_perag *pag;
@@ -2533,6 +2546,7 @@ xfs_ifree_cluster(
struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
xfs_ino_t inum;
+ int error;
inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
@@ -2561,12 +2575,11 @@ xfs_ifree_cluster(
* complete before we get a lock on it, and hence we may fail
* to mark all the active inodes on the buffer stale.
*/
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * igeo->blocks_per_cluster,
- XBF_UNMAPPED);
-
- if (!bp)
- return -ENOMEM;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+ mp->m_bsize * igeo->blocks_per_cluster,
+ XBF_UNMAPPED, &bp);
+ if (error)
+ return error;
/*
* This buffer may not have been correctly initialised as we
@@ -3196,6 +3209,7 @@ xfs_rename(
struct xfs_trans *tp;
struct xfs_inode *wip = NULL; /* whiteout inode */
struct xfs_inode *inodes[__XFS_SORT_INODES];
+ struct xfs_buf *agibp;
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
@@ -3270,7 +3284,7 @@ xfs_rename(
* tree quota mechanism would be circumvented.
*/
if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
- (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
+ target_dp->i_d.di_projid != src_ip->i_d.di_projid)) {
error = -EXDEV;
goto out_trans_cancel;
}
@@ -3282,7 +3296,8 @@ xfs_rename(
spaceres);
/*
- * Set up the target.
+ * Check for expected errors before we dirty the transaction
+ * so we can return an error without a transaction abort.
*/
if (target_ip == NULL) {
/*
@@ -3294,6 +3309,45 @@ xfs_rename(
if (error)
goto out_trans_cancel;
}
+ } else {
+ /*
+ * If target exists and it's a directory, check that whether
+ * it can be destroyed.
+ */
+ if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
+ (!xfs_dir_isempty(target_ip) ||
+ (VFS_I(target_ip)->i_nlink > 2))) {
+ error = -EEXIST;
+ goto out_trans_cancel;
+ }
+ }
+
+ /*
+ * Directory entry creation below may acquire the AGF. Remove
+ * the whiteout from the unlinked list first to preserve correct
+ * AGI/AGF locking order. This dirties the transaction so failures
+ * after this point will abort and log recovery will clean up the
+ * mess.
+ *
+ * For whiteouts, we need to bump the link count on the whiteout
+ * inode. After this point, we have a real link, clear the tmpfile
+ * state flag from the inode so it doesn't accidentally get misused
+ * in future.
+ */
+ if (wip) {
+ ASSERT(VFS_I(wip)->i_nlink == 0);
+ error = xfs_iunlink_remove(tp, wip);
+ if (error)
+ goto out_trans_cancel;
+
+ xfs_bumplink(tp, wip);
+ VFS_I(wip)->i_state &= ~I_LINKABLE;
+ }
+
+ /*
+ * Set up the target.
+ */
+ if (target_ip == NULL) {
/*
* If target does not exist and the rename crosses
* directories, adjust the target directory link count
@@ -3312,22 +3366,6 @@ xfs_rename(
}
} else { /* target_ip != NULL */
/*
- * If target exists and it's a directory, check that both
- * target and source are directories and that target can be
- * destroyed, or that neither is a directory.
- */
- if (S_ISDIR(VFS_I(target_ip)->i_mode)) {
- /*
- * Make sure target dir is empty.
- */
- if (!(xfs_dir_isempty(target_ip)) ||
- (VFS_I(target_ip)->i_nlink > 2)) {
- error = -EEXIST;
- goto out_trans_cancel;
- }
- }
-
- /*
* Link the source inode under the target name.
* If the source inode is a directory and we are moving
* it across directories, its ".." entry will be
@@ -3336,6 +3374,22 @@ xfs_rename(
* In case there is already an entry with the same
* name at the destination directory, remove it first.
*/
+
+ /*
+ * Check whether the replace operation will need to allocate
+ * blocks. This happens when the shortform directory lacks
+ * space and we have to convert it to a block format directory.
+ * When more blocks are necessary, we must lock the AGI first
+ * to preserve locking order (AGI -> AGF).
+ */
+ if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) {
+ error = xfs_read_agi(mp, tp,
+ XFS_INO_TO_AGNO(mp, target_ip->i_ino),
+ &agibp);
+ if (error)
+ goto out_trans_cancel;
+ }
+
error = xfs_dir_replace(tp, target_dp, target_name,
src_ip->i_ino, spaceres);
if (error)
@@ -3417,30 +3471,6 @@ xfs_rename(
if (error)
goto out_trans_cancel;
- /*
- * For whiteouts, we need to bump the link count on the whiteout inode.
- * This means that failures all the way up to this point leave the inode
- * on the unlinked list and so cleanup is a simple matter of dropping
- * the remaining reference to it. If we fail here after bumping the link
- * count, we're shutting down the filesystem so we'll never see the
- * intermediate state on disk.
- */
- if (wip) {
- ASSERT(VFS_I(wip)->i_nlink == 0);
- xfs_bumplink(tp, wip);
- error = xfs_iunlink_remove(tp, wip);
- if (error)
- goto out_trans_cancel;
- xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
-
- /*
- * Now we have a real link, clear the "I'm a tmpfile" state
- * flag from the inode so it doesn't accidentally get misused in
- * future.
- */
- VFS_I(wip)->i_state &= ~I_LINKABLE;
- }
-
xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
if (new_parent)
OpenPOWER on IntegriCloud