diff options
92 files changed, 2648 insertions, 2785 deletions
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2b363e23f36e..ff3f0b3cfdb3 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -278,6 +278,17 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, return ret; } +static int quota_rmxquota(struct super_block *sb, void __user *addr) +{ + __u32 flags; + + if (copy_from_user(&flags, addr, sizeof(flags))) + return -EFAULT; + if (!sb->s_qcop->rm_xquota) + return -ENOSYS; + return sb->s_qcop->rm_xquota(sb, flags); +} + /* Copy parameters and call proper function */ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr, struct path *path) @@ -316,8 +327,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return sb->s_qcop->quota_sync(sb, type); case Q_XQUOTAON: case Q_XQUOTAOFF: - case Q_XQUOTARM: return quota_setxstate(sb, cmd, addr); + case Q_XQUOTARM: + return quota_rmxquota(sb, addr); case Q_XGETQSTAT: return quota_getxstate(sb, addr); case Q_XGETQSTATV: diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 0fdd4109c624..6e247a99f5db 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -160,30 +160,38 @@ typedef struct xfs_agi { * still being referenced. */ __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; - + /* + * This marks the end of logging region 1 and start of logging region 2. + */ uuid_t agi_uuid; /* uuid of filesystem */ __be32 agi_crc; /* crc of agi sector */ __be32 agi_pad32; __be64 agi_lsn; /* last write sequence */ + __be32 agi_free_root; /* root of the free inode btree */ + __be32 agi_free_level;/* levels in free inode btree */ + /* structure must be padded to 64 bit alignment */ } xfs_agi_t; #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) -#define XFS_AGI_MAGICNUM 0x00000001 -#define XFS_AGI_VERSIONNUM 0x00000002 -#define XFS_AGI_SEQNO 0x00000004 -#define XFS_AGI_LENGTH 0x00000008 -#define XFS_AGI_COUNT 0x00000010 -#define XFS_AGI_ROOT 0x00000020 -#define XFS_AGI_LEVEL 0x00000040 -#define XFS_AGI_FREECOUNT 0x00000080 -#define XFS_AGI_NEWINO 0x00000100 -#define XFS_AGI_DIRINO 0x00000200 -#define XFS_AGI_UNLINKED 0x00000400 -#define XFS_AGI_NUM_BITS 11 -#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) +#define XFS_AGI_MAGICNUM (1 << 0) +#define XFS_AGI_VERSIONNUM (1 << 1) +#define XFS_AGI_SEQNO (1 << 2) +#define XFS_AGI_LENGTH (1 << 3) +#define XFS_AGI_COUNT (1 << 4) +#define XFS_AGI_ROOT (1 << 5) +#define XFS_AGI_LEVEL (1 << 6) +#define XFS_AGI_FREECOUNT (1 << 7) +#define XFS_AGI_NEWINO (1 << 8) +#define XFS_AGI_DIRINO (1 << 9) +#define XFS_AGI_UNLINKED (1 << 10) +#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ +#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) +#define XFS_AGI_FREE_ROOT (1 << 11) +#define XFS_AGI_FREE_LEVEL (1 << 12) +#define XFS_AGI_NUM_BITS_R2 13 /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index c1cf6a336a72..d43813267a80 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -257,16 +257,14 @@ xfs_alloc_fix_len( k = rlen % args->prod; if (k == args->mod) return; - if (k > args->mod) { - if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) - return; - } else { - if ((int)(rlen = rlen - args->prod - (args->mod - k)) < - (int)args->minlen) - return; - } - ASSERT(rlen >= args->minlen); - ASSERT(rlen <= args->maxlen); + if (k > args->mod) + rlen = rlen - (k - args->mod); + else + rlen = rlen - args->prod + (args->mod - k); + if ((int)rlen < (int)args->minlen) + return; + ASSERT(rlen >= args->minlen && rlen <= args->maxlen); + ASSERT(rlen % args->prod == args->mod); args->len = rlen; } @@ -541,7 +539,6 @@ xfs_alloc_read_agfl( XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); xfs_buf_set_ref(bp, XFS_AGFL_REF); *bpp = bp; return 0; diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index cc1eadcbb049..8358f1ded94d 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -70,7 +70,6 @@ xfs_allocbt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int length, int *stat) { int error; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0479c32c5eb1..e32640eedea6 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -975,14 +975,39 @@ xfs_vm_writepage( * Given that we do not allow direct reclaim to call us, we should * never be called while in a filesystem transaction. */ - if (WARN_ON(current->flags & PF_FSTRANS)) + if (WARN_ON_ONCE(current->flags & PF_FSTRANS)) goto redirty; /* Is this page beyond the end of the file? */ offset = i_size_read(inode); end_index = offset >> PAGE_CACHE_SHIFT; last_index = (offset - 1) >> PAGE_CACHE_SHIFT; - if (page->index >= end_index) { + + /* + * The page index is less than the end_index, adjust the end_offset + * to the highest offset that this page should represent. + * ----------------------------------------------------- + * | file mapping | <EOF> | + * ----------------------------------------------------- + * | Page ... | Page N-2 | Page N-1 | Page N | | + * ^--------------------------------^----------|-------- + * | desired writeback range | see else | + * ---------------------------------^------------------| + */ + if (page->index < end_index) + end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT; + else { + /* + * Check whether the page to write out is beyond or straddles + * i_size or not. + * ------------------------------------------------------- + * | file mapping | <EOF> | + * ------------------------------------------------------- + * | Page ... | Page N-2 | Page N-1 | Page N | Beyond | + * ^--------------------------------^-----------|--------- + * | | Straddles | + * ---------------------------------^-----------|--------| + */ unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); /* @@ -990,24 +1015,36 @@ xfs_vm_writepage( * truncate operation that is in progress. We must redirty the * page so that reclaim stops reclaiming it. Otherwise * xfs_vm_releasepage() is called on it and gets confused. + * + * Note that the end_index is unsigned long, it would overflow + * if the given offset is greater than 16TB on 32-bit system + * and if we do check the page is fully outside i_size or not + * via "if (page->index >= end_index + 1)" as "end_index + 1" + * will be evaluated to 0. Hence this page will be redirtied + * and be written out repeatedly which would result in an + * infinite loop, the user program that perform this operation + * will hang. Instead, we can verify this situation by checking + * if the page to write is totally beyond the i_size or if it's + * offset is just equal to the EOF. */ - if (page->index >= end_index + 1 || offset_into_page == 0) + if (page->index > end_index || + (page->index == end_index && offset_into_page == 0)) goto redirty; /* * The page straddles i_size. It must be zeroed out on each * and every writepage invocation because it may be mmapped. * "A file is mapped in multiples of the page size. For a file - * that is not a multiple of the page size, the remaining + * that is not a multiple of the page size, the remaining * memory is zeroed when mapped, and writes to that region are * not written out to the file." */ zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); + + /* Adjust the end_offset to the end of file */ + end_offset = offset; } - end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, - offset); len = 1 << inode->i_blkbits; bh = head = page_buffers(page); @@ -1188,9 +1225,9 @@ xfs_vm_releasepage( xfs_count_page_state(page, &delalloc, &unwritten); - if (WARN_ON(delalloc)) + if (WARN_ON_ONCE(delalloc)) return 0; - if (WARN_ON(unwritten)) + if (WARN_ON_ONCE(unwritten)) return 0; return try_to_free_buffers(page); diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index abda1124a70f..bfe36fc2cdc2 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -77,17 +77,27 @@ STATIC int xfs_attr_refillstate(xfs_da_state_t *state); STATIC int -xfs_attr_name_to_xname( - struct xfs_name *xname, - const unsigned char *aname) +xfs_attr_args_init( + struct xfs_da_args *args, + struct xfs_inode *dp, + const unsigned char *name, + int flags) { - if (!aname) + + if (!name) return EINVAL; - xname->name = aname; - xname->len = strlen((char *)aname); - if (xname->len >= MAXNAMELEN) + + memset(args, 0, sizeof(*args)); + args->geo = dp->i_mount->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->dp = dp; + args->flags = flags; + args->name = name; + args->namelen = strlen((const char *)name); + if (args->namelen >= MAXNAMELEN) return EFAULT; /* match IRIX behaviour */ + args->hashval = xfs_da_hashname(args->name, args->namelen); return 0; } @@ -106,79 +116,46 @@ xfs_inode_hasattr( * Overall external interface routines. *========================================================================*/ -STATIC int -xfs_attr_get_int( +int +xfs_attr_get( struct xfs_inode *ip, - struct xfs_name *name, + const unsigned char *name, unsigned char *value, int *valuelenp, int flags) { - xfs_da_args_t args; - int error; + struct xfs_da_args args; + uint lock_mode; + int error; + + XFS_STATS_INC(xs_attr_get); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return EIO; if (!xfs_inode_hasattr(ip)) return ENOATTR; - /* - * Fill in the arg structure for this request. - */ - memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; + error = xfs_attr_args_init(&args, ip, name, flags); + if (error) + return error; + args.value = value; args.valuelen = *valuelenp; - args.flags = flags; - args.hashval = xfs_da_hashname(args.name, args.namelen); - args.dp = ip; - args.whichfork = XFS_ATTR_FORK; - /* - * Decide on what work routines to call based on the inode size. - */ - if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + lock_mode = xfs_ilock_attr_map_shared(ip); + if (!xfs_inode_hasattr(ip)) + error = ENOATTR; + else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) error = xfs_attr_shortform_getvalue(&args); - } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) { + else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) error = xfs_attr_leaf_get(&args); - } else { + else error = xfs_attr_node_get(&args); - } + xfs_iunlock(ip, lock_mode); - /* - * Return the number of bytes in the value to the caller. - */ *valuelenp = args.valuelen; - - if (error == EEXIST) - error = 0; - return(error); -} - -int -xfs_attr_get( - xfs_inode_t *ip, - const unsigned char *name, - unsigned char *value, - int *valuelenp, - int flags) -{ - int error; - struct xfs_name xname; - uint lock_mode; - - XFS_STATS_INC(xs_attr_get); - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return(EIO); - - error = xfs_attr_name_to_xname(&xname, name); - if (error) - return error; - - lock_mode = xfs_ilock_attr_map_shared(ip); - error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags); - xfs_iunlock(ip, lock_mode); - return(error); + return error == EEXIST ? 0 : error; } /* @@ -186,12 +163,10 @@ xfs_attr_get( */ STATIC int xfs_attr_calc_size( - struct xfs_inode *ip, - int namelen, - int valuelen, + struct xfs_da_args *args, int *local) { - struct xfs_mount *mp = ip->i_mount; + struct xfs_mount *mp = args->dp->i_mount; int size; int nblks; @@ -199,12 +174,10 @@ xfs_attr_calc_size( * Determine space new attribute will use, and if it would be * "local" or "remote" (note: local != inline). */ - size = xfs_attr_leaf_newentsize(namelen, valuelen, - mp->m_sb.sb_blocksize, local); - + size = xfs_attr_leaf_newentsize(args, local); nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); if (*local) { - if (size > (mp->m_sb.sb_blocksize >> 1)) { + if (size > (args->geo->blksize / 2)) { /* Double split possible */ nblks *= 2; } @@ -213,7 +186,7 @@ xfs_attr_calc_size( * Out of line attribute, cannot double split, but * make room for the attribute value itself. */ - uint dblocks = xfs_attr3_rmt_blocks(mp, valuelen); + uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen); nblks += dblocks; nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); } @@ -221,26 +194,38 @@ xfs_attr_calc_size( return nblks; } -STATIC int -xfs_attr_set_int( - struct xfs_inode *dp, - struct xfs_name *name, - unsigned char *value, - int valuelen, - int flags) +int +xfs_attr_set( + struct xfs_inode *dp, + const unsigned char *name, + unsigned char *value, + int valuelen, + int flags) { - xfs_da_args_t args; - xfs_fsblock_t firstblock; - xfs_bmap_free_t flist; - int error, err2, committed; struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args; + struct xfs_bmap_free flist; struct xfs_trans_res tres; + xfs_fsblock_t firstblock; int rsvd = (flags & ATTR_ROOT) != 0; - int local; + int error, err2, committed, local; + + XFS_STATS_INC(xs_attr_set); + + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + return EIO; + + error = xfs_attr_args_init(&args, dp, name, flags); + if (error) + return error; + + args.value = value; + args.valuelen = valuelen; + args.firstblock = &firstblock; + args.flist = &flist; + args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + args.total = xfs_attr_calc_size(&args, &local); - /* - * Attach the dquots to the inode. - */ error = xfs_qm_dqattach(dp, 0); if (error) return error; @@ -251,32 +236,14 @@ xfs_attr_set_int( */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + - XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen); + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); - if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) - return(error); + error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); + if (error) + return error; } /* - * Fill in the arg structure for this request. - */ - memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; - args.value = value; - args.valuelen = valuelen; - args.flags = flags; - args.hashval = xfs_da_hashname(args.name, args.namelen); - args.dp = dp; - args.firstblock = &firstblock; - args.flist = &flist; - args.whichfork = XFS_ATTR_FORK; - args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - - /* Size is now blocks for attribute data */ - args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local); - - /* * Start our first transaction of the day. * * All future transactions during this code must be "chained" off @@ -303,7 +270,7 @@ xfs_attr_set_int( error = xfs_trans_reserve(args.trans, &tres, args.total, 0); if (error) { xfs_trans_cancel(args.trans, 0); - return(error); + return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -313,7 +280,7 @@ xfs_attr_set_int( if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); - return (error); + return error; } xfs_trans_ijoin(args.trans, dp, 0); @@ -322,9 +289,9 @@ xfs_attr_set_int( * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ - if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || - ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) && - (dp->i_d.di_anextents == 0))) { + if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || + (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + dp->i_d.di_anextents == 0)) { /* * Build initial attribute list (if required). @@ -349,9 +316,8 @@ xfs_attr_set_int( * the transaction goes to disk before returning * to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); - } if (!error && (flags & ATTR_KERNOTIME) == 0) { xfs_trans_ichgtime(args.trans, dp, @@ -361,7 +327,7 @@ xfs_attr_set_int( XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error == 0 ? err2 : error); + return error ? error : err2; } /* @@ -399,22 +365,19 @@ xfs_attr_set_int( } - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) error = xfs_attr_leaf_addname(&args); - } else { + else error = xfs_attr_node_addname(&args); - } - if (error) { + if (error) goto out; - } /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); - } if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); @@ -426,65 +389,47 @@ xfs_attr_set_int( error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; out: - if (args.trans) + if (args.trans) { xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + } xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; } +/* + * Generic handler routine to remove a name from an attribute list. + * Transitions attribute list from Btree to shortform as necessary. + */ int -xfs_attr_set( - xfs_inode_t *dp, - const unsigned char *name, - unsigned char *value, - int valuelen, - int flags) +xfs_attr_remove( + struct xfs_inode *dp, + const unsigned char *name, + int flags) { - int error; - struct xfs_name xname; + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args; + struct xfs_bmap_free flist; + xfs_fsblock_t firstblock; + int error; - XFS_STATS_INC(xs_attr_set); + XFS_STATS_INC(xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return (EIO); + return EIO; + + if (!xfs_inode_hasattr(dp)) + return ENOATTR; - error = xfs_attr_name_to_xname(&xname, name); + error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; - return xfs_attr_set_int(dp, &xname, value, valuelen, flags); -} - -/* - * Generic handler routine to remove a name from an attribute list. - * Transitions attribute list from Btree to shortform as necessary. - */ -STATIC int -xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) -{ - xfs_da_args_t args; - xfs_fsblock_t firstblock; - xfs_bmap_free_t flist; - int error; - xfs_mount_t *mp = dp->i_mount; - - /* - * Fill in the arg structure for this request. - */ - memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; - args.flags = flags; - args.hashval = xfs_da_hashname(args.name, args.namelen); - args.dp = dp; args.firstblock = &firstblock; args.flist = &flist; - args.total = 0; - args.whichfork = XFS_ATTR_FORK; /* * we have no control over the attribute names that userspace passes us @@ -493,9 +438,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) */ args.op_flags = XFS_DA_OP_OKNOENT; - /* - * Attach the dquots to the inode. - */ error = xfs_qm_dqattach(dp, 0); if (error) return error; @@ -524,7 +466,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) XFS_ATTRRM_SPACE_RES(mp), 0); if (error) { xfs_trans_cancel(args.trans, 0); - return(error); + return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -534,35 +476,26 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) */ xfs_trans_ijoin(args.trans, dp, 0); - /* - * Decide on what work routines to call based on the inode size. - */ if (!xfs_inode_hasattr(dp)) { error = XFS_ERROR(ENOATTR); - goto out; - } - if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); error = xfs_attr_shortform_remove(&args); - if (error) { - goto out; - } } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { error = xfs_attr_leaf_removename(&args); } else { error = xfs_attr_node_removename(&args); } - if (error) { + + if (error) goto out; - } /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); - } if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); @@ -574,45 +507,17 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; out: - if (args.trans) + if (args.trans) { xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); -} - -int -xfs_attr_remove( - xfs_inode_t *dp, - const unsigned char *name, - int flags) -{ - int error; - struct xfs_name xname; - - XFS_STATS_INC(xs_attr_remove); - - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return (EIO); - - error = xfs_attr_name_to_xname(&xname, name); - if (error) - return error; - - xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!xfs_inode_hasattr(dp)) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return XFS_ERROR(ENOATTR); } - xfs_iunlock(dp, XFS_ILOCK_SHARED); - - return xfs_attr_remove_int(dp, &xname, flags); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + return error; } - /*======================================================================== * External routines when attribute list is inside the inode *========================================================================*/ @@ -958,7 +863,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args) } /*======================================================================== - * External routines when attribute list size > XFS_LBSIZE(mp). + * External routines when attribute list size > geo->blksize *========================================================================*/ /* @@ -991,8 +896,6 @@ restart: state = xfs_da_state_alloc(); state->args = args; state->mp = mp; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; /* * Search to see if name already exists, and get back a pointer @@ -1170,8 +1073,6 @@ restart: state = xfs_da_state_alloc(); state->args = args; state->mp = mp; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; state->inleaf = 0; error = xfs_da3_node_lookup_int(state, &retval); if (error) @@ -1262,8 +1163,6 @@ xfs_attr_node_removename(xfs_da_args_t *args) state = xfs_da_state_alloc(); state->args = args; state->mp = dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; /* * Search to see if name exists, and get back a pointer to it. @@ -1525,8 +1424,6 @@ xfs_attr_node_get(xfs_da_args_t *args) state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; /* * Search to see if name exists, and get back a pointer to it. diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 511c283459b1..28712d29e43c 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -80,11 +80,12 @@ STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state, /* * Utility routines. */ -STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf, +STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, + struct xfs_attr_leafblock *src_leaf, struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start, struct xfs_attr_leafblock *dst_leaf, struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start, - int move_count, struct xfs_mount *mp); + int move_count); STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); void @@ -711,6 +712,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) memset((char *)&nargs, 0, sizeof(nargs)); nargs.dp = dp; + nargs.geo = args->geo; nargs.firstblock = args->firstblock; nargs.flist = args->flist; nargs.total = args->total; @@ -805,18 +807,18 @@ xfs_attr3_leaf_to_shortform( trace_xfs_attr_leaf_to_sf(args); - tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); + tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); if (!tmpbuffer) return ENOMEM; - memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); + memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); leaf = (xfs_attr_leafblock_t *)tmpbuffer; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); entry = xfs_attr3_leaf_entryp(leaf); /* XXX (dgc): buffer is about to be marked stale - why zero it? */ - memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); + memset(bp->b_addr, 0, args->geo->blksize); /* * Clean out the prior contents of the attribute list. @@ -838,6 +840,7 @@ xfs_attr3_leaf_to_shortform( * Copy the attributes */ memset((char *)&nargs, 0, sizeof(nargs)); + nargs.geo = args->geo; nargs.dp = dp; nargs.firstblock = args->firstblock; nargs.flist = args->flist; @@ -904,12 +907,12 @@ xfs_attr3_leaf_to_node( /* copy leaf to new buffer, update identifiers */ xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF); bp2->b_ops = bp1->b_ops; - memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp)); + memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize); if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_blkinfo *hdr3 = bp2->b_addr; hdr3->blkno = cpu_to_be64(bp2->b_bn); } - xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1); /* * Set up the new root node. @@ -930,7 +933,7 @@ xfs_attr3_leaf_to_node( btree[0].before = cpu_to_be32(blkno); icnodehdr.count = 1; dp->d_ops->node_hdr_to_disk(node, &icnodehdr); - xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1); error = 0; out: return error; @@ -966,10 +969,10 @@ xfs_attr3_leaf_create( bp->b_ops = &xfs_attr3_leaf_buf_ops; xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF); leaf = bp->b_addr; - memset(leaf, 0, XFS_LBSIZE(mp)); + memset(leaf, 0, args->geo->blksize); memset(&ichdr, 0, sizeof(ichdr)); - ichdr.firstused = XFS_LBSIZE(mp); + ichdr.firstused = args->geo->blksize; if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_blkinfo *hdr3 = bp->b_addr; @@ -988,7 +991,7 @@ xfs_attr3_leaf_create( ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base; xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); - xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1); *bpp = bp; return 0; @@ -1074,8 +1077,7 @@ xfs_attr3_leaf_add( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); ASSERT(args->index >= 0 && args->index <= ichdr.count); - entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen, - args->trans->t_mountp->m_sb.sb_blocksize, NULL); + entsize = xfs_attr_leaf_newentsize(args, NULL); /* * Search through freemap for first-fit on new name length. @@ -1174,17 +1176,14 @@ xfs_attr3_leaf_add_work( * Allocate space for the new string (at the end of the run). */ mp = args->trans->t_mountp; - ASSERT(ichdr->freemap[mapindex].base < XFS_LBSIZE(mp)); + ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize); ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0); ASSERT(ichdr->freemap[mapindex].size >= - xfs_attr_leaf_newentsize(args->namelen, args->valuelen, - mp->m_sb.sb_blocksize, NULL)); - ASSERT(ichdr->freemap[mapindex].size < XFS_LBSIZE(mp)); + xfs_attr_leaf_newentsize(args, NULL)); + ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize); ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0); - ichdr->freemap[mapindex].size -= - xfs_attr_leaf_newentsize(args->namelen, args->valuelen, - mp->m_sb.sb_blocksize, &tmp); + ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp); entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base + ichdr->freemap[mapindex].size); @@ -1269,14 +1268,13 @@ xfs_attr3_leaf_compact( struct xfs_attr_leafblock *leaf_dst; struct xfs_attr3_icleaf_hdr ichdr_src; struct xfs_trans *trans = args->trans; - struct xfs_mount *mp = trans->t_mountp; char *tmpbuffer; trace_xfs_attr_leaf_compact(args); - tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); - memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); - memset(bp->b_addr, 0, XFS_LBSIZE(mp)); + tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); + memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); + memset(bp->b_addr, 0, args->geo->blksize); leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; leaf_dst = bp->b_addr; @@ -1289,7 +1287,7 @@ xfs_attr3_leaf_compact( /* Initialise the incore headers */ ichdr_src = *ichdr_dst; /* struct copy */ - ichdr_dst->firstused = XFS_LBSIZE(mp); + ichdr_dst->firstused = args->geo->blksize; ichdr_dst->usedbytes = 0; ichdr_dst->count = 0; ichdr_dst->holes = 0; @@ -1304,13 +1302,13 @@ xfs_attr3_leaf_compact( * Copy all entry's in the same (sorted) order, * but allocate name/value pairs packed and in sequence. */ - xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0, - ichdr_src.count, mp); + xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0, + leaf_dst, ichdr_dst, 0, ichdr_src.count); /* * this logs the entire buffer, but the caller must write the header * back to the buffer when it is finished modifying it. */ - xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1); kmem_free(tmpbuffer); } @@ -1461,8 +1459,8 @@ xfs_attr3_leaf_rebalance( /* * Move high entries from leaf1 to low end of leaf2. */ - xfs_attr3_leaf_moveents(leaf1, &ichdr1, ichdr1.count - count, - leaf2, &ichdr2, 0, count, state->mp); + xfs_attr3_leaf_moveents(args, leaf1, &ichdr1, + ichdr1.count - count, leaf2, &ichdr2, 0, count); } else if (count > ichdr1.count) { /* @@ -1490,14 +1488,14 @@ xfs_attr3_leaf_rebalance( /* * Move low entries from leaf2 to high end of leaf1. */ - xfs_attr3_leaf_moveents(leaf2, &ichdr2, 0, leaf1, &ichdr1, - ichdr1.count, count, state->mp); + xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1, + ichdr1.count, count); } xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1); xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2); - xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); - xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); + xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1); /* * Copy out last hashval in each block for B-tree code. @@ -1592,11 +1590,9 @@ xfs_attr3_leaf_figure_balance( max = ichdr1->count + ichdr2->count; half = (max + 1) * sizeof(*entry); half += ichdr1->usedbytes + ichdr2->usedbytes + - xfs_attr_leaf_newentsize(state->args->namelen, - state->args->valuelen, - state->blocksize, NULL); + xfs_attr_leaf_newentsize(state->args, NULL); half /= 2; - lastdelta = state->blocksize; + lastdelta = state->args->geo->blksize; entry = xfs_attr3_leaf_entryp(leaf1); for (count = index = 0; count < max; entry++, index++, count++) { @@ -1606,10 +1602,7 @@ xfs_attr3_leaf_figure_balance( */ if (count == blk1->index) { tmp = totallen + sizeof(*entry) + - xfs_attr_leaf_newentsize( - state->args->namelen, - state->args->valuelen, - state->blocksize, NULL); + xfs_attr_leaf_newentsize(state->args, NULL); if (XFS_ATTR_ABS(half - tmp) > lastdelta) break; lastdelta = XFS_ATTR_ABS(half - tmp); @@ -1645,10 +1638,7 @@ xfs_attr3_leaf_figure_balance( totallen -= count * sizeof(*entry); if (foundit) { totallen -= sizeof(*entry) + - xfs_attr_leaf_newentsize( - state->args->namelen, - state->args->valuelen, - state->blocksize, NULL); + xfs_attr_leaf_newentsize(state->args, NULL); } *countarg = count; @@ -1700,7 +1690,7 @@ xfs_attr3_leaf_toosmall( bytes = xfs_attr3_leaf_hdr_size(leaf) + ichdr.count * sizeof(xfs_attr_leaf_entry_t) + ichdr.usedbytes; - if (bytes > (state->blocksize >> 1)) { + if (bytes > (state->args->geo->blksize >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return(0); } @@ -1754,7 +1744,8 @@ xfs_attr3_leaf_toosmall( xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); - bytes = state->blocksize - (state->blocksize >> 2) - + bytes = state->args->geo->blksize - + (state->args->geo->blksize >> 2) - ichdr.usedbytes - ichdr2.usedbytes - ((ichdr.count + ichdr2.count) * sizeof(xfs_attr_leaf_entry_t)) - @@ -1805,7 +1796,6 @@ xfs_attr3_leaf_remove( struct xfs_attr_leafblock *leaf; struct xfs_attr3_icleaf_hdr ichdr; struct xfs_attr_leaf_entry *entry; - struct xfs_mount *mp = args->trans->t_mountp; int before; int after; int smallest; @@ -1819,7 +1809,7 @@ xfs_attr3_leaf_remove( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(ichdr.count > 0 && ichdr.count < XFS_LBSIZE(mp) / 8); + ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); ASSERT(args->index >= 0 && args->index < ichdr.count); ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + xfs_attr3_leaf_hdr_size(leaf)); @@ -1827,7 +1817,7 @@ xfs_attr3_leaf_remove( entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); - ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); + ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); /* * Scan through free region table: @@ -1842,8 +1832,8 @@ xfs_attr3_leaf_remove( smallest = XFS_ATTR_LEAF_MAPSIZE - 1; entsize = xfs_attr_leaf_entsize(leaf, args->index); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - ASSERT(ichdr.freemap[i].base < XFS_LBSIZE(mp)); - ASSERT(ichdr.freemap[i].size < XFS_LBSIZE(mp)); + ASSERT(ichdr.freemap[i].base < args->geo->blksize); + ASSERT(ichdr.freemap[i].size < args->geo->blksize); if (ichdr.freemap[i].base == tablesize) { ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t); ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t); @@ -1920,11 +1910,11 @@ xfs_attr3_leaf_remove( * removing the name. */ if (smallest) { - tmp = XFS_LBSIZE(mp); + tmp = args->geo->blksize; entry = xfs_attr3_leaf_entryp(leaf); for (i = ichdr.count - 1; i >= 0; entry++, i--) { ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); - ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); + ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); if (be16_to_cpu(entry->nameidx) < tmp) tmp = be16_to_cpu(entry->nameidx); @@ -1947,7 +1937,7 @@ xfs_attr3_leaf_remove( tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + ichdr.count * sizeof(xfs_attr_leaf_entry_t); - return tmp < mp->m_attr_magicpct; /* leaf is < 37% full */ + return tmp < args->geo->magicpct; /* leaf is < 37% full */ } /* @@ -1964,7 +1954,6 @@ xfs_attr3_leaf_unbalance( struct xfs_attr3_icleaf_hdr drophdr; struct xfs_attr3_icleaf_hdr savehdr; struct xfs_attr_leaf_entry *entry; - struct xfs_mount *mp = state->mp; trace_xfs_attr_leaf_unbalance(state->args); @@ -1991,13 +1980,15 @@ xfs_attr3_leaf_unbalance( */ if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, drop_blk->bp, &drophdr)) { - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, save_leaf, &savehdr, 0, - drophdr.count, mp); + drophdr.count); } else { - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, save_leaf, &savehdr, - savehdr.count, drophdr.count, mp); + savehdr.count, drophdr.count); } } else { /* @@ -2007,7 +1998,7 @@ xfs_attr3_leaf_unbalance( struct xfs_attr_leafblock *tmp_leaf; struct xfs_attr3_icleaf_hdr tmphdr; - tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP); + tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP); /* * Copy the header into the temp leaf so that all the stuff @@ -2020,35 +2011,39 @@ xfs_attr3_leaf_unbalance( tmphdr.magic = savehdr.magic; tmphdr.forw = savehdr.forw; tmphdr.back = savehdr.back; - tmphdr.firstused = state->blocksize; + tmphdr.firstused = state->args->geo->blksize; /* write the header to the temp buffer to initialise it */ xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr); if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, drop_blk->bp, &drophdr)) { - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, tmp_leaf, &tmphdr, 0, - drophdr.count, mp); - xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0, + drophdr.count); + xfs_attr3_leaf_moveents(state->args, + save_leaf, &savehdr, 0, tmp_leaf, &tmphdr, tmphdr.count, - savehdr.count, mp); + savehdr.count); } else { - xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0, + xfs_attr3_leaf_moveents(state->args, + save_leaf, &savehdr, 0, tmp_leaf, &tmphdr, 0, - savehdr.count, mp); - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + savehdr.count); + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, tmp_leaf, &tmphdr, tmphdr.count, - drophdr.count, mp); + drophdr.count); } - memcpy(save_leaf, tmp_leaf, state->blocksize); + memcpy(save_leaf, tmp_leaf, state->args->geo->blksize); savehdr = tmphdr; /* struct copy */ kmem_free(tmp_leaf); } xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr); xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, - state->blocksize - 1); + state->args->geo->blksize - 1); /* * Copy out last hashval in each block for B-tree code. @@ -2094,7 +2089,7 @@ xfs_attr3_leaf_lookup_int( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8); + ASSERT(ichdr.count < args->geo->blksize / 8); /* * Binary search. (note: small blocks will skip this loop) @@ -2198,7 +2193,7 @@ xfs_attr3_leaf_getvalue( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8); + ASSERT(ichdr.count < args->geo->blksize / 8); ASSERT(args->index < ichdr.count); entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2249,14 +2244,14 @@ xfs_attr3_leaf_getvalue( /*ARGSUSED*/ STATIC void xfs_attr3_leaf_moveents( + struct xfs_da_args *args, struct xfs_attr_leafblock *leaf_s, struct xfs_attr3_icleaf_hdr *ichdr_s, int start_s, struct xfs_attr_leafblock *leaf_d, struct xfs_attr3_icleaf_hdr *ichdr_d, int start_d, - int count, - struct xfs_mount *mp) + int count) { struct xfs_attr_leaf_entry *entry_s; struct xfs_attr_leaf_entry *entry_d; @@ -2276,10 +2271,10 @@ xfs_attr3_leaf_moveents( ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC || ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); ASSERT(ichdr_s->magic == ichdr_d->magic); - ASSERT(ichdr_s->count > 0 && ichdr_s->count < XFS_LBSIZE(mp) / 8); + ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) + xfs_attr3_leaf_hdr_size(leaf_s)); - ASSERT(ichdr_d->count < XFS_LBSIZE(mp) / 8); + ASSERT(ichdr_d->count < args->geo->blksize / 8); ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) + xfs_attr3_leaf_hdr_size(leaf_d)); @@ -2331,11 +2326,11 @@ xfs_attr3_leaf_moveents( entry_d->nameidx = cpu_to_be16(ichdr_d->firstused); entry_d->flags = entry_s->flags; ASSERT(be16_to_cpu(entry_d->nameidx) + tmp - <= XFS_LBSIZE(mp)); + <= args->geo->blksize); memmove(xfs_attr3_leaf_name(leaf_d, desti), xfs_attr3_leaf_name(leaf_s, start_s + i), tmp); ASSERT(be16_to_cpu(entry_s->nameidx) + tmp - <= XFS_LBSIZE(mp)); + <= args->geo->blksize); memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); ichdr_s->usedbytes -= tmp; ichdr_d->usedbytes += tmp; @@ -2356,7 +2351,7 @@ xfs_attr3_leaf_moveents( tmp = count * sizeof(xfs_attr_leaf_entry_t); entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; ASSERT(((char *)entry_s + tmp) <= - ((char *)leaf_s + XFS_LBSIZE(mp))); + ((char *)leaf_s + args->geo->blksize)); memset(entry_s, 0, tmp); } else { /* @@ -2371,7 +2366,7 @@ xfs_attr3_leaf_moveents( tmp = count * sizeof(xfs_attr_leaf_entry_t); entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count]; ASSERT(((char *)entry_s + tmp) <= - ((char *)leaf_s + XFS_LBSIZE(mp))); + ((char *)leaf_s + args->geo->blksize)); memset(entry_s, 0, tmp); } @@ -2439,22 +2434,21 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) * a "local" or a "remote" attribute. */ int -xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local) +xfs_attr_leaf_newentsize( + struct xfs_da_args *args, + int *local) { - int size; + int size; - size = xfs_attr_leaf_entsize_local(namelen, valuelen); - if (size < xfs_attr_leaf_entsize_local_max(blocksize)) { - if (local) { + size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen); + if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) { + if (local) *local = 1; - } - } else { - size = xfs_attr_leaf_entsize_remote(namelen); - if (local) { - *local = 0; - } + return size; } - return size; + if (local) + *local = 0; + return xfs_attr_leaf_entsize_remote(args->namelen); } diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 3ec5ec0b8678..e2929da7c3ba 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -96,8 +96,7 @@ int xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count); int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, struct xfs_buf *leaf2_bp); -int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, - int *local); +int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local); int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 833fe5d98d80..90e2eeb21207 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -444,6 +444,7 @@ xfs_attr3_leaf_list_int( xfs_da_args_t args; memset((char *)&args, 0, sizeof(args)); + args.geo = context->dp->i_mount->m_attr_geo; args.dp = context->dp; args.whichfork = XFS_ATTR_FORK; args.valuelen = valuelen; diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index d2e6e948cec7..b5adfecbb8ee 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c @@ -68,7 +68,6 @@ xfs_attr3_rmt_blocks( */ static bool xfs_attr3_rmt_hdr_ok( - struct xfs_mount *mp, void *ptr, xfs_ino_t ino, uint32_t offset, @@ -126,6 +125,7 @@ xfs_attr3_rmt_read_verify( char *ptr; int len; xfs_daddr_t bno; + int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -134,21 +134,20 @@ xfs_attr3_rmt_read_verify( ptr = bp->b_addr; bno = bp->b_bn; len = BBTOB(bp->b_length); - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0) { - if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), - XFS_ATTR3_RMT_CRC_OFF)) { + if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { xfs_buf_ioerror(bp, EFSBADCRC); break; } - if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_buf_ioerror(bp, EFSCORRUPTED); break; } - len -= XFS_LBSIZE(mp); - ptr += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + ptr += blksize; + bno += BTOBB(blksize); } if (bp->b_error) @@ -166,6 +165,7 @@ xfs_attr3_rmt_write_verify( char *ptr; int len; xfs_daddr_t bno; + int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -174,10 +174,10 @@ xfs_attr3_rmt_write_verify( ptr = bp->b_addr; bno = bp->b_bn; len = BBTOB(bp->b_length); - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0) { - if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_buf_ioerror(bp, EFSCORRUPTED); xfs_verifier_error(bp); return; @@ -188,11 +188,11 @@ xfs_attr3_rmt_write_verify( rmt = (struct xfs_attr3_rmt_hdr *)ptr; rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); } - xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF); + xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); - len -= XFS_LBSIZE(mp); - ptr += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + ptr += blksize; + bno += BTOBB(blksize); } ASSERT(len == 0); } @@ -241,17 +241,18 @@ xfs_attr_rmtval_copyout( char *src = bp->b_addr; xfs_daddr_t bno = bp->b_bn; int len = BBTOB(bp->b_length); + int blksize = mp->m_attr_geo->blksize; - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0 && *valuelen > 0) { int hdr_size = 0; - int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp)); + int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); byte_cnt = min(*valuelen, byte_cnt); if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset, + if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, byte_cnt, bno)) { xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", @@ -264,9 +265,9 @@ xfs_attr_rmtval_copyout( memcpy(*dst, src + hdr_size, byte_cnt); /* roll buffer forwards */ - len -= XFS_LBSIZE(mp); - src += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + src += blksize; + bno += BTOBB(blksize); /* roll attribute data forwards */ *valuelen -= byte_cnt; @@ -288,12 +289,13 @@ xfs_attr_rmtval_copyin( char *dst = bp->b_addr; xfs_daddr_t bno = bp->b_bn; int len = BBTOB(bp->b_length); + int blksize = mp->m_attr_geo->blksize; - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0 && *valuelen > 0) { int hdr_size; - int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp)); + int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); byte_cnt = min(*valuelen, byte_cnt); hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, @@ -305,17 +307,17 @@ xfs_attr_rmtval_copyin( * If this is the last block, zero the remainder of it. * Check that we are actually the last block, too. */ - if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) { + if (byte_cnt + hdr_size < blksize) { ASSERT(*valuelen - byte_cnt == 0); - ASSERT(len == XFS_LBSIZE(mp)); + ASSERT(len == blksize); memset(dst + hdr_size + byte_cnt, 0, - XFS_LBSIZE(mp) - hdr_size - byte_cnt); + blksize - hdr_size - byte_cnt); } /* roll buffer forwards */ - len -= XFS_LBSIZE(mp); - dst += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + dst += blksize; + bno += BTOBB(blksize); /* roll attribute data forwards */ *valuelen -= byte_cnt; diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index f1e3c907044d..e1649c0d3e02 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -66,8 +66,11 @@ static inline int xfs_lowbit64(__uint64_t v) n = ffs(w); } else { /* upper bits */ w = (__uint32_t)(v >> 32); - if (w && (n = ffs(w))) - n += 32; + if (w) { + n = ffs(w); + if (n) + n += 32; + } } return n - 1; } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index f0efc7e970ef..96175df211b1 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -94,7 +94,7 @@ xfs_bmap_compute_maxlevels( maxleafents = MAXAEXTNUM; sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); } - maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); + maxrootrecs = xfs_bmdr_maxrecs(sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; minnoderecs = mp->m_bmap_dmnr[1]; maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; @@ -233,7 +233,6 @@ xfs_default_attroffset( */ STATIC void xfs_bmap_forkoff_reset( - xfs_mount_t *mp, xfs_inode_t *ip, int whichfork) { @@ -905,7 +904,7 @@ xfs_bmap_local_to_extents_empty( ASSERT(ifp->if_bytes == 0); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); - xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); + xfs_bmap_forkoff_reset(ip, whichfork); ifp->if_flags &= ~XFS_IFINLINE; ifp->if_flags |= XFS_IFEXTENTS; XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); @@ -1099,10 +1098,11 @@ xfs_bmap_add_attrfork_local( if (S_ISDIR(ip->i_d.di_mode)) { memset(&dargs, 0, sizeof(dargs)); + dargs.geo = ip->i_mount->m_dir_geo; dargs.dp = ip; dargs.firstblock = firstblock; dargs.flist = flist; - dargs.total = ip->i_mount->m_dirblkfsbs; + dargs.total = dargs.geo->fsbcount; dargs.whichfork = XFS_DATA_FORK; dargs.trans = tp; return xfs_dir2_sf_to_block(&dargs); @@ -1675,7 +1675,6 @@ xfs_bmap_isaeof( */ int xfs_bmap_last_offset( - struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *last_block, int whichfork) @@ -3517,6 +3516,67 @@ xfs_bmap_adjacent( #undef ISVALID } +static int +xfs_bmap_longest_free_extent( + struct xfs_trans *tp, + xfs_agnumber_t ag, + xfs_extlen_t *blen, + int *notinit) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + xfs_extlen_t longest; + int error = 0; + + pag = xfs_perag_get(mp, ag); + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); + if (error) + goto out; + + if (!pag->pagf_init) { + *notinit = 1; + goto out; + } + } + + longest = xfs_alloc_longest_free_extent(mp, pag); + if (*blen < longest) + *blen = longest; + +out: + xfs_perag_put(pag); + return error; +} + +static void +xfs_bmap_select_minlen( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen, + int notinit) +{ + if (notinit || *blen < ap->minlen) { + /* + * Since we did a BUF_TRYLOCK above, it is possible that + * there is space for this request. + */ + args->minlen = ap->minlen; + } else if (*blen < args->maxlen) { + /* + * If the best seen length is less than the request length, + * use the best as the minimum. + */ + args->minlen = *blen; + } else { + /* + * Otherwise we've seen an extent as big as maxlen, use that + * as the minimum. + */ + args->minlen = args->maxlen; + } +} + STATIC int xfs_bmap_btalloc_nullfb( struct xfs_bmalloca *ap, @@ -3524,111 +3584,74 @@ xfs_bmap_btalloc_nullfb( xfs_extlen_t *blen) { struct xfs_mount *mp = ap->ip->i_mount; - struct xfs_perag *pag; xfs_agnumber_t ag, startag; int notinit = 0; int error; - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) - args->type = XFS_ALLOCTYPE_NEAR_BNO; - else - args->type = XFS_ALLOCTYPE_START_BNO; + args->type = XFS_ALLOCTYPE_START_BNO; args->total = ap->total; - /* - * Search for an allocation group with a single extent large enough - * for the request. If one isn't found, then adjust the minimum - * allocation size to the largest space found. - */ startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); if (startag == NULLAGNUMBER) startag = ag = 0; - pag = xfs_perag_get(mp, ag); while (*blen < args->maxlen) { - if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, args->tp, ag, - XFS_ALLOC_FLAG_TRYLOCK); - if (error) { - xfs_perag_put(pag); - return error; - } - } - - /* - * See xfs_alloc_fix_freelist... - */ - if (pag->pagf_init) { - xfs_extlen_t longest; - longest = xfs_alloc_longest_free_extent(mp, pag); - if (*blen < longest) - *blen = longest; - } else - notinit = 1; - - if (xfs_inode_is_filestream(ap->ip)) { - if (*blen >= args->maxlen) - break; - - if (ap->userdata) { - /* - * If startag is an invalid AG, we've - * come here once before and - * xfs_filestream_new_ag picked the - * best currently available. - * - * Don't continue looping, since we - * could loop forever. - */ - if (startag == NULLAGNUMBER) - break; - - error = xfs_filestream_new_ag(ap, &ag); - xfs_perag_put(pag); - if (error) - return error; + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, + ¬init); + if (error) + return error; - /* loop again to set 'blen'*/ - startag = NULLAGNUMBER; - pag = xfs_perag_get(mp, ag); - continue; - } - } if (++ag == mp->m_sb.sb_agcount) ag = 0; if (ag == startag) break; - xfs_perag_put(pag); - pag = xfs_perag_get(mp, ag); } - xfs_perag_put(pag); - /* - * Since the above loop did a BUF_TRYLOCK, it is - * possible that there is space for this request. - */ - if (notinit || *blen < ap->minlen) - args->minlen = ap->minlen; - /* - * If the best seen length is less than the request - * length, use the best as the minimum. - */ - else if (*blen < args->maxlen) - args->minlen = *blen; - /* - * Otherwise we've seen an extent as big as maxlen, - * use that as the minimum. - */ - else - args->minlen = args->maxlen; + xfs_bmap_select_minlen(ap, args, blen, notinit); + return 0; +} + +STATIC int +xfs_bmap_btalloc_filestreams( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen) +{ + struct xfs_mount *mp = ap->ip->i_mount; + xfs_agnumber_t ag; + int notinit = 0; + int error; + + args->type = XFS_ALLOCTYPE_NEAR_BNO; + args->total = ap->total; + + ag = XFS_FSB_TO_AGNO(mp, args->fsbno); + if (ag == NULLAGNUMBER) + ag = 0; + + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); + if (error) + return error; + + if (*blen < args->maxlen) { + error = xfs_filestream_new_ag(ap, &ag); + if (error) + return error; + + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, + ¬init); + if (error) + return error; + + } + + xfs_bmap_select_minlen(ap, args, blen, notinit); /* - * set the failure fallback case to look in the selected - * AG as the stream may have moved. + * Set the failure fallback case to look in the selected AG as stream + * may have moved. */ - if (xfs_inode_is_filestream(ap->ip)) - ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); - + ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); return 0; } @@ -3708,7 +3731,15 @@ xfs_bmap_btalloc( args.firstblock = *ap->firstblock; blen = 0; if (nullfb) { - error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); + /* + * Search for an allocation group with a single extent large + * enough for the request. If one isn't found, then adjust + * the minimum allocation size to the largest space found. + */ + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) + error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); + else + error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); if (error) return error; } else if (ap->flist->xbf_low) { diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index f84bd7af43be..38ba36e9b2f0 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -156,8 +156,8 @@ int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *last_block, int whichfork); -int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip, - xfs_fileoff_t *unused, int whichfork); +int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused, + int whichfork); int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 818d546664e7..948836c4fd90 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -84,7 +84,7 @@ xfs_bmdr_to_bmbt( rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; - dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); + dmxr = xfs_bmdr_maxrecs(dblocklen, 0); fkp = XFS_BMDR_KEY_ADDR(dblock, 1); tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); @@ -443,7 +443,7 @@ xfs_bmbt_to_bmdr( ASSERT(rblock->bb_level != 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; - dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); + dmxr = xfs_bmdr_maxrecs(dblocklen, 0); fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); tkp = XFS_BMDR_KEY_ADDR(dblock, 1); fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); @@ -519,7 +519,6 @@ xfs_bmbt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int length, int *stat) { xfs_alloc_arg_t args; /* block allocation args */ @@ -672,8 +671,7 @@ xfs_bmbt_get_dmaxrecs( { if (level != cur->bc_nlevels - 1) return cur->bc_mp->m_bmap_dmxr[level != 0]; - return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize, - level == 0); + return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0); } STATIC void @@ -914,7 +912,6 @@ xfs_bmbt_maxrecs( */ int xfs_bmdr_maxrecs( - struct xfs_mount *mp, int blocklen, int leaf) { diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 6e42e1e50b89..819a8a4dee95 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -130,7 +130,7 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, xfs_bmdr_block_t *, int); extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); -extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); +extern int xfs_bmdr_maxrecs(int blocklen, int leaf); extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 296160b8e78c..703b3ec1796c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -258,14 +258,23 @@ xfs_bmapi_allocate_worker( struct xfs_bmalloca *args = container_of(work, struct xfs_bmalloca, work); unsigned long pflags; + unsigned long new_pflags = PF_FSTRANS; - /* we are in a transaction context here */ - current_set_flags_nested(&pflags, PF_FSTRANS); + /* + * we are in a transaction context here, but may also be doing work + * in kswapd context, and hence we may need to inherit that state + * temporarily to ensure that we don't block waiting for memory reclaim + * in any way. + */ + if (args->kswapd) + new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + + current_set_flags_nested(&pflags, new_pflags); args->result = __xfs_bmapi_allocate(args); complete(args->done); - current_restore_flags_nested(&pflags, PF_FSTRANS); + current_restore_flags_nested(&pflags, new_pflags); } /* @@ -284,6 +293,7 @@ xfs_bmapi_allocate( args->done = &done; + args->kswapd = current_is_kswapd(); INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); queue_work(xfs_alloc_wq, &args->work); wait_for_completion(&done); @@ -1519,7 +1529,6 @@ xfs_collapse_file_space( while (!error && !done) { tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - tp->t_flags |= XFS_TRANS_RESERVE; /* * We would need to reserve permanent block for transaction. * This will come into picture when after shifting extent into @@ -1529,7 +1538,6 @@ xfs_collapse_file_space( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); if (error) { - ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 935ed2b24edf..075f72232a64 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,12 +50,13 @@ struct xfs_bmalloca { xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ - char eof; /* set if allocating past last extent */ - char wasdel; /* replacing a delayed allocation */ - char userdata;/* set if is user data */ - char aeof; /* allocated space at eof */ - char conv; /* overwriting unwritten extents */ - char stack_switch; + bool eof; /* set if allocating past last extent */ + bool wasdel; /* replacing a delayed allocation */ + bool userdata;/* set if is user data */ + bool aeof; /* allocated space at eof */ + bool conv; /* overwriting unwritten extents */ + bool stack_switch; + bool kswapd; /* allocation in kswapd context */ int flags; struct completion *done; struct work_struct work; diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e80d59fdf89a..bf810c6baf2b 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone; * Btree magic numbers. */ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, + XFS_FIBT_MAGIC }, { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, - XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } + XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } }; #define xfs_btree_magic(cur) \ xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] @@ -552,14 +553,11 @@ xfs_btree_get_bufl( xfs_fsblock_t fsbno, /* file system block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(!xfs_buf_geterror(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -574,15 +572,12 @@ xfs_btree_get_bufs( xfs_agblock_t agbno, /* allocation group block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(!xfs_buf_geterror(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -722,7 +717,6 @@ xfs_btree_read_bufl( mp->m_bsize, lock, &bp, ops); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); if (bp) xfs_buf_set_ref(bp, refval); *bpp = bp; @@ -1115,6 +1109,7 @@ xfs_btree_set_refs( xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); break; case XFS_BTNUM_INO: + case XFS_BTNUM_FINO: xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); break; case XFS_BTNUM_BMAP: @@ -1159,7 +1154,6 @@ STATIC int xfs_btree_read_buf_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, - int level, int flags, struct xfs_btree_block **block, struct xfs_buf **bpp) @@ -1178,7 +1172,6 @@ xfs_btree_read_buf_block( if (error) return error; - ASSERT(!xfs_buf_geterror(*bpp)); xfs_btree_set_refs(cur, *bpp); *block = XFS_BUF_TO_BLOCK(*bpp); return 0; @@ -1517,8 +1510,8 @@ xfs_btree_increment( union xfs_btree_ptr *ptrp; ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - error = xfs_btree_read_buf_block(cur, ptrp, --lev, - 0, &block, &bp); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; @@ -1616,8 +1609,8 @@ xfs_btree_decrement( union xfs_btree_ptr *ptrp; ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - error = xfs_btree_read_buf_block(cur, ptrp, --lev, - 0, &block, &bp); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); @@ -1667,7 +1660,7 @@ xfs_btree_lookup_get_block( return 0; } - error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); + error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); if (error) return error; @@ -2018,7 +2011,7 @@ xfs_btree_lshift( goto out0; /* Set up the left neighbor as "left". */ - error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; @@ -2202,7 +2195,7 @@ xfs_btree_rshift( goto out0; /* Set up the right neighbor as "right". */ - error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; @@ -2372,7 +2365,7 @@ xfs_btree_split( xfs_btree_buf_to_ptr(cur, lbp, &lptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); if (error) goto error0; if (*stat == 0) @@ -2470,7 +2463,7 @@ xfs_btree_split( * point back to right instead of to left. */ if (!xfs_btree_ptr_is_null(cur, &rrptr)) { - error = xfs_btree_read_buf_block(cur, &rrptr, level, + error = xfs_btree_read_buf_block(cur, &rrptr, 0, &rrblock, &rrbp); if (error) goto error0; @@ -2545,7 +2538,7 @@ xfs_btree_new_iroot( pp = xfs_btree_ptr_addr(cur, 1, block); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); if (error) goto error0; if (*stat == 0) { @@ -2649,7 +2642,7 @@ xfs_btree_new_root( cur->bc_ops->init_ptr_from_cur(cur, &rptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); if (error) goto error0; if (*stat == 0) @@ -2684,8 +2677,7 @@ xfs_btree_new_root( lbp = bp; xfs_btree_buf_to_ptr(cur, lbp, &lptr); left = block; - error = xfs_btree_read_buf_block(cur, &rptr, - cur->bc_nlevels - 1, 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; bp = rbp; @@ -2696,8 +2688,7 @@ xfs_btree_new_root( xfs_btree_buf_to_ptr(cur, rbp, &rptr); right = block; xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); - error = xfs_btree_read_buf_block(cur, &lptr, - cur->bc_nlevels - 1, 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; bp = lbp; @@ -3649,8 +3640,7 @@ xfs_btree_delrec( rptr = cptr; right = block; rbp = bp; - error = xfs_btree_read_buf_block(cur, &lptr, level, - 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; @@ -3667,8 +3657,7 @@ xfs_btree_delrec( lptr = cptr; left = block; lbp = bp; - error = xfs_btree_read_buf_block(cur, &rptr, level, - 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; @@ -3740,8 +3729,7 @@ xfs_btree_delrec( /* If there is a right sibling, point it to the remaining block. */ xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); if (!xfs_btree_ptr_is_null(cur, &cptr)) { - error = xfs_btree_read_buf_block(cur, &cptr, level, - 0, &rrblock, &rrbp); + error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); if (error) goto error0; xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 91e34f21bace..a04b69422f67 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -62,6 +62,7 @@ union xfs_btree_rec { #define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) +#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) /* * For logging record fields. @@ -92,6 +93,7 @@ do { \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -105,6 +107,7 @@ do { \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -129,7 +132,7 @@ struct xfs_btree_ops { int (*alloc_block)(struct xfs_btree_cur *cur, union xfs_btree_ptr *start_bno, union xfs_btree_ptr *new_bno, - int length, int *stat); + int *stat); int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); /* update last record information */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cb10a0aaab3a..7a34a1ae6552 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -216,8 +216,7 @@ _xfs_buf_alloc( STATIC int _xfs_buf_get_pages( xfs_buf_t *bp, - int page_count, - xfs_buf_flags_t flags) + int page_count) { /* Make sure that we have a page list */ if (bp->b_pages == NULL) { @@ -330,7 +329,7 @@ use_alloc_page: end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT; page_count = end - start; - error = _xfs_buf_get_pages(bp, page_count, flags); + error = _xfs_buf_get_pages(bp, page_count); if (unlikely(error)) return error; @@ -778,7 +777,7 @@ xfs_buf_associate_memory( bp->b_pages = NULL; bp->b_addr = mem; - rval = _xfs_buf_get_pages(bp, page_count, 0); + rval = _xfs_buf_get_pages(bp, page_count); if (rval) return rval; @@ -811,7 +810,7 @@ xfs_buf_get_uncached( goto fail; page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; - error = _xfs_buf_get_pages(bp, page_count, 0); + error = _xfs_buf_get_pages(bp, page_count); if (error) goto fail_free_buf; @@ -1615,7 +1614,6 @@ xfs_free_buftarg( int xfs_setsize_buftarg( xfs_buftarg_t *btp, - unsigned int blocksize, unsigned int sectorsize) { /* Set up metadata sector size info */ @@ -1650,16 +1648,13 @@ xfs_setsize_buftarg_early( xfs_buftarg_t *btp, struct block_device *bdev) { - return xfs_setsize_buftarg(btp, PAGE_SIZE, - bdev_logical_block_size(bdev)); + return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev)); } xfs_buftarg_t * xfs_alloc_buftarg( struct xfs_mount *mp, - struct block_device *bdev, - int external, - const char *fsname) + struct block_device *bdev) { xfs_buftarg_t *btp; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index b8a3abf6cf47..3a7a5523d3dc 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -298,11 +298,6 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, extern int xfs_bioerror_relse(struct xfs_buf *); -static inline int xfs_buf_geterror(xfs_buf_t *bp) -{ - return bp ? bp->b_error : ENOMEM; -} - /* Buffer Utility Routines */ extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); @@ -387,10 +382,10 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) * Handling of buftargs. */ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, - struct block_device *, int, const char *); + struct block_device *); extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); -extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); +extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 8752821443be..4654338b03fc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -812,7 +812,6 @@ xfs_buf_item_init( */ static void xfs_buf_item_log_segment( - struct xfs_buf_log_item *bip, uint first, uint last, uint *map) @@ -920,7 +919,7 @@ xfs_buf_item_log( if (end > last) end = last; - xfs_buf_item_log_segment(bip, first, end, + xfs_buf_item_log_segment(first, end, &bip->bli_formats[i].blf_data_map[0]); start += bp->b_maps[i].bm_len; @@ -1053,7 +1052,7 @@ xfs_buf_iodone_callbacks( static ulong lasttime; static xfs_buftarg_t *lasttarg; - if (likely(!xfs_buf_geterror(bp))) + if (likely(!bp->b_error)) goto do_callbacks; /* diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 6cc5f6785a77..a514ab616650 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -167,8 +167,8 @@ xfs_da3_node_verify( * we don't know if the node is for and attribute or directory tree, * so only fail if the count is outside both bounds */ - if (ichdr.count > mp->m_dir_node_ents && - ichdr.count > mp->m_attr_node_ents) + if (ichdr.count > mp->m_dir_geo->node_ents && + ichdr.count > mp->m_attr_geo->node_ents) return false; /* XXX: hash order check? */ @@ -598,7 +598,7 @@ xfs_da3_root_split( * Set up the new root node. */ error = xfs_da3_node_create(args, - (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0, + (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0, level + 1, &bp, args->whichfork); if (error) return error; @@ -616,10 +616,10 @@ xfs_da3_root_split( #ifdef DEBUG if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { - ASSERT(blk1->blkno >= mp->m_dirleafblk && - blk1->blkno < mp->m_dirfreeblk); - ASSERT(blk2->blkno >= mp->m_dirleafblk && - blk2->blkno < mp->m_dirfreeblk); + ASSERT(blk1->blkno >= args->geo->leafblk && + blk1->blkno < args->geo->freeblk); + ASSERT(blk2->blkno >= args->geo->leafblk && + blk2->blkno < args->geo->freeblk); } #endif @@ -663,7 +663,7 @@ xfs_da3_node_split( /* * Do we have to split the node? */ - if (nodehdr.count + newcount > state->node_ents) { + if (nodehdr.count + newcount > state->args->geo->node_ents) { /* * Allocate a new node, add to the doubly linked chain of * nodes, then move some of our excess entries into it. @@ -894,8 +894,8 @@ xfs_da3_node_add( ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK) - ASSERT(newblk->blkno >= state->mp->m_dirleafblk && - newblk->blkno < state->mp->m_dirfreeblk); + ASSERT(newblk->blkno >= state->args->geo->leafblk && + newblk->blkno < state->args->geo->freeblk); /* * We may need to make some room before we insert the new node. @@ -1089,14 +1089,15 @@ xfs_da3_root_join( * that could occur. For dir3 blocks we also need to update the block * number in the buffer header. */ - memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); + memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize); root_blk->bp->b_ops = bp->b_ops; xfs_trans_buf_copy_type(root_blk->bp, bp); if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) { struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr; da3->blkno = cpu_to_be64(root_blk->bp->b_bn); } - xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); + xfs_trans_log_buf(args->trans, root_blk->bp, 0, + args->geo->blksize - 1); error = xfs_da_shrink_inode(args, child, bp); return(error); } @@ -1139,7 +1140,7 @@ xfs_da3_node_toosmall( info = blk->bp->b_addr; node = (xfs_da_intnode_t *)info; dp->d_ops->node_hdr_from_disk(&nodehdr, node); - if (nodehdr.count > (state->node_ents >> 1)) { + if (nodehdr.count > (state->args->geo->node_ents >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return(0); /* blk over 50%, don't try to join */ } @@ -1176,8 +1177,8 @@ xfs_da3_node_toosmall( * We prefer coalescing with the lower numbered sibling so as * to shrink a directory over time. */ - count = state->node_ents; - count -= state->node_ents >> 2; + count = state->args->geo->node_ents; + count -= state->args->geo->node_ents >> 2; count -= nodehdr.count; /* start with smaller blk num */ @@ -1472,7 +1473,7 @@ xfs_da3_node_lookup_int( * Descend thru the B-tree searching each level for the right * node to use, until the right hashval is found. */ - blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0; + blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; for (blk = &state->path.blk[0], state->path.active = 1; state->path.active <= XFS_DA_NODE_MAXDEPTH; blk++, state->path.active++) { @@ -2090,20 +2091,12 @@ xfs_da_grow_inode( xfs_dablk_t *new_blkno) { xfs_fileoff_t bno; - int count; int error; trace_xfs_da_grow_inode(args); - if (args->whichfork == XFS_DATA_FORK) { - bno = args->dp->i_mount->m_dirleafblk; - count = args->dp->i_mount->m_dirblkfsbs; - } else { - bno = 0; - count = 1; - } - - error = xfs_da_grow_inode_int(args, &bno, count); + bno = args->geo->leafblk; + error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); if (!error) *new_blkno = (xfs_dablk_t)bno; return error; @@ -2158,7 +2151,7 @@ xfs_da3_swap_lastblock( w = args->whichfork; ASSERT(w == XFS_DATA_FORK); mp = dp->i_mount; - lastoff = mp->m_dirfreeblk; + lastoff = args->geo->freeblk; error = xfs_bmap_last_before(tp, dp, &lastoff, w); if (error) return error; @@ -2170,15 +2163,15 @@ xfs_da3_swap_lastblock( /* * Read the last block in the btree space. */ - last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; + last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); if (error) return error; /* * Copy the last block into the dead buffer and log it. */ - memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize); - xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); + memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); + xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); dead_info = dead_buf->b_addr; /* * Get values from the moved block. @@ -2247,7 +2240,7 @@ xfs_da3_swap_lastblock( sizeof(sib_info->back))); sib_buf = NULL; } - par_blkno = mp->m_dirleafblk; + par_blkno = args->geo->leafblk; level = -1; /* * Walk down the tree looking for the parent of the moved block. @@ -2357,10 +2350,7 @@ xfs_da_shrink_inode( w = args->whichfork; tp = args->trans; mp = dp->i_mount; - if (w == XFS_DATA_FORK) - count = mp->m_dirblkfsbs; - else - count = 1; + count = args->geo->fsbcount; for (;;) { /* * Remove extents. If we get ENOSPC for a dir we have to move @@ -2462,7 +2452,6 @@ xfs_buf_map_from_irec( */ static int xfs_dabuf_map( - struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, @@ -2480,7 +2469,10 @@ xfs_dabuf_map( ASSERT(map && *map); ASSERT(*nmaps == 1); - nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1; + if (whichfork == XFS_DATA_FORK) + nfsb = mp->m_dir_geo->fsbcount; + else + nfsb = mp->m_attr_geo->fsbcount; /* * Caller doesn't have a mapping. -2 means don't complain @@ -2558,7 +2550,7 @@ xfs_da_get_buf( *bpp = NULL; mapp = ↦ nmap = 1; - error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, &mapp, &nmap); if (error) { /* mapping a hole is not an error, but we don't continue */ @@ -2606,7 +2598,7 @@ xfs_da_read_buf( *bpp = NULL; mapp = ↦ nmap = 1; - error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, &mapp, &nmap); if (error) { /* mapping a hole is not an error, but we don't continue */ @@ -2625,47 +2617,6 @@ xfs_da_read_buf( xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); else xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); - - /* - * This verification code will be moved to a CRC verification callback - * function so just leave it here unchanged until then. - */ - { - xfs_dir2_data_hdr_t *hdr = bp->b_addr; - xfs_dir2_free_t *free = bp->b_addr; - xfs_da_blkinfo_t *info = bp->b_addr; - uint magic, magic1; - struct xfs_mount *mp = dp->i_mount; - - magic = be16_to_cpu(info->magic); - magic1 = be32_to_cpu(hdr->magic); - if (unlikely( - XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && - (magic != XFS_DA3_NODE_MAGIC) && - (magic != XFS_ATTR_LEAF_MAGIC) && - (magic != XFS_ATTR3_LEAF_MAGIC) && - (magic != XFS_DIR2_LEAF1_MAGIC) && - (magic != XFS_DIR3_LEAF1_MAGIC) && - (magic != XFS_DIR2_LEAFN_MAGIC) && - (magic != XFS_DIR3_LEAFN_MAGIC) && - (magic1 != XFS_DIR2_BLOCK_MAGIC) && - (magic1 != XFS_DIR3_BLOCK_MAGIC) && - (magic1 != XFS_DIR2_DATA_MAGIC) && - (magic1 != XFS_DIR3_DATA_MAGIC) && - (free->hdr.magic != - cpu_to_be32(XFS_DIR2_FREE_MAGIC)) && - (free->hdr.magic != - cpu_to_be32(XFS_DIR3_FREE_MAGIC)), - mp, XFS_ERRTAG_DA_READ_BUF, - XFS_RANDOM_DA_READ_BUF))) { - trace_xfs_da_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", - XFS_ERRLEVEL_LOW, mp, info); - error = XFS_ERROR(EFSCORRUPTED); - xfs_trans_brelse(trans, bp); - goto out_free; - } - } *bpp = bp; out_free: if (mapp != &map) @@ -2679,7 +2630,6 @@ out_free: */ xfs_daddr_t xfs_da_reada_buf( - struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, @@ -2693,7 +2643,7 @@ xfs_da_reada_buf( mapp = ↦ nmap = 1; - error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, &mapp, &nmap); if (error) { /* mapping a hole is not an error, but we don't continue */ diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 201c6091d26a..6e153e399a77 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -25,6 +25,23 @@ struct xfs_trans; struct zone; struct xfs_dir_ops; +/* + * Directory/attribute geometry information. There will be one of these for each + * data fork type, and it will be passed around via the xfs_da_args. Global + * structures will be attached to the xfs_mount. + */ +struct xfs_da_geometry { + int blksize; /* da block size in bytes */ + int fsbcount; /* da block size in filesystem blocks */ + uint8_t fsblog; /* log2 of _filesystem_ block size */ + uint8_t blklog; /* log2 of da block size */ + uint node_ents; /* # of entries in a danode */ + int magicpct; /* 37% of block size in bytes */ + xfs_dablk_t datablk; /* blockno of dir data v2 */ + xfs_dablk_t leafblk; /* blockno of leaf data v2 */ + xfs_dablk_t freeblk; /* blockno of free data v2 */ +}; + /*======================================================================== * Btree searching and modification structure definitions. *========================================================================*/ @@ -42,6 +59,7 @@ enum xfs_dacmp { * Structure to ease passing around component names. */ typedef struct xfs_da_args { + struct xfs_da_geometry *geo; /* da block geometry */ const __uint8_t *name; /* string (maybe not NULL terminated) */ int namelen; /* length of string (maybe no NULL) */ __uint8_t filetype; /* filetype of inode for directories */ @@ -110,8 +128,6 @@ typedef struct xfs_da_state_path { typedef struct xfs_da_state { xfs_da_args_t *args; /* filename arguments */ struct xfs_mount *mp; /* filesystem mount point */ - unsigned int blocksize; /* logical block size */ - unsigned int node_ents; /* how many entries in danode */ xfs_da_state_path_t path; /* search/split paths */ xfs_da_state_path_t altpath; /* alternate path for join */ unsigned char inleaf; /* insert into 1->lf, 0->splf */ @@ -185,9 +201,9 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp, int whichfork, const struct xfs_buf_ops *ops); -xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mapped_bno, - int whichfork, const struct xfs_buf_ops *ops); +xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, + xfs_daddr_t mapped_bno, int whichfork, + const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, struct xfs_buf *dead_buf); diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c index e6c83e1fbc8a..c9aee52a37e2 100644 --- a/fs/xfs/xfs_da_format.c +++ b/fs/xfs/xfs_da_format.c @@ -26,8 +26,10 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" +#include "xfs_dir2_priv.h" /* * Shortform directory ops @@ -425,9 +427,9 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) * Directory Leaf block operations */ static int -xfs_dir2_max_leaf_ents(struct xfs_mount *mp) +xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir2_leaf_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) / (uint)sizeof(struct xfs_dir2_leaf_entry); } @@ -438,9 +440,9 @@ xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) } static int -xfs_dir3_max_leaf_ents(struct xfs_mount *mp) +xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir3_leaf_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) / (uint)sizeof(struct xfs_dir2_leaf_entry); } @@ -591,9 +593,9 @@ xfs_da3_node_hdr_to_disk( * Directory free space block operations */ static int -xfs_dir2_free_max_bests(struct xfs_mount *mp) +xfs_dir2_free_max_bests(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / sizeof(xfs_dir2_data_off_t); } @@ -607,24 +609,25 @@ xfs_dir2_free_bests_p(struct xfs_dir2_free *free) * Convert data space db to the corresponding free db. */ static xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); + return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + + (db / xfs_dir2_free_max_bests(geo)); } /* * Convert data space db to the corresponding index in a free db. */ static int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return db % xfs_dir2_free_max_bests(mp); + return db % xfs_dir2_free_max_bests(geo); } static int -xfs_dir3_free_max_bests(struct xfs_mount *mp) +xfs_dir3_free_max_bests(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir3_free_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / sizeof(xfs_dir2_data_off_t); } @@ -638,18 +641,19 @@ xfs_dir3_free_bests_p(struct xfs_dir2_free *free) * Convert data space db to the corresponding free db. */ static xfs_dir2_db_t -xfs_dir3_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp); + return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + + (db / xfs_dir3_free_max_bests(geo)); } /* * Convert data space db to the corresponding index in a free db. */ static int -xfs_dir3_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return db % xfs_dir3_free_max_bests(mp); + return db % xfs_dir3_free_max_bests(geo); } static void diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/xfs_da_format.h index a19d3f8f639c..0a49b0286372 100644 --- a/fs/xfs/xfs_da_format.h +++ b/fs/xfs/xfs_da_format.h @@ -19,10 +19,6 @@ #ifndef __XFS_DA_FORMAT_H__ #define __XFS_DA_FORMAT_H__ -/*======================================================================== - * Directory Structure when greater than XFS_LBSIZE(mp) bytes. - *========================================================================*/ - /* * This structure is common to both leaf nodes and non-leaf nodes in the Btree. * @@ -122,8 +118,6 @@ struct xfs_da3_icnode_hdr { __uint16_t level; }; -#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize - /* * Directory version 2. * @@ -330,8 +324,6 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) #define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) #define XFS_DIR2_DATA_SPACE 0 #define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_DATA_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) /* * Describe a free area in the data block. @@ -456,8 +448,6 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) */ #define XFS_DIR2_LEAF_SPACE 1 #define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_LEAF_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) /* * Leaf block header. @@ -514,17 +504,6 @@ struct xfs_dir3_leaf { #define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc) /* - * Get address of the bestcount field in the single-leaf block. - */ -static inline struct xfs_dir2_leaf_tail * -xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) -{ - return (struct xfs_dir2_leaf_tail *) - ((char *)lp + mp->m_dirblksize - - sizeof(struct xfs_dir2_leaf_tail)); -} - -/* * Get address of the bests array in the single-leaf block. */ static inline __be16 * @@ -534,123 +513,6 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) } /* - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t) - (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)(by & - ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)db << - (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o; -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); -} - -/* * Free space block defintions for the node format. */ @@ -659,8 +521,6 @@ xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) */ #define XFS_DIR2_FREE_SPACE 2 #define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_FREE_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) typedef struct xfs_dir2_free_hdr { __be32 magic; /* XFS_DIR2_FREE_MAGIC */ @@ -736,16 +596,6 @@ typedef struct xfs_dir2_block_tail { } xfs_dir2_block_tail_t; /* - * Pointer to the leaf header embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_block_tail * -xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir2_block_tail *) - ((char *)hdr + mp->m_dirblksize)) - 1; -} - -/* * Pointer to the leaf entries embedded in a data block (1-block format) */ static inline struct xfs_dir2_leaf_entry * @@ -764,10 +614,6 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) * of an attribute name may not be unique, we may have duplicate keys. The * internal links in the Btree are logical block offsets into the file. * - *======================================================================== - * Attribute structure when equal to XFS_LBSIZE(mp) bytes. - *======================================================================== - * * Struct leaf_entry's are packed from the top. Name/values grow from the * bottom but are not packed. The freemap contains run-length-encoded entries * for the free bytes after the leaf_entry's, but only the N largest such, diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index fda46253966a..79670cda48ae 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -85,38 +85,74 @@ static struct xfs_nameops xfs_ascii_ci_nameops = { .compname = xfs_ascii_ci_compname, }; -void -xfs_dir_mount( - xfs_mount_t *mp) +int +xfs_da_mount( + struct xfs_mount *mp) { - int nodehdr_size; + struct xfs_da_geometry *dageo; + int nodehdr_size; - ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb)); + ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= XFS_MAX_BLOCKSIZE); mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); - mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); - mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; - mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp)); - mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); - mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp)); - nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; - mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) / + mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), + KM_SLEEP | KM_MAYFAIL); + mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), + KM_SLEEP | KM_MAYFAIL); + if (!mp->m_dir_geo || !mp->m_attr_geo) { + kmem_free(mp->m_dir_geo); + kmem_free(mp->m_attr_geo); + return ENOMEM; + } + + /* set up directory geometry */ + dageo = mp->m_dir_geo; + dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; + dageo->fsblog = mp->m_sb.sb_blocklog; + dageo->blksize = 1 << dageo->blklog; + dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; + + /* + * Now we've set up the block conversion variables, we can calculate the + * segment block constants using the geometry structure. + */ + dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET); + dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET); + dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); + dageo->node_ents = (dageo->blksize - nodehdr_size) / (uint)sizeof(xfs_da_node_entry_t); - mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) / + dageo->magicpct = (dageo->blksize * 37) / 100; + + /* set up attribute geometry - single fsb only */ + dageo = mp->m_attr_geo; + dageo->blklog = mp->m_sb.sb_blocklog; + dageo->fsblog = mp->m_sb.sb_blocklog; + dageo->blksize = 1 << dageo->blklog; + dageo->fsbcount = 1; + dageo->node_ents = (dageo->blksize - nodehdr_size) / (uint)sizeof(xfs_da_node_entry_t); + dageo->magicpct = (dageo->blksize * 37) / 100; - mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100; if (xfs_sb_version_hasasciici(&mp->m_sb)) mp->m_dirnameops = &xfs_ascii_ci_nameops; else mp->m_dirnameops = &xfs_default_nameops; + return 0; +} + +void +xfs_da_unmount( + struct xfs_mount *mp) +{ + kmem_free(mp->m_dir_geo); + kmem_free(mp->m_attr_geo); } /* @@ -192,6 +228,7 @@ xfs_dir_init( if (!args) return ENOMEM; + args->geo = dp->i_mount->m_dir_geo; args->dp = dp; args->trans = tp; error = xfs_dir2_sf_create(args, pdp->i_ino); @@ -226,6 +263,7 @@ xfs_dir_createname( if (!args) return ENOMEM; + args->geo = dp->i_mount->m_dir_geo; args->name = name->name; args->namelen = name->len; args->filetype = name->type; @@ -244,7 +282,7 @@ xfs_dir_createname( goto out_free; } - rval = xfs_dir2_isblock(tp, dp, &v); + rval = xfs_dir2_isblock(args, &v); if (rval) goto out_free; if (v) { @@ -252,7 +290,7 @@ xfs_dir_createname( goto out_free; } - rval = xfs_dir2_isleaf(tp, dp, &v); + rval = xfs_dir2_isleaf(args, &v); if (rval) goto out_free; if (v) @@ -320,6 +358,7 @@ xfs_dir_lookup( * annotations into the reclaim path for the ilock. */ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args->geo = dp->i_mount->m_dir_geo; args->name = name->name; args->namelen = name->len; args->filetype = name->type; @@ -336,7 +375,7 @@ xfs_dir_lookup( goto out_check_rval; } - rval = xfs_dir2_isblock(tp, dp, &v); + rval = xfs_dir2_isblock(args, &v); if (rval) goto out_free; if (v) { @@ -344,7 +383,7 @@ xfs_dir_lookup( goto out_check_rval; } - rval = xfs_dir2_isleaf(tp, dp, &v); + rval = xfs_dir2_isleaf(args, &v); if (rval) goto out_free; if (v) @@ -391,6 +430,7 @@ xfs_dir_removename( if (!args) return ENOMEM; + args->geo = dp->i_mount->m_dir_geo; args->name = name->name; args->namelen = name->len; args->filetype = name->type; @@ -408,7 +448,7 @@ xfs_dir_removename( goto out_free; } - rval = xfs_dir2_isblock(tp, dp, &v); + rval = xfs_dir2_isblock(args, &v); if (rval) goto out_free; if (v) { @@ -416,7 +456,7 @@ xfs_dir_removename( goto out_free; } - rval = xfs_dir2_isleaf(tp, dp, &v); + rval = xfs_dir2_isleaf(args, &v); if (rval) goto out_free; if (v) @@ -455,6 +495,7 @@ xfs_dir_replace( if (!args) return ENOMEM; + args->geo = dp->i_mount->m_dir_geo; args->name = name->name; args->namelen = name->len; args->filetype = name->type; @@ -472,7 +513,7 @@ xfs_dir_replace( goto out_free; } - rval = xfs_dir2_isblock(tp, dp, &v); + rval = xfs_dir2_isblock(args, &v); if (rval) goto out_free; if (v) { @@ -480,7 +521,7 @@ xfs_dir_replace( goto out_free; } - rval = xfs_dir2_isleaf(tp, dp, &v); + rval = xfs_dir2_isleaf(args, &v); if (rval) goto out_free; if (v) @@ -516,6 +557,7 @@ xfs_dir_canenter( if (!args) return ENOMEM; + args->geo = dp->i_mount->m_dir_geo; args->name = name->name; args->namelen = name->len; args->filetype = name->type; @@ -531,7 +573,7 @@ xfs_dir_canenter( goto out_free; } - rval = xfs_dir2_isblock(tp, dp, &v); + rval = xfs_dir2_isblock(args, &v); if (rval) goto out_free; if (v) { @@ -539,7 +581,7 @@ xfs_dir_canenter( goto out_free; } - rval = xfs_dir2_isleaf(tp, dp, &v); + rval = xfs_dir2_isleaf(args, &v); if (rval) goto out_free; if (v) @@ -579,13 +621,13 @@ xfs_dir2_grow_inode( * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); - count = mp->m_dirblkfsbs; + count = args->geo->fsbcount; error = xfs_da_grow_inode_int(args, &bno, count); if (error) return error; - *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); + *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno); /* * Update file's size if this is the data space and it grew. @@ -607,19 +649,16 @@ xfs_dir2_grow_inode( */ int xfs_dir2_isblock( - xfs_trans_t *tp, - xfs_inode_t *dp, - int *vp) /* out: 1 is block, 0 is not block */ + struct xfs_da_args *args, + int *vp) /* out: 1 is block, 0 is not block */ { - xfs_fileoff_t last; /* last file offset */ - xfs_mount_t *mp; - int rval; + xfs_fileoff_t last; /* last file offset */ + int rval; - mp = dp->i_mount; - if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; - rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize; - ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize); + rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; + ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); *vp = rval; return 0; } @@ -629,18 +668,15 @@ xfs_dir2_isblock( */ int xfs_dir2_isleaf( - xfs_trans_t *tp, - xfs_inode_t *dp, - int *vp) /* out: 1 is leaf, 0 is not leaf */ + struct xfs_da_args *args, + int *vp) /* out: 1 is block, 0 is not block */ { - xfs_fileoff_t last; /* last file offset */ - xfs_mount_t *mp; - int rval; + xfs_fileoff_t last; /* last file offset */ + int rval; - mp = dp->i_mount; - if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; - *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog); + *vp = last == args->geo->leafblk + args->geo->fsbcount; return 0; } @@ -668,11 +704,11 @@ xfs_dir2_shrink_inode( dp = args->dp; mp = dp->i_mount; tp = args->trans; - da = xfs_dir2_db_to_da(mp, db); + da = xfs_dir2_db_to_da(args->geo, db); /* * Unmap the fsblock(s). */ - if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, + if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, &done))) { /* @@ -699,12 +735,12 @@ xfs_dir2_shrink_inode( /* * If it's not a data block, we're done. */ - if (db >= XFS_DIR2_LEAF_FIRSTDB(mp)) + if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)) return 0; /* * If the block isn't the last one in the directory, we're done. */ - if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0)) + if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0)) return 0; bno = da; if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { @@ -713,7 +749,7 @@ xfs_dir2_shrink_inode( */ return error; } - if (db == mp->m_dirdatablk) + if (db == args->geo->datablk) ASSERT(bno == 0); else ASSERT(bno > 0); diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index cec70e0781ab..c8e86b0b5e99 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -80,7 +80,7 @@ struct xfs_dir_ops { struct xfs_dir3_icleaf_hdr *from); void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to, struct xfs_dir2_leaf *from); - int (*leaf_max_ents)(struct xfs_mount *mp); + int (*leaf_max_ents)(struct xfs_da_geometry *geo); struct xfs_dir2_leaf_entry * (*leaf_ents_p)(struct xfs_dir2_leaf *lp); @@ -97,10 +97,12 @@ struct xfs_dir_ops { struct xfs_dir3_icfree_hdr *from); void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to, struct xfs_dir2_free *from); - int (*free_max_bests)(struct xfs_mount *mp); + int (*free_max_bests)(struct xfs_da_geometry *geo); __be16 * (*free_bests_p)(struct xfs_dir2_free *free); - xfs_dir2_db_t (*db_to_fdb)(struct xfs_mount *mp, xfs_dir2_db_t db); - int (*db_to_fdindex)(struct xfs_mount *mp, xfs_dir2_db_t db); + xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo, + xfs_dir2_db_t db); + int (*db_to_fdindex)(struct xfs_da_geometry *geo, + xfs_dir2_db_t db); }; extern const struct xfs_dir_ops * @@ -112,7 +114,9 @@ extern const struct xfs_dir_ops * * Generic directory interface routines */ extern void xfs_dir_startup(void); -extern void xfs_dir_mount(struct xfs_mount *mp); +extern int xfs_da_mount(struct xfs_mount *mp); +extern void xfs_da_unmount(struct xfs_mount *mp); + extern int xfs_dir_isempty(struct xfs_inode *dp); extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_inode *pdp); @@ -142,23 +146,23 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); /* * Interface routines used by userspace utilities */ -extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); -extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r); +extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_buf *bp); extern void xfs_dir2_data_freescan(struct xfs_inode *dp, struct xfs_dir2_data_hdr *hdr, int *loghead); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_log_entry(struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_log_header(struct xfs_da_args *args, struct xfs_buf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_log_unused(struct xfs_da_args *args, + struct xfs_buf *bp, struct xfs_dir2_data_unused *dup); +extern void xfs_dir2_data_make_free(struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_use_free(struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 4f6a38cb83a4..c7cd3154026a 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -136,7 +136,7 @@ xfs_dir3_block_read( struct xfs_mount *mp = dp->i_mount; int err; - err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp, + err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, XFS_DATA_FORK, &xfs_dir3_block_buf_ops); if (!err && tp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); @@ -281,8 +281,7 @@ out: */ static void xfs_dir2_block_compact( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_block_tail *btp, @@ -315,18 +314,17 @@ xfs_dir2_block_compact( *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); *lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); - xfs_dir2_data_make_free(tp, dp, bp, + xfs_dir2_data_make_free(args, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), needlog, &needscan); - blp += be32_to_cpu(btp->stale) - 1; btp->stale = cpu_to_be32(1); /* * If we now need to rebuild the bestfree map, do so. * This needs to happen before the next call to use_free. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, needlog); + xfs_dir2_data_freescan(args->dp, hdr, needlog); } /* @@ -378,7 +376,7 @@ xfs_dir2_block_addname( * Set up pointers to parts of the block. */ hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -421,7 +419,7 @@ xfs_dir2_block_addname( * If need to compact the leaf entries, do it now. */ if (compact) { - xfs_dir2_block_compact(tp, dp, bp, hdr, btp, blp, &needlog, + xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog, &lfloghigh, &lfloglow); /* recalculate blp post-compaction */ blp = xfs_dir2_block_leaf_p(btp); @@ -456,7 +454,7 @@ xfs_dir2_block_addname( /* * Mark the space needed for the new leaf entry, now in use. */ - xfs_dir2_data_use_free(tp, dp, bp, enddup, + xfs_dir2_data_use_free(args, bp, enddup, (xfs_dir2_data_aoff_t) ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - sizeof(*blp)), @@ -537,13 +535,13 @@ xfs_dir2_block_addname( * Fill in the leaf entry. */ blp[mid].hashval = cpu_to_be32(args->hashval); - blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. */ - xfs_dir2_data_use_free(tp, dp, bp, dup, + xfs_dir2_data_use_free(args, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* @@ -561,9 +559,9 @@ xfs_dir2_block_addname( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, bp); + xfs_dir2_data_log_header(args, bp); xfs_dir2_block_log_tail(tp, bp); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -582,7 +580,7 @@ xfs_dir2_block_log_leaf( xfs_dir2_leaf_entry_t *blp; xfs_dir2_block_tail_t *btp; - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); + btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); blp = xfs_dir2_block_leaf_p(btp); xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); @@ -599,7 +597,7 @@ xfs_dir2_block_log_tail( xfs_dir2_data_hdr_t *hdr = bp->b_addr; xfs_dir2_block_tail_t *btp; - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); + btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), (uint)((char *)(btp + 1) - (char *)hdr - 1)); } @@ -634,13 +632,14 @@ xfs_dir2_block_lookup( mp = dp->i_mount; hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Get the offset from the leaf entry, to point to the data. */ dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); /* * Fill in inode number, CI name if appropriate, release the block. */ @@ -686,7 +685,7 @@ xfs_dir2_block_lookup_int( hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Loop doing a binary search for our hash value. @@ -724,7 +723,7 @@ xfs_dir2_block_lookup_int( * Get pointer to the entry from the leaf. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr)); /* * Compare name and if it's an exact match, return the index * and buffer. If it's the first case-insensitive match, store @@ -791,18 +790,19 @@ xfs_dir2_block_removename( tp = args->trans; mp = dp->i_mount; hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry using the leaf entry. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. */ needlog = needscan = 0; - xfs_dir2_data_make_free(tp, dp, bp, + xfs_dir2_data_make_free(args, bp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* @@ -821,7 +821,7 @@ xfs_dir2_block_removename( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, bp); + xfs_dir2_data_log_header(args, bp); xfs_dir3_data_check(dp, bp); /* * See if the size as a shortform is good enough. @@ -866,20 +866,21 @@ xfs_dir2_block_replace( dp = args->dp; mp = dp->i_mount; hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry we need to change. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. */ dep->inumber = cpu_to_be64(args->inumber); dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -939,7 +940,7 @@ xfs_dir2_leaf_to_block( leaf = lbp->b_addr; dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ents = dp->d_ops->leaf_ents_p(leaf); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || leafhdr.magic == XFS_DIR3_LEAF1_MAGIC); @@ -949,13 +950,13 @@ xfs_dir2_leaf_to_block( * been left behind during no-space-reservation operations. * These will show up in the leaf bests table. */ - while (dp->i_d.di_size > mp->m_dirblksize) { + while (dp->i_d.di_size > args->geo->blksize) { int hdrsz; hdrsz = dp->d_ops->data_entry_offset; bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == - mp->m_dirblksize - hdrsz) { + args->geo->blksize - hdrsz) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) @@ -967,7 +968,7 @@ xfs_dir2_leaf_to_block( * Read the data block if we don't already have it, give up if it fails. */ if (!dbp) { - error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp); + error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp); if (error) return error; } @@ -983,7 +984,7 @@ xfs_dir2_leaf_to_block( /* * Look at the last data entry. */ - tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1; + tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1; dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); /* * If it's not free or is too short we can't do it. @@ -1002,12 +1003,12 @@ xfs_dir2_leaf_to_block( /* * Use up the space at the end of the block (blp/btp). */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, mp->m_dirblksize - size, size, + xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size, &needlog, &needscan); /* * Initialize the block tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); @@ -1028,11 +1029,11 @@ xfs_dir2_leaf_to_block( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); /* * Pitch the old leaf block. */ - error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp); + error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp); if (error) return error; @@ -1141,13 +1142,13 @@ xfs_dir2_sf_to_block( */ dup = dp->d_ops->data_unused_p(hdr); needlog = needscan = 0; - xfs_dir2_data_use_free(tp, dp, bp, dup, mp->m_dirblksize - i, i, &needlog, - &needscan); + xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, + i, &needlog, &needscan); ASSERT(needscan == 0); /* * Fill in the tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ btp->stale = 0; blp = xfs_dir2_block_leaf_p(btp); @@ -1155,7 +1156,7 @@ xfs_dir2_sf_to_block( /* * Remove the freespace, we'll manage it. */ - xfs_dir2_data_use_free(tp, dp, bp, dup, + xfs_dir2_data_use_free(args, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), be16_to_cpu(dup->length), &needlog, &needscan); /* @@ -1168,9 +1169,9 @@ xfs_dir2_sf_to_block( dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); - blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); /* * Create entry for .. @@ -1182,9 +1183,9 @@ xfs_dir2_sf_to_block( dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); - blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); offset = dp->d_ops->data_first_offset; /* @@ -1216,7 +1217,7 @@ xfs_dir2_sf_to_block( dup->length = cpu_to_be16(newoffset - offset); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( ((char *)dup - (char *)hdr)); - xfs_dir2_data_log_unused(tp, bp, dup); + xfs_dir2_data_log_unused(args, bp, dup); xfs_dir2_data_freeinsert(hdr, dp->d_ops->data_bestfree_p(hdr), dup, &dummy); @@ -1233,12 +1234,12 @@ xfs_dir2_sf_to_block( memcpy(dep->name, sfep->name, dep->namelen); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> hashname(&name)); - blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); offset = (int)((char *)(tagp + 1) - (char *)hdr); if (++i == sfp->count) diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index afa4ad523f3f..8c2f6422648e 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -63,8 +63,10 @@ __xfs_dir3_data_check( int stale; /* count of stale leaves */ struct xfs_name name; const struct xfs_dir_ops *ops; + struct xfs_da_geometry *geo; mp = bp->b_target->bt_mount; + geo = mp->m_dir_geo; /* * We can be passed a null dp here from a verifier, so we need to go the @@ -78,7 +80,7 @@ __xfs_dir3_data_check( switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; @@ -94,7 +96,7 @@ __xfs_dir3_data_check( break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - endp = (char *)hdr + mp->m_dirblksize; + endp = (char *)hdr + geo->blksize; break; default: XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); @@ -172,9 +174,9 @@ __xfs_dir3_data_check( lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); + addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, + (xfs_dir2_data_aoff_t) + ((char *)dep - (char *)hdr)); name.name = dep->name; name.len = dep->namelen; hash = mp->m_dirnameops->hashname(&name); @@ -329,12 +331,11 @@ xfs_dir3_data_read( int xfs_dir3_data_readahead( - struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno) { - return xfs_da_reada_buf(tp, dp, bno, mapped_bno, + return xfs_da_reada_buf(dp, bno, mapped_bno, XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); } @@ -510,6 +511,7 @@ xfs_dir2_data_freescan( struct xfs_dir2_data_free *bf; char *endp; /* end of block's data */ char *p; /* current entry pointer */ + struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || @@ -528,10 +530,10 @@ xfs_dir2_data_freescan( p = (char *)dp->d_ops->data_entry_p(hdr); if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(dp->i_mount, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)hdr + dp->i_mount->m_dirblksize; + endp = (char *)hdr + geo->blksize; /* * Loop over the block's entries. */ @@ -585,8 +587,8 @@ xfs_dir3_data_init( /* * Get the buffer set up for the block. */ - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp, - XFS_DATA_FORK); + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), + -1, &bp, XFS_DATA_FORK); if (error) return error; bp->b_ops = &xfs_dir3_data_buf_ops; @@ -621,15 +623,15 @@ xfs_dir3_data_init( dup = dp->d_ops->data_unused_p(hdr); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t = mp->m_dirblksize - (uint)dp->d_ops->data_entry_offset; + t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset; bf[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ - xfs_dir2_data_log_header(tp, dp, bp); - xfs_dir2_data_log_unused(tp, bp, dup); + xfs_dir2_data_log_header(args, bp); + xfs_dir2_data_log_unused(args, bp, dup); *bpp = bp; return 0; } @@ -639,8 +641,7 @@ xfs_dir3_data_init( */ void xfs_dir2_data_log_entry( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { @@ -651,8 +652,8 @@ xfs_dir2_data_log_entry( hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), - (uint)((char *)(dp->d_ops->data_entry_tag_p(dep) + 1) - + xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), + (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) - (char *)hdr - 1)); } @@ -661,8 +662,7 @@ xfs_dir2_data_log_entry( */ void xfs_dir2_data_log_header( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp) { #ifdef DEBUG @@ -674,7 +674,8 @@ xfs_dir2_data_log_header( hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); #endif - xfs_trans_log_buf(tp, bp, 0, dp->d_ops->data_entry_offset - 1); + xfs_trans_log_buf(args->trans, bp, 0, + args->dp->d_ops->data_entry_offset - 1); } /* @@ -682,7 +683,7 @@ xfs_dir2_data_log_header( */ void xfs_dir2_data_log_unused( - struct xfs_trans *tp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup) /* data unused pointer */ { @@ -696,13 +697,13 @@ xfs_dir2_data_log_unused( /* * Log the first part of the unused entry. */ - xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), + xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr), (uint)((char *)&dup->length + sizeof(dup->length) - 1 - (char *)hdr)); /* * Log the end (tag) of the unused entry. */ - xfs_trans_log_buf(tp, bp, + xfs_trans_log_buf(args->trans, bp, (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + sizeof(xfs_dir2_data_off_t) - 1)); @@ -714,8 +715,7 @@ xfs_dir2_data_log_unused( */ void xfs_dir2_data_make_free( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ @@ -725,14 +725,12 @@ xfs_dir2_data_make_free( xfs_dir2_data_hdr_t *hdr; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ char *endptr; /* end of data area */ - xfs_mount_t *mp; /* filesystem mount point */ int needscan; /* need to regen bestfree */ xfs_dir2_data_unused_t *newdup; /* new unused entry */ xfs_dir2_data_unused_t *postdup; /* unused entry after us */ xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ struct xfs_dir2_data_free *bf; - mp = tp->t_mountp; hdr = bp->b_addr; /* @@ -740,20 +738,20 @@ xfs_dir2_data_make_free( */ if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - endptr = (char *)hdr + mp->m_dirblksize; + endptr = (char *)hdr + args->geo->blksize; else { xfs_dir2_block_tail_t *btp; /* block tail */ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > dp->d_ops->data_entry_offset) { + if (offset > args->dp->d_ops->data_entry_offset) { __be16 *tagp; /* tag just before us */ tagp = (__be16 *)((char *)hdr + offset) - 1; @@ -779,7 +777,7 @@ xfs_dir2_data_make_free( * Previous and following entries are both free, * merge everything into a single free entry. */ - bf = dp->d_ops->data_bestfree_p(hdr); + bf = args->dp->d_ops->data_bestfree_p(hdr); if (prevdup && postdup) { xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ @@ -801,7 +799,7 @@ xfs_dir2_data_make_free( be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, prevdup); + xfs_dir2_data_log_unused(args, bp, prevdup); if (!needscan) { /* * Has to be the case that entries 0 and 1 are @@ -836,7 +834,7 @@ xfs_dir2_data_make_free( be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, prevdup); + xfs_dir2_data_log_unused(args, bp, prevdup); /* * If the previous entry was in the table, the new entry * is longer, so it will be in the table too. Remove @@ -864,7 +862,7 @@ xfs_dir2_data_make_free( newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); /* * If the following entry was in the table, the new entry * is longer, so it will be in the table too. Remove @@ -891,7 +889,7 @@ xfs_dir2_data_make_free( newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } *needscanp = needscan; @@ -902,8 +900,7 @@ xfs_dir2_data_make_free( */ void xfs_dir2_data_use_free( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ @@ -934,7 +931,7 @@ xfs_dir2_data_use_free( * Look up the entry in the bestfree table. */ oldlen = be16_to_cpu(dup->length); - bf = dp->d_ops->data_bestfree_p(hdr); + bf = args->dp->d_ops->data_bestfree_p(hdr); dfp = xfs_dir2_data_freefind(hdr, bf, dup); ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); /* @@ -966,7 +963,7 @@ xfs_dir2_data_use_free( newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ @@ -994,7 +991,7 @@ xfs_dir2_data_use_free( newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ @@ -1022,13 +1019,13 @@ xfs_dir2_data_use_free( newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = cpu_to_be16((char *)newdup2 - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup2); + xfs_dir2_data_log_unused(args, bp, newdup2); /* * If the old entry was in the table, we need to scan * if the 3rd entry was valid, since these entries diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index d36e97df1187..fb0aad4440c1 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -41,9 +41,10 @@ */ static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, int *indexp, struct xfs_buf **dbpp); -static void xfs_dir3_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, - int first, int last); -static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); +static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args, + struct xfs_buf *bp, int first, int last); +static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, + struct xfs_buf *bp); /* * Check the internal consistency of a leaf1 block. @@ -92,6 +93,7 @@ xfs_dir3_leaf_check_int( int i; const struct xfs_dir_ops *ops; struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_da_geometry *geo = mp->m_dir_geo; /* * we can be passed a null dp here from a verifier, so we need to go the @@ -105,14 +107,14 @@ xfs_dir3_leaf_check_int( } ents = ops->leaf_ents_p(leaf); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(geo, leaf); /* * XXX (dgc): This value is not restrictive enough. * Should factor in the size of the bests table as well. * We can deduce a value for that from di_size. */ - if (hdr->count > ops->leaf_max_ents(mp)) + if (hdr->count > ops->leaf_max_ents(geo)) return false; /* Leaves and bests don't overlap in leaf format. */ @@ -323,7 +325,7 @@ xfs_dir3_leaf_init( if (type == XFS_DIR2_LEAF1_MAGIC) { struct xfs_dir2_leaf_tail *ltp; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf); ltp->bestcount = 0; bp->b_ops = &xfs_dir3_leaf1_buf_ops; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF); @@ -347,18 +349,18 @@ xfs_dir3_leaf_get_buf( int error; ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); - ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) && - bno < XFS_DIR2_FREE_FIRSTDB(mp)); + ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) && + bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp, - XFS_DATA_FORK); + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno), + -1, &bp, XFS_DATA_FORK); if (error) return error; xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); - xfs_dir3_leaf_log_header(tp, dp, bp); + xfs_dir3_leaf_log_header(args, bp); if (magic == XFS_DIR2_LEAF1_MAGIC) - xfs_dir3_leaf_log_tail(tp, bp); + xfs_dir3_leaf_log_tail(args, bp); *bpp = bp; return 0; } @@ -403,8 +405,8 @@ xfs_dir2_block_to_leaf( if ((error = xfs_da_grow_inode(args, &blkno))) { return error; } - ldb = xfs_dir2_da_to_db(mp, blkno); - ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp)); + ldb = xfs_dir2_da_to_db(args->geo, blkno); + ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)); /* * Initialize the leaf block, get a buffer for it. */ @@ -415,7 +417,7 @@ xfs_dir2_block_to_leaf( leaf = lbp->b_addr; hdr = dbp->b_addr; xfs_dir3_data_check(dp, dbp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); bf = dp->d_ops->data_bestfree_p(hdr); ents = dp->d_ops->leaf_ents_p(leaf); @@ -427,23 +429,23 @@ xfs_dir2_block_to_leaf( leafhdr.count = be32_to_cpu(btp->count); leafhdr.stale = be32_to_cpu(btp->stale); dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_log_header(args, lbp); /* * Could compact these but I think we always do the conversion * after squeezing out stale entries. */ memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, dp, lbp, 0, leafhdr.count - 1); + xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1); needscan = 0; needlog = 1; /* * Make the space formerly occupied by the leaf entries and block * tail be free. */ - xfs_dir2_data_make_free(tp, dp, dbp, + xfs_dir2_data_make_free(args, dbp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), - (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - + (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize - (char *)blp), &needlog, &needscan); /* @@ -461,7 +463,7 @@ xfs_dir2_block_to_leaf( /* * Set up leaf tail and bests table. */ - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ltp->bestcount = cpu_to_be32(1); bestsp = xfs_dir2_leaf_bests_p(ltp); bestsp[0] = bf[0].length; @@ -469,10 +471,10 @@ xfs_dir2_block_to_leaf( * Log the data header and leaf bests table. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); xfs_dir3_leaf_check(dp, lbp); xfs_dir3_data_check(dp, dbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, 0); + xfs_dir3_leaf_log_bests(args, lbp, 0, 0); return 0; } @@ -641,7 +643,7 @@ xfs_dir2_leaf_addname( tp = args->trans; mp = dp->i_mount; - error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); if (error) return error; @@ -653,7 +655,7 @@ xfs_dir2_leaf_addname( */ index = xfs_dir2_leaf_search_hash(args, lbp); leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ents = dp->d_ops->leaf_ents_p(leaf); dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); @@ -670,7 +672,7 @@ xfs_dir2_leaf_addname( index++, lep++) { if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) continue; - i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); if (be16_to_cpu(bestsp[i]) >= length) { @@ -810,14 +812,15 @@ xfs_dir2_leaf_addname( memmove(&bestsp[0], &bestsp[1], be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); be32_add_cpu(<p->bestcount, 1); - xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, + be32_to_cpu(ltp->bestcount) - 1); } /* * If we're filling in a previously empty block just log it. */ else - xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); + xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); hdr = dbp->b_addr; bf = dp->d_ops->data_bestfree_p(hdr); bestsp[use_block] = bf[0].length; @@ -828,8 +831,8 @@ xfs_dir2_leaf_addname( * Just read that one in. */ error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, use_block), - -1, &dbp); + xfs_dir2_db_to_da(args->geo, use_block), + -1, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -848,7 +851,7 @@ xfs_dir2_leaf_addname( /* * Mark the initial part of our freespace in use for the new entry. */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, + xfs_dir2_data_use_free(args, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -870,8 +873,8 @@ xfs_dir2_leaf_addname( * Need to log the data block's header. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_header(args, dbp); + xfs_dir2_data_log_entry(args, dbp, dep); /* * If the bests table needs to be changed, do it. * Log the change unless we've already done that. @@ -879,7 +882,7 @@ xfs_dir2_leaf_addname( if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) { bestsp[use_block] = bf[0].length; if (!grown) - xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); + xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); } lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, @@ -889,14 +892,15 @@ xfs_dir2_leaf_addname( * Fill in the new leaf entry. */ lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block, + lep->address = cpu_to_be32( + xfs_dir2_db_off_to_dataptr(args->geo, use_block, be16_to_cpu(*tagp))); /* * Log the leaf fields and give up the buffers. */ dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); - xfs_dir3_leaf_log_ents(tp, dp, lbp, lfloglow, lfloghigh); + xfs_dir3_leaf_log_header(args, lbp); + xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh); xfs_dir3_leaf_check(dp, lbp); xfs_dir3_data_check(dp, dbp); return 0; @@ -948,9 +952,9 @@ xfs_dir3_leaf_compact( leafhdr->stale = 0; dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); - xfs_dir3_leaf_log_header(args->trans, dp, bp); + xfs_dir3_leaf_log_header(args, bp); if (loglow != -1) - xfs_dir3_leaf_log_ents(args->trans, dp, bp, loglow, to - 1); + xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1); } /* @@ -1052,7 +1056,7 @@ xfs_dir3_leaf_compact_x1( */ static void xfs_dir3_leaf_log_bests( - xfs_trans_t *tp, /* transaction pointer */ + struct xfs_da_args *args, struct xfs_buf *bp, /* leaf buffer */ int first, /* first entry to log */ int last) /* last entry to log */ @@ -1065,10 +1069,11 @@ xfs_dir3_leaf_log_bests( ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)); - ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); firstb = xfs_dir2_leaf_bests_p(ltp) + first; lastb = xfs_dir2_leaf_bests_p(ltp) + last; - xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)firstb - (char *)leaf), (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); } @@ -1077,8 +1082,7 @@ xfs_dir3_leaf_log_bests( */ void xfs_dir3_leaf_log_ents( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, int first, int last) @@ -1093,10 +1097,11 @@ xfs_dir3_leaf_log_ents( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - ents = dp->d_ops->leaf_ents_p(leaf); + ents = args->dp->d_ops->leaf_ents_p(leaf); firstlep = &ents[first]; lastlep = &ents[last]; - xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)firstlep - (char *)leaf), (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); } @@ -1105,8 +1110,7 @@ xfs_dir3_leaf_log_ents( */ void xfs_dir3_leaf_log_header( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; @@ -1116,8 +1120,9 @@ xfs_dir3_leaf_log_header( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), - dp->d_ops->leaf_hdr_size - 1); + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)&leaf->hdr - (char *)leaf), + args->dp->d_ops->leaf_hdr_size - 1); } /* @@ -1125,21 +1130,20 @@ xfs_dir3_leaf_log_header( */ STATIC void xfs_dir3_leaf_log_tail( - struct xfs_trans *tp, + struct xfs_da_args *args, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - struct xfs_mount *mp = tp->t_mountp; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); - xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), - (uint)(mp->m_dirblksize - 1)); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf), + (uint)(args->geo->blksize - 1)); } /* @@ -1185,7 +1189,7 @@ xfs_dir2_leaf_lookup( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); /* * Return the found inode number & CI name if appropriate */ @@ -1231,7 +1235,7 @@ xfs_dir2_leaf_lookup_int( tp = args->trans; mp = dp->i_mount; - error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); if (error) return error; @@ -1260,7 +1264,8 @@ xfs_dir2_leaf_lookup_int( /* * Get the new data block number. */ - newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); /* * If it's not the same as the old data block number, * need to pitch the old one and read the new one. @@ -1269,8 +1274,8 @@ xfs_dir2_leaf_lookup_int( if (dbp) xfs_trans_brelse(tp, dbp); error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, newdb), - -1, &dbp); + xfs_dir2_db_to_da(args->geo, newdb), + -1, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -1281,7 +1286,8 @@ xfs_dir2_leaf_lookup_int( * Point to the data entry. */ dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); /* * Compare name and if it's an exact match, return the index * and buffer. If it's the first case-insensitive match, store @@ -1310,8 +1316,8 @@ xfs_dir2_leaf_lookup_int( if (cidb != curdb) { xfs_trans_brelse(tp, dbp); error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, cidb), - -1, &dbp); + xfs_dir2_db_to_da(args->geo, cidb), + -1, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -1380,18 +1386,18 @@ xfs_dir2_leaf_removename( * Point to the leaf entry, use that to point to the data entry. */ lep = &ents[index]; - db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); needscan = needlog = 0; oldbest = be16_to_cpu(bf[0].length); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); ASSERT(be16_to_cpu(bestsp[db]) == oldbest); /* * Mark the former data entry unused. */ - xfs_dir2_data_make_free(tp, dp, dbp, + xfs_dir2_data_make_free(args, dbp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* @@ -1399,10 +1405,10 @@ xfs_dir2_leaf_removename( */ leafhdr.stale++; dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_log_header(args, lbp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, dp, lbp, index, index); + xfs_dir3_leaf_log_ents(args, lbp, index, index); /* * Scan the freespace in the data block again if necessary, @@ -1411,22 +1417,22 @@ xfs_dir2_leaf_removename( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. */ if (be16_to_cpu(bf[0].length) != oldbest) { bestsp[db] = bf[0].length; - xfs_dir3_leaf_log_bests(tp, lbp, db, db); + xfs_dir3_leaf_log_bests(args, lbp, db, db); } xfs_dir3_data_check(dp, dbp); /* * If the data block is now empty then get rid of the data block. */ if (be16_to_cpu(bf[0].length) == - mp->m_dirblksize - dp->d_ops->data_entry_offset) { - ASSERT(db != mp->m_dirdatablk); + args->geo->blksize - dp->d_ops->data_entry_offset) { + ASSERT(db != args->geo->datablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* * Nope, can't get rid of it because it caused @@ -1459,15 +1465,16 @@ xfs_dir2_leaf_removename( memmove(&bestsp[db - i], bestsp, (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); be32_add_cpu(<p->bestcount, -(db - i)); - xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, + be32_to_cpu(ltp->bestcount) - 1); } else bestsp[db] = cpu_to_be16(NULLDATAOFF); } /* * If the data block was not the first one, drop it. */ - else if (db != mp->m_dirdatablk) + else if (db != args->geo->datablk) dbp = NULL; xfs_dir3_leaf_check(dp, lbp); @@ -1515,7 +1522,7 @@ xfs_dir2_leaf_replace( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); ASSERT(args->inumber != be64_to_cpu(dep->inumber)); /* * Put the new inode number in, log it. @@ -1523,7 +1530,7 @@ xfs_dir2_leaf_replace( dep->inumber = cpu_to_be64(args->inumber); dp->d_ops->data_put_ftype(dep, args->filetype); tp = args->trans; - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_entry(args, dbp, dep); xfs_dir3_leaf_check(dp, lbp); xfs_trans_brelse(tp, lbp); return 0; @@ -1609,12 +1616,13 @@ xfs_dir2_leaf_trim_data( /* * Read the offending data block. We need its buffer. */ - error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp); + error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db), + -1, &dbp); if (error) return error; leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); #ifdef DEBUG { @@ -1624,7 +1632,7 @@ xfs_dir2_leaf_trim_data( ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); ASSERT(be16_to_cpu(bf[0].length) == - mp->m_dirblksize - dp->d_ops->data_entry_offset); + args->geo->blksize - dp->d_ops->data_entry_offset); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); } #endif @@ -1643,8 +1651,8 @@ xfs_dir2_leaf_trim_data( bestsp = xfs_dir2_leaf_bests_p(ltp); be32_add_cpu(<p->bestcount, -1); memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); - xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); return 0; } @@ -1708,22 +1716,22 @@ xfs_dir2_node_to_leaf( /* * Get the last offset in the file. */ - if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) { + if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) { return error; } - fo -= mp->m_dirblkfsbs; + fo -= args->geo->fsbcount; /* * If there are freespace blocks other than the first one, * take this opportunity to remove trailing empty freespace blocks * that may have been left behind during no-space-reservation * operations. */ - while (fo > mp->m_dirfreeblk) { + while (fo > args->geo->freeblk) { if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) { return error; } if (rval) - fo -= mp->m_dirblkfsbs; + fo -= args->geo->fsbcount; else return 0; } @@ -1736,7 +1744,7 @@ xfs_dir2_node_to_leaf( /* * If it's not the single leaf block, give up. */ - if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize) + if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize) return 0; lbp = state->path.blk[0].bp; leaf = lbp->b_addr; @@ -1748,7 +1756,7 @@ xfs_dir2_node_to_leaf( /* * Read the freespace block. */ - error = xfs_dir2_free_read(tp, dp, mp->m_dirfreeblk, &fbp); + error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp); if (error) return error; free = fbp->b_addr; @@ -1760,7 +1768,7 @@ xfs_dir2_node_to_leaf( * Now see if the leafn and free data will fit in a leaf1. * If not, release the buffer and give up. */ - if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > mp->m_dirblksize) { + if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) { xfs_trans_brelse(tp, fbp); return 0; } @@ -1780,7 +1788,7 @@ xfs_dir2_node_to_leaf( /* * Set up the leaf tail from the freespace block. */ - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ltp->bestcount = cpu_to_be32(freehdr.nvalid); /* @@ -1790,15 +1798,17 @@ xfs_dir2_node_to_leaf( freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); - xfs_dir3_leaf_log_tail(tp, lbp); + xfs_dir3_leaf_log_header(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); xfs_dir3_leaf_check(dp, lbp); /* * Get rid of the freespace block. */ - error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp); + error = xfs_dir2_shrink_inode(args, + xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET), + fbp); if (error) { /* * This can't fail here because it can only happen when diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index cb434d732681..da43d304fca2 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -195,17 +195,18 @@ xfs_dir2_free_try_read( static int xfs_dir3_free_get_buf( - struct xfs_trans *tp, - struct xfs_inode *dp, + xfs_da_args_t *args, xfs_dir2_db_t fbno, struct xfs_buf **bpp) { + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_buf *bp; int error; struct xfs_dir3_icfree_hdr hdr; - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno), + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno), -1, &bp, XFS_DATA_FORK); if (error) return error; @@ -240,8 +241,7 @@ xfs_dir3_free_get_buf( */ STATIC void xfs_dir2_free_log_bests( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, int first, /* first entry to log */ int last) /* last entry to log */ @@ -250,10 +250,10 @@ xfs_dir2_free_log_bests( __be16 *bests; free = bp->b_addr; - bests = dp->d_ops->free_bests_p(free); + bests = args->dp->d_ops->free_bests_p(free); ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); - xfs_trans_log_buf(tp, bp, + xfs_trans_log_buf(args->trans, bp, (uint)((char *)&bests[first] - (char *)free), (uint)((char *)&bests[last] - (char *)free + sizeof(bests[0]) - 1)); @@ -264,8 +264,7 @@ xfs_dir2_free_log_bests( */ static void xfs_dir2_free_log_header( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp) { #ifdef DEBUG @@ -275,7 +274,8 @@ xfs_dir2_free_log_header( ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); #endif - xfs_trans_log_buf(tp, bp, 0, dp->d_ops->free_hdr_size - 1); + xfs_trans_log_buf(args->trans, bp, 0, + args->dp->d_ops->free_hdr_size - 1); } /* @@ -315,20 +315,20 @@ xfs_dir2_leaf_to_node( if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) { return error; } - ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp)); + ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); /* * Get the buffer for the new freespace block. */ - error = xfs_dir3_free_get_buf(tp, dp, fdb, &fbp); + error = xfs_dir3_free_get_buf(args, fdb, &fbp); if (error) return error; free = fbp->b_addr; dp->d_ops->free_hdr_from_disk(&freehdr, free); leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ASSERT(be32_to_cpu(ltp->bestcount) <= - (uint)dp->i_d.di_size / mp->m_dirblksize); + (uint)dp->i_d.di_size / args->geo->blksize); /* * Copy freespace entries from the leaf block to the new block. @@ -349,8 +349,8 @@ xfs_dir2_leaf_to_node( freehdr.nvalid = be32_to_cpu(ltp->bestcount); dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_bests(tp, dp, fbp, 0, freehdr.nvalid - 1); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1); + xfs_dir2_free_log_header(args, fbp); /* * Converting the leaf to a leafnode is just a matter of changing the @@ -364,7 +364,7 @@ xfs_dir2_leaf_to_node( leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); lbp->b_ops = &xfs_dir3_leafn_buf_ops; xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_log_header(args, lbp); xfs_dir3_leaf_check(dp, lbp); return 0; } @@ -415,7 +415,7 @@ xfs_dir2_leafn_add( * a compact. */ - if (leafhdr.count == dp->d_ops->leaf_max_ents(mp)) { + if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { if (!leafhdr.stale) return XFS_ERROR(ENOSPC); compact = leafhdr.stale > 1; @@ -450,12 +450,12 @@ xfs_dir2_leafn_add( highstale, &lfloglow, &lfloghigh); lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, + lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo, args->blkno, args->index)); dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, bp); - xfs_dir3_leaf_log_ents(tp, dp, bp, lfloglow, lfloghigh); + xfs_dir3_leaf_log_header(args, bp); + xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh); xfs_dir3_leaf_check(dp, bp); return 0; } @@ -471,7 +471,8 @@ xfs_dir2_free_hdr_check( dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); - ASSERT((hdr.firstdb % dp->d_ops->free_max_bests(dp->i_mount)) == 0); + ASSERT((hdr.firstdb % + dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0); ASSERT(hdr.firstdb <= db); ASSERT(db < hdr.firstdb + hdr.nvalid); } @@ -576,7 +577,8 @@ xfs_dir2_leafn_lookup_for_addname( /* * Pull the data block number from the entry. */ - newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); /* * For addname, we're looking for a place to put the new entry. * We want to use a data block with an entry of equal @@ -593,7 +595,7 @@ xfs_dir2_leafn_lookup_for_addname( * Convert the data block to the free block * holding its freespace information. */ - newfdb = dp->d_ops->db_to_fdb(mp, newdb); + newfdb = dp->d_ops->db_to_fdb(args->geo, newdb); /* * If it's not the one we have in hand, read it in. */ @@ -605,7 +607,8 @@ xfs_dir2_leafn_lookup_for_addname( xfs_trans_brelse(tp, curbp); error = xfs_dir2_free_read(tp, dp, - xfs_dir2_db_to_da(mp, newfdb), + xfs_dir2_db_to_da(args->geo, + newfdb), &curbp); if (error) return error; @@ -616,7 +619,7 @@ xfs_dir2_leafn_lookup_for_addname( /* * Get the index for our entry. */ - fi = dp->d_ops->db_to_fdindex(mp, curdb); + fi = dp->d_ops->db_to_fdindex(args->geo, curdb); /* * If it has room, return it. */ @@ -721,7 +724,8 @@ xfs_dir2_leafn_lookup_for_entry( /* * Pull the data block number from the entry. */ - newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); /* * Not adding a new entry, so we really want to find * the name given to us. @@ -746,7 +750,8 @@ xfs_dir2_leafn_lookup_for_entry( curbp = state->extrablk.bp; } else { error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, newdb), + xfs_dir2_db_to_da(args->geo, + newdb), -1, &curbp); if (error) return error; @@ -758,7 +763,8 @@ xfs_dir2_leafn_lookup_for_entry( * Point to the data entry. */ dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); /* * Compare the entry and if it's an exact match, return * EEXIST immediately. If it's the first case-insensitive @@ -844,7 +850,6 @@ xfs_dir3_leafn_moveents( int start_d,/* destination leaf index */ int count) /* count of leaves to copy */ { - struct xfs_trans *tp = args->trans; int stale; /* count stale leaves copied */ trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); @@ -863,7 +868,7 @@ xfs_dir3_leafn_moveents( if (start_d < dhdr->count) { memmove(&dents[start_d + count], &dents[start_d], (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, start_d + count, + xfs_dir3_leaf_log_ents(args, bp_d, start_d + count, count + dhdr->count - 1); } /* @@ -885,8 +890,7 @@ xfs_dir3_leafn_moveents( */ memcpy(&dents[start_d], &sents[start_s], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, - start_d, start_d + count - 1); + xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1); /* * If there are source entries after the ones we copied, @@ -895,8 +899,7 @@ xfs_dir3_leafn_moveents( if (start_s + count < shdr->count) { memmove(&sents[start_s], &sents[start_s + count], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_s, - start_s, start_s + count - 1); + xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1); } /* @@ -1032,8 +1035,8 @@ xfs_dir2_leafn_rebalance( /* log the changes made when moving the entries */ dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); - xfs_dir3_leaf_log_header(args->trans, dp, blk1->bp); - xfs_dir3_leaf_log_header(args->trans, dp, blk2->bp); + xfs_dir3_leaf_log_header(args, blk1->bp); + xfs_dir3_leaf_log_header(args, blk2->bp); xfs_dir3_leaf_check(dp, blk1->bp); xfs_dir3_leaf_check(dp, blk2->bp); @@ -1076,7 +1079,6 @@ xfs_dir3_data_block_free( struct xfs_buf *fbp, int longest) { - struct xfs_trans *tp = args->trans; int logfree = 0; __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; @@ -1090,7 +1092,7 @@ xfs_dir3_data_block_free( * value. */ bests[findex] = cpu_to_be16(longest); - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(args, fbp, findex, findex); return 0; } @@ -1118,7 +1120,7 @@ xfs_dir3_data_block_free( } dp->d_ops->free_hdr_to_disk(free, &freehdr); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir2_free_log_header(args, fbp); /* * If there are no useful entries left in the block, get rid of the @@ -1142,7 +1144,7 @@ xfs_dir3_data_block_free( /* Log the free entry that changed, unless we got rid of it. */ if (logfree) - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(args, fbp, findex, findex); return 0; } @@ -1193,9 +1195,9 @@ xfs_dir2_leafn_remove( /* * Extract the data block and offset from the entry. */ - db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); ASSERT(dblk->blkno == db); - off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)); + off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)); ASSERT(dblk->index == off); /* @@ -1204,10 +1206,10 @@ xfs_dir2_leafn_remove( */ leafhdr.stale++; dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, bp); + xfs_dir3_leaf_log_header(args, bp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, dp, bp, index, index); + xfs_dir3_leaf_log_ents(args, bp, index, index); /* * Make the data entry free. Keep track of the longest freespace @@ -1219,7 +1221,7 @@ xfs_dir2_leafn_remove( bf = dp->d_ops->data_bestfree_p(hdr); longest = be16_to_cpu(bf[0].length); needlog = needscan = 0; - xfs_dir2_data_make_free(tp, dp, dbp, off, + xfs_dir2_data_make_free(args, dbp, off, dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* * Rescan the data block freespaces for bestfree. @@ -1228,7 +1230,7 @@ xfs_dir2_leafn_remove( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); xfs_dir3_data_check(dp, dbp); /* * If the longest data block freespace changes, need to update @@ -1245,8 +1247,9 @@ xfs_dir2_leafn_remove( * Convert the data block number to a free block, * read in the free block. */ - fdb = dp->d_ops->db_to_fdb(mp, db); - error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb), + fdb = dp->d_ops->db_to_fdb(args->geo, db); + error = xfs_dir2_free_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fdb), &fbp); if (error) return error; @@ -1255,20 +1258,21 @@ xfs_dir2_leafn_remove( { struct xfs_dir3_icfree_hdr freehdr; dp->d_ops->free_hdr_from_disk(&freehdr, free); - ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(mp) * - (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); + ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) * + (fdb - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET))); } #endif /* * Calculate which entry we need to fix. */ - findex = dp->d_ops->db_to_fdindex(mp, db); + findex = dp->d_ops->db_to_fdindex(args->geo, db); longest = be16_to_cpu(bf[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ - if (longest == mp->m_dirblksize - + if (longest == args->geo->blksize - dp->d_ops->data_entry_offset) { /* * Try to punch out the data block. @@ -1303,7 +1307,7 @@ xfs_dir2_leafn_remove( */ *rval = (dp->d_ops->leaf_hdr_size + (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < - mp->m_dir_magicpct; + args->geo->magicpct; return 0; } @@ -1336,7 +1340,7 @@ xfs_dir2_leafn_split( /* * Initialize the new leaf block. */ - error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(mp, blkno), + error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno), &newblk->bp, XFS_DIR2_LEAFN_MAGIC); if (error) return error; @@ -1410,7 +1414,7 @@ xfs_dir2_leafn_toosmall( count = leafhdr.count - leafhdr.stale; bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); - if (bytes > (state->blocksize >> 1)) { + if (bytes > (state->args->geo->blksize >> 1)) { /* * Blk over 50%, don't try to join. */ @@ -1463,7 +1467,8 @@ xfs_dir2_leafn_toosmall( * Count bytes in the two blocks combined. */ count = leafhdr.count - leafhdr.stale; - bytes = state->blocksize - (state->blocksize >> 2); + bytes = state->args->geo->blksize - + (state->args->geo->blksize >> 2); leaf = bp->b_addr; dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); @@ -1560,8 +1565,8 @@ xfs_dir2_leafn_unbalance( /* log the changes made when moving the entries */ dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); - xfs_dir3_leaf_log_header(args->trans, dp, save_blk->bp); - xfs_dir3_leaf_log_header(args->trans, dp, drop_blk->bp); + xfs_dir3_leaf_log_header(args, save_blk->bp); + xfs_dir3_leaf_log_header(args, drop_blk->bp); xfs_dir3_leaf_check(dp, save_blk->bp); xfs_dir3_leaf_check(dp, drop_blk->bp); @@ -1587,8 +1592,6 @@ xfs_dir2_node_addname( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; /* * Look up the name. We're not supposed to find it, but * this gives us the insertion point. @@ -1727,9 +1730,9 @@ xfs_dir2_node_addname_int( if (dbno == -1) { xfs_fileoff_t fo; /* freespace block number */ - if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) + if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) return error; - lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo); + lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); fbno = ifbno; } /* @@ -1747,7 +1750,8 @@ xfs_dir2_node_addname_int( * us a freespace block to start with. */ if (++fbno == 0) - fbno = XFS_DIR2_FREE_FIRSTDB(mp); + fbno = xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET); /* * If it's ifbno we already looked at it. */ @@ -1765,8 +1769,8 @@ xfs_dir2_node_addname_int( * to avoid it. */ error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(mp, fbno), - &fbp); + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); if (error) return error; if (!fbp) @@ -1834,10 +1838,10 @@ xfs_dir2_node_addname_int( * Get the freespace block corresponding to the data block * that was just allocated. */ - fbno = dp->d_ops->db_to_fdb(mp, dbno); + fbno = dp->d_ops->db_to_fdb(args->geo, dbno); error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(mp, fbno), - &fbp); + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); if (error) return error; @@ -1851,12 +1855,13 @@ xfs_dir2_node_addname_int( if (error) return error; - if (unlikely(dp->d_ops->db_to_fdb(mp, dbno) != fbno)) { + if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { xfs_alert(mp, "%s: dir ino %llu needed freesp block %lld for\n" " data block %lld, got %lld ifbno %llu lastfbno %d", __func__, (unsigned long long)dp->i_ino, - (long long)dp->d_ops->db_to_fdb(mp, dbno), + (long long)dp->d_ops->db_to_fdb( + args->geo, dbno), (long long)dbno, (long long)fbno, (unsigned long long)ifbno, lastfbno); if (fblk) { @@ -1877,7 +1882,7 @@ xfs_dir2_node_addname_int( /* * Get a buffer for the new block. */ - error = xfs_dir3_free_get_buf(tp, dp, fbno, &fbp); + error = xfs_dir3_free_get_buf(args, fbno, &fbp); if (error) return error; free = fbp->b_addr; @@ -1887,8 +1892,10 @@ xfs_dir2_node_addname_int( /* * Remember the first slot as our empty slot. */ - freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - dp->d_ops->free_max_bests(mp); + freehdr.firstdb = + (fbno - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET)) * + dp->d_ops->free_max_bests(args->geo); } else { free = fbp->b_addr; bests = dp->d_ops->free_bests_p(free); @@ -1898,13 +1905,13 @@ xfs_dir2_node_addname_int( /* * Set the freespace block index from the data block number. */ - findex = dp->d_ops->db_to_fdindex(mp, dbno); + findex = dp->d_ops->db_to_fdindex(args->geo, dbno); /* * If it's after the end of the current entries in the * freespace block, extend that table. */ if (findex >= freehdr.nvalid) { - ASSERT(findex < dp->d_ops->free_max_bests(mp)); + ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); freehdr.nvalid = findex + 1; /* * Tag new entry so nused will go up. @@ -1918,7 +1925,7 @@ xfs_dir2_node_addname_int( if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { freehdr.nused++; dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir2_free_log_header(args, fbp); } /* * Update the real value in the table. @@ -1943,7 +1950,8 @@ xfs_dir2_node_addname_int( /* * Read the data block in. */ - error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno), + error = xfs_dir3_data_read(tp, dp, + xfs_dir2_db_to_da(args->geo, dbno), -1, &dbp); if (error) return error; @@ -1961,7 +1969,7 @@ xfs_dir2_node_addname_int( /* * Mark the first part of the unused space, inuse for us. */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, + xfs_dir2_data_use_free(args, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -1974,7 +1982,7 @@ xfs_dir2_node_addname_int( dp->d_ops->data_put_ftype(dep, args->filetype); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_entry(args, dbp, dep); /* * Rescan the block for bestfree if needed. */ @@ -1984,7 +1992,7 @@ xfs_dir2_node_addname_int( * Log the data block header if needed. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); /* * If the freespace entry is now wrong, update it. */ @@ -1997,7 +2005,7 @@ xfs_dir2_node_addname_int( * Log the freespace entry if needed. */ if (logfree) - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(args, fbp, findex, findex); /* * Return the data block and offset in args, then drop the data block. */ @@ -2028,8 +2036,6 @@ xfs_dir2_node_lookup( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; /* * Fill in the path to the entry in the cursor. */ @@ -2083,8 +2089,6 @@ xfs_dir2_node_removename( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; /* Look up the entry we're deleting, set up the cursor. */ error = xfs_da3_node_lookup_int(state, &rval); @@ -2153,8 +2157,6 @@ xfs_dir2_node_replace( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; inum = args->inumber; /* * Lookup the entry to change in the btree. @@ -2186,15 +2188,15 @@ xfs_dir2_node_replace( hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); dep = (xfs_dir2_data_entry_t *) ((char *)hdr + - xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* * Fill in the new inode number and log the entry. */ dep->inumber = cpu_to_be64(inum); args->dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, args->dp, - state->extrablk.bp, dep); + xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); rval = 0; } /* @@ -2262,9 +2264,9 @@ xfs_dir2_node_trim_free( /* * Blow the block away. */ - if ((error = - xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo), - bp))) { + error = xfs_dir2_shrink_inode(args, + xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp); + if (error) { /* * Can't fail with ENOSPC since that only happens with no * space reservation, when breaking up an extent into two diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 8b9d2281f85b..27ce0794d196 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h @@ -20,6 +20,140 @@ struct dir_context; +/* + * Directory offset/block conversion functions. + * + * DB blocks here are logical directory block numbers, not filesystem blocks. + */ + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t)(by >> geo->blklog); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)db << geo->blklog) + o; +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); +} + +/* + * Directory tail pointer accessor functions. Based on block geometry. + */ +static inline struct xfs_dir2_block_tail * +xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir2_block_tail *) + ((char *)hdr + geo->blksize)) - 1; +} + +static inline struct xfs_dir2_leaf_tail * +xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) +{ + return (struct xfs_dir2_leaf_tail *) + ((char *)lp + geo->blksize - + sizeof(struct xfs_dir2_leaf_tail)); +} + /* xfs_dir2.c */ extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, @@ -54,8 +188,8 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); -extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mapped_bno); +extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, + xfs_daddr_t mapped_bno); extern struct xfs_dir2_data_free * xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, @@ -77,9 +211,9 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, struct xfs_buf **bpp, __uint16_t magic); -extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, struct xfs_buf *bp, int first, int last); -extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args, struct xfs_buf *bp); extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index aead369e1c30..48e99afb9cb0 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -76,26 +76,25 @@ const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { STATIC int xfs_dir2_sf_getdents( - xfs_inode_t *dp, /* incore directory inode */ + struct xfs_da_args *args, struct dir_context *ctx) { int i; /* shortform entry number */ - xfs_mount_t *mp; /* filesystem mount point */ + struct xfs_inode *dp = args->dp; /* incore directory inode */ xfs_dir2_dataptr_t off; /* current entry's offset */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_dataptr_t dot_offset; xfs_dir2_dataptr_t dotdot_offset; xfs_ino_t ino; - - mp = dp->i_mount; + struct xfs_da_geometry *geo = args->geo; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); /* * Give up if the directory is way too short. */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); return XFS_ERROR(EIO); } @@ -109,18 +108,18 @@ xfs_dir2_sf_getdents( /* * If the block number in the offset is out of range, we're done. */ - if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) + if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) return 0; /* * Precalculate offsets for . and .. as we will always need them. * * XXX(hch): the second argument is sometimes 0 and sometimes - * mp->m_dirdatablk. + * geo->datablk */ - dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + dot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, dp->d_ops->data_dot_offset); - dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + dotdot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, dp->d_ops->data_dotdot_offset); /* @@ -149,7 +148,7 @@ xfs_dir2_sf_getdents( for (i = 0; i < sfp->count; i++) { __uint8_t filetype; - off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, xfs_dir2_sf_get_offset(sfep)); if (ctx->pos > off) { @@ -161,13 +160,13 @@ xfs_dir2_sf_getdents( filetype = dp->d_ops->sf_get_ftype(sfep); ctx->pos = off & 0x7fffffff; if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino, - xfs_dir3_get_dtype(mp, filetype))) + xfs_dir3_get_dtype(dp->i_mount, filetype))) return 0; sfep = dp->d_ops->sf_nextentry(sfp, sfep); } - ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & - 0x7fffffff; + ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & + 0x7fffffff; return 0; } @@ -176,9 +175,10 @@ xfs_dir2_sf_getdents( */ STATIC int xfs_dir2_block_getdents( - xfs_inode_t *dp, /* incore inode */ + struct xfs_da_args *args, struct dir_context *ctx) { + struct xfs_inode *dp = args->dp; /* incore directory inode */ xfs_dir2_data_hdr_t *hdr; /* block header */ struct xfs_buf *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -186,16 +186,15 @@ xfs_dir2_block_getdents( xfs_dir2_data_unused_t *dup; /* block unused entry */ char *endptr; /* end of the data entries */ int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ char *ptr; /* current data entry */ int wantoff; /* starting block offset */ xfs_off_t cook; + struct xfs_da_geometry *geo = args->geo; - mp = dp->i_mount; /* * If the block number in the offset is out of range, we're done. */ - if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) + if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) return 0; error = xfs_dir3_block_read(NULL, dp, &bp); @@ -206,13 +205,13 @@ xfs_dir2_block_getdents( * Extract the byte offset we start at from the seek pointer. * We'll skip entries before this. */ - wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos); + wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos); hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); /* * Set up values for the loop. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); @@ -244,7 +243,7 @@ xfs_dir2_block_getdents( if ((char *)dep - (char *)hdr < wantoff) continue; - cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, (char *)dep - (char *)hdr); ctx->pos = cook & 0x7fffffff; @@ -254,7 +253,7 @@ xfs_dir2_block_getdents( */ if (!dir_emit(ctx, (char *)dep->name, dep->namelen, be64_to_cpu(dep->inumber), - xfs_dir3_get_dtype(mp, filetype))) { + xfs_dir3_get_dtype(dp->i_mount, filetype))) { xfs_trans_brelse(NULL, bp); return 0; } @@ -264,8 +263,8 @@ xfs_dir2_block_getdents( * Reached the end of the block. * Set the offset to a non-existent block 1 and return. */ - ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & - 0x7fffffff; + ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & + 0x7fffffff; xfs_trans_brelse(NULL, bp); return 0; } @@ -286,13 +285,13 @@ struct xfs_dir2_leaf_map_info { STATIC int xfs_dir2_leaf_readbuf( - struct xfs_inode *dp, + struct xfs_da_args *args, size_t bufsize, struct xfs_dir2_leaf_map_info *mip, xfs_dir2_off_t *curoff, struct xfs_buf **bpp) { - struct xfs_mount *mp = dp->i_mount; + struct xfs_inode *dp = args->dp; struct xfs_buf *bp = *bpp; struct xfs_bmbt_irec *map = mip->map; struct blk_plug plug; @@ -300,6 +299,7 @@ xfs_dir2_leaf_readbuf( int length; int i; int j; + struct xfs_da_geometry *geo = args->geo; /* * If we have a buffer, we need to release it and @@ -309,12 +309,12 @@ xfs_dir2_leaf_readbuf( if (bp) { xfs_trans_brelse(NULL, bp); bp = NULL; - mip->map_blocks -= mp->m_dirblkfsbs; + mip->map_blocks -= geo->fsbcount; /* * Loop to get rid of the extents for the * directory block. */ - for (i = mp->m_dirblkfsbs; i > 0; ) { + for (i = geo->fsbcount; i > 0; ) { j = min_t(int, map->br_blockcount, i); map->br_blockcount -= j; map->br_startblock += j; @@ -333,8 +333,7 @@ xfs_dir2_leaf_readbuf( /* * Recalculate the readahead blocks wanted. */ - mip->ra_want = howmany(bufsize + mp->m_dirblksize, - mp->m_sb.sb_blocksize) - 1; + mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1; ASSERT(mip->ra_want >= 0); /* @@ -342,14 +341,14 @@ xfs_dir2_leaf_readbuf( * run out of data blocks, get some more mappings. */ if (1 + mip->ra_want > mip->map_blocks && - mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) { + mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) { /* * Get more bmaps, fill in after the ones * we already have in the table. */ mip->nmap = mip->map_size - mip->map_valid; error = xfs_bmapi_read(dp, mip->map_off, - xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) - + xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) - mip->map_off, &map[mip->map_valid], &mip->nmap, 0); @@ -370,7 +369,7 @@ xfs_dir2_leaf_readbuf( i = mip->map_valid + mip->nmap - 1; mip->map_off = map[i].br_startoff + map[i].br_blockcount; } else - mip->map_off = xfs_dir2_byte_to_da(mp, + mip->map_off = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); /* @@ -396,18 +395,18 @@ xfs_dir2_leaf_readbuf( * No valid mappings, so no more data blocks. */ if (!mip->map_valid) { - *curoff = xfs_dir2_da_to_byte(mp, mip->map_off); + *curoff = xfs_dir2_da_to_byte(geo, mip->map_off); goto out; } /* * Read the directory block starting at the first mapping. */ - mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); + mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff); error = xfs_dir3_data_read(NULL, dp, map->br_startoff, - map->br_blockcount >= mp->m_dirblkfsbs ? - XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp); - + map->br_blockcount >= geo->fsbcount ? + XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) : + -1, &bp); /* * Should just skip over the data block instead of giving up. */ @@ -419,7 +418,7 @@ xfs_dir2_leaf_readbuf( * was previously ra. */ if (mip->ra_current) - mip->ra_current -= mp->m_dirblkfsbs; + mip->ra_current -= geo->fsbcount; /* * Do we need more readahead? @@ -427,16 +426,16 @@ xfs_dir2_leaf_readbuf( blk_start_plug(&plug); for (mip->ra_index = mip->ra_offset = i = 0; mip->ra_want > mip->ra_current && i < mip->map_blocks; - i += mp->m_dirblkfsbs) { + i += geo->fsbcount) { ASSERT(mip->ra_index < mip->map_valid); /* * Read-ahead a contiguous directory block. */ if (i > mip->ra_current && - map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { - xfs_dir3_data_readahead(NULL, dp, + map[mip->ra_index].br_blockcount >= geo->fsbcount) { + xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, - XFS_FSB_TO_DADDR(mp, + XFS_FSB_TO_DADDR(dp->i_mount, map[mip->ra_index].br_startblock + mip->ra_offset)); mip->ra_current = i; @@ -447,7 +446,7 @@ xfs_dir2_leaf_readbuf( * use our mapping, but this is a very rare case. */ else if (i > mip->ra_current) { - xfs_dir3_data_readahead(NULL, dp, + xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, -1); mip->ra_current = i; @@ -456,15 +455,14 @@ xfs_dir2_leaf_readbuf( /* * Advance offset through the mapping table. */ - for (j = 0; j < mp->m_dirblkfsbs; j++) { + for (j = 0; j < geo->fsbcount; j += length ) { /* * The rest of this extent but not more than a dir * block. */ - length = min_t(int, mp->m_dirblkfsbs, + length = min_t(int, geo->fsbcount, map[mip->ra_index].br_blockcount - mip->ra_offset); - j += length; mip->ra_offset += length; /* @@ -489,22 +487,23 @@ out: */ STATIC int xfs_dir2_leaf_getdents( - xfs_inode_t *dp, /* incore directory inode */ + struct xfs_da_args *args, struct dir_context *ctx, size_t bufsize) { + struct xfs_inode *dp = args->dp; struct xfs_buf *bp = NULL; /* data block buffer */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ int error = 0; /* error return value */ int length; /* temporary length value */ - xfs_mount_t *mp; /* filesystem mount point */ int byteoff; /* offset in current block */ xfs_dir2_off_t curoff; /* current overall offset */ xfs_dir2_off_t newoff; /* new curoff after new blk */ char *ptr = NULL; /* pointer to current data */ struct xfs_dir2_leaf_map_info *map_info; + struct xfs_da_geometry *geo = args->geo; /* * If the offset is at or past the largest allowed value, @@ -513,15 +512,12 @@ xfs_dir2_leaf_getdents( if (ctx->pos >= XFS_DIR2_MAX_DATAPTR) return 0; - mp = dp->i_mount; - /* * Set up to bmap a number of blocks based on the caller's * buffer size, the directory block size, and the filesystem * block size. */ - length = howmany(bufsize + mp->m_dirblksize, - mp->m_sb.sb_blocksize); + length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + (length * sizeof(struct xfs_bmbt_irec)), KM_SLEEP | KM_NOFS); @@ -531,14 +527,14 @@ xfs_dir2_leaf_getdents( * Inside the loop we keep the main offset value as a byte offset * in the directory file. */ - curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos); + curoff = xfs_dir2_dataptr_to_byte(ctx->pos); /* * Force this conversion through db so we truncate the offset * down to get the start of the data block. */ - map_info->map_off = xfs_dir2_db_to_da(mp, - xfs_dir2_byte_to_db(mp, curoff)); + map_info->map_off = xfs_dir2_db_to_da(geo, + xfs_dir2_byte_to_db(geo, curoff)); /* * Loop over directory entries until we reach the end offset. @@ -551,9 +547,9 @@ xfs_dir2_leaf_getdents( * If we have no buffer, or we're off the end of the * current buffer, need to get another one. */ - if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) { + if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { - error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info, + error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, &curoff, &bp); if (error || !map_info->map_valid) break; @@ -561,7 +557,8 @@ xfs_dir2_leaf_getdents( /* * Having done a read, we need to set a new offset. */ - newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0); + newoff = xfs_dir2_db_off_to_byte(geo, + map_info->curdb, 0); /* * Start of the current block. */ @@ -571,7 +568,7 @@ xfs_dir2_leaf_getdents( * Make sure we're in the right block. */ else if (curoff > newoff) - ASSERT(xfs_dir2_byte_to_db(mp, curoff) == + ASSERT(xfs_dir2_byte_to_db(geo, curoff) == map_info->curdb); hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); @@ -579,7 +576,7 @@ xfs_dir2_leaf_getdents( * Find our position in the block. */ ptr = (char *)dp->d_ops->data_entry_p(hdr); - byteoff = xfs_dir2_byte_to_off(mp, curoff); + byteoff = xfs_dir2_byte_to_off(geo, curoff); /* * Skip past the header. */ @@ -608,10 +605,10 @@ xfs_dir2_leaf_getdents( * Now set our real offset. */ curoff = - xfs_dir2_db_off_to_byte(mp, - xfs_dir2_byte_to_db(mp, curoff), + xfs_dir2_db_off_to_byte(geo, + xfs_dir2_byte_to_db(geo, curoff), (char *)ptr - (char *)hdr); - if (ptr >= (char *)hdr + mp->m_dirblksize) { + if (ptr >= (char *)hdr + geo->blksize) { continue; } } @@ -635,10 +632,10 @@ xfs_dir2_leaf_getdents( length = dp->d_ops->data_entsize(dep->namelen); filetype = dp->d_ops->data_get_ftype(dep); - ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; + ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; if (!dir_emit(ctx, (char *)dep->name, dep->namelen, be64_to_cpu(dep->inumber), - xfs_dir3_get_dtype(mp, filetype))) + xfs_dir3_get_dtype(dp->i_mount, filetype))) break; /* @@ -653,10 +650,10 @@ xfs_dir2_leaf_getdents( /* * All done. Set output offset value to current offset. */ - if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) + if (curoff > xfs_dir2_dataptr_to_byte(XFS_DIR2_MAX_DATAPTR)) ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; else - ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; + ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; kmem_free(map_info); if (bp) xfs_trans_brelse(NULL, bp); @@ -668,13 +665,14 @@ xfs_dir2_leaf_getdents( */ int xfs_readdir( - xfs_inode_t *dp, - struct dir_context *ctx, - size_t bufsize) + struct xfs_inode *dp, + struct dir_context *ctx, + size_t bufsize) { - int rval; /* return value */ - int v; /* type-checking value */ - uint lock_mode; + struct xfs_da_args args = { NULL }; + int rval; + int v; + uint lock_mode; trace_xfs_readdir(dp); @@ -684,15 +682,18 @@ xfs_readdir( ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_getdents); + args.dp = dp; + args.geo = dp->i_mount->m_dir_geo; + lock_mode = xfs_ilock_data_map_shared(dp); if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_getdents(dp, ctx); - else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) + rval = xfs_dir2_sf_getdents(&args, ctx); + else if ((rval = xfs_dir2_isblock(&args, &v))) ; else if (v) - rval = xfs_dir2_block_getdents(dp, ctx); + rval = xfs_dir2_block_getdents(&args, ctx); else - rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); + rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); xfs_iunlock(dp, lock_mode); return rval; diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 3725fb1b902b..53c3be619db5 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -82,8 +82,10 @@ xfs_dir2_block_sfsize( xfs_ino_t parent = 0; /* parent inode number */ int size=0; /* total computed size */ int has_ftype; + struct xfs_da_geometry *geo; mp = dp->i_mount; + geo = mp->m_dir_geo; /* * if there is a filetype field, add the extra byte to the namelen @@ -92,7 +94,7 @@ xfs_dir2_block_sfsize( has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0; count = i8count = namelen = 0; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -104,8 +106,8 @@ xfs_dir2_block_sfsize( /* * Calculate the pointer to the entry at hand. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(geo, addr)); /* * Detect . and .., so we can special-case them. * . is not included in sf directories. @@ -195,7 +197,7 @@ xfs_dir2_block_to_sf( /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); @@ -247,7 +249,7 @@ xfs_dir2_block_to_sf( /* now we are done with the block, we can shrink the inode */ logflags = XFS_ILOG_CORE; - error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp); + error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); if (error) { ASSERT(error != ENOSPC); goto out; @@ -285,14 +287,12 @@ int /* error */ xfs_dir2_sf_addname( xfs_da_args_t *args) /* operation arguments */ { - int add_entsize; /* size of the new entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ int incr_isize; /* total change in size */ int new_isize; /* di_size after adding name */ int objchange; /* changing to 8-byte inodes */ xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ - int old_isize; /* di_size before adding name */ int pick; /* which algorithm to use */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ @@ -316,8 +316,7 @@ xfs_dir2_sf_addname( /* * Compute entry (and change in) size. */ - add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen); - incr_isize = add_entsize; + incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); objchange = 0; #if XFS_BIG_INUMS /* @@ -325,11 +324,8 @@ xfs_dir2_sf_addname( */ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { /* - * Yes, adjust the entry size and the total size. + * Yes, adjust the inode size. old count + (parent + new) */ - add_entsize += - (uint)sizeof(xfs_dir2_ino8_t) - - (uint)sizeof(xfs_dir2_ino4_t); incr_isize += (sfp->count + 2) * ((uint)sizeof(xfs_dir2_ino8_t) - @@ -337,8 +333,7 @@ xfs_dir2_sf_addname( objchange = 1; } #endif - old_isize = (int)dp->i_d.di_size; - new_isize = old_isize + incr_isize; + new_isize = (int)dp->i_d.di_size + incr_isize; /* * Won't fit as shortform any more (due to size), * or the pick routine says it won't (due to offset values). @@ -593,7 +588,7 @@ xfs_dir2_sf_addname_pick( * we'll go back, convert to block, then try the insert and convert * to leaf. */ - if (used + (holefit ? 0 : size) > mp->m_dirblksize) + if (used + (holefit ? 0 : size) > args->geo->blksize) return 0; /* * If changing the inode number size, do it the hard way. @@ -608,7 +603,7 @@ xfs_dir2_sf_addname_pick( /* * If it won't fit at the end then do it the hard way (use the hole). */ - if (used + size > mp->m_dirblksize) + if (used + size > args->geo->blksize) return 2; /* * Do it the easy way. @@ -659,7 +654,7 @@ xfs_dir2_sf_check( ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); ASSERT(offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + - (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dirblksize); + (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize); } #endif /* DEBUG */ @@ -1110,9 +1105,9 @@ xfs_dir2_sf_toino4( } /* - * Convert from 4-byte inode numbers to 8-byte inode numbers. - * The new 8-byte inode number is not there yet, we leave with the - * count 1 but no corresponding entry. + * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers. + * The new entry w/ an 8-byte inode number is not there yet; we leave with + * i8count set to 1, but no corresponding 8-byte entry. */ static void xfs_dir2_sf_toino8( @@ -1145,7 +1140,7 @@ xfs_dir2_sf_toino8( ASSERT(oldsfp->i8count == 0); memcpy(buf, oldsfp, oldsize); /* - * Compute the new inode size. + * Compute the new inode size (nb: entry count + 1 for parent) */ newsize = oldsize + diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 868b19f096bf..3ee0cd43edc0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -353,10 +353,10 @@ xfs_qm_dqalloc( dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0); - - error = xfs_buf_geterror(bp); - if (error) + if (!bp) { + error = ENOMEM; goto error1; + } bp->b_ops = &xfs_dquot_buf_ops; /* @@ -832,47 +832,6 @@ restart: return (0); } - -STATIC void -xfs_qm_dqput_final( - struct xfs_dquot *dqp) -{ - struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; - struct xfs_dquot *gdqp; - struct xfs_dquot *pdqp; - - trace_xfs_dqput_free(dqp); - - if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) - XFS_STATS_INC(xs_qm_dquot_unused); - - /* - * If we just added a udquot to the freelist, then we want to release - * the gdquot/pdquot reference that it (probably) has. Otherwise it'll - * keep the gdquot/pdquot from getting reclaimed. - */ - gdqp = dqp->q_gdquot; - if (gdqp) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - - pdqp = dqp->q_pdquot; - if (pdqp) { - xfs_dqlock(pdqp); - dqp->q_pdquot = NULL; - } - xfs_dqunlock(dqp); - - /* - * If we had a group/project quota hint, release it now. - */ - if (gdqp) - xfs_qm_dqput(gdqp); - if (pdqp) - xfs_qm_dqput(pdqp); -} - /* * Release a reference to the dquot (decrement ref-count) and unlock it. * @@ -888,10 +847,14 @@ xfs_qm_dqput( trace_xfs_dqput(dqp); - if (--dqp->q_nrefs > 0) - xfs_dqunlock(dqp); - else - xfs_qm_dqput_final(dqp); + if (--dqp->q_nrefs == 0) { + struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; + trace_xfs_dqput_free(dqp); + + if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) + XFS_STATS_INC(xs_qm_dquot_unused); + } + xfs_dqunlock(dqp); } /* diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index d22ed0053c32..68a68f704837 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -52,8 +52,6 @@ typedef struct xfs_dquot { int q_bufoffset; /* off of dq in buffer (# dquots) */ xfs_fileoff_t q_fileoffset; /* offset in quotas file */ - struct xfs_dquot*q_gdquot; /* group dquot, hint only */ - struct xfs_dquot*q_pdquot; /* project dquot, hint only */ xfs_disk_dquot_t q_core; /* actual usage & quotas */ xfs_dq_logitem_t q_logitem; /* dquot log item */ xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c index 610da8177737..c2ac0c611ad8 100644 --- a/fs/xfs/xfs_dquot_buf.c +++ b/fs/xfs/xfs_dquot_buf.c @@ -35,7 +35,6 @@ int xfs_calc_dquots_per_chunk( - struct xfs_mount *mp, unsigned int nbblks) /* basic block units */ { unsigned int ndquots; @@ -194,7 +193,7 @@ xfs_dquot_buf_verify_crc( if (mp->m_quotainfo) ndquots = mp->m_quotainfo->qi_dqperchunk; else - ndquots = xfs_calc_dquots_per_chunk(mp, + ndquots = xfs_calc_dquots_per_chunk( XFS_BB_TO_FSB(mp, bp->b_length)); for (i = 0; i < ndquots; i++, d++) { @@ -225,7 +224,7 @@ xfs_dquot_buf_verify( if (mp->m_quotainfo) ndquots = mp->m_quotainfo->qi_dqperchunk; else - ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length); + ndquots = xfs_calc_dquots_per_chunk(bp->b_length); /* * On the first read of the buffer, verify that each dquot is valid. diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 830c1c937b88..1b8160dc04d1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -944,7 +944,7 @@ xfs_dir_open( */ mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - xfs_dir3_data_readahead(NULL, ip, 0, -1); + xfs_dir3_data_readahead(ip, 0, -1); xfs_iunlock(ip, mode); return 0; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 12b6e7701985..8ec81bed7992 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * Copyright (c) 2014 Christoph Hellwig. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -32,100 +33,20 @@ #include "xfs_filestream.h" #include "xfs_trace.h" -#ifdef XFS_FILESTREAMS_TRACE - -ktrace_t *xfs_filestreams_trace_buf; - -STATIC void -xfs_filestreams_trace( - xfs_mount_t *mp, /* mount point */ - int type, /* type of trace */ - const char *func, /* source function */ - int line, /* source line number */ - __psunsigned_t arg0, - __psunsigned_t arg1, - __psunsigned_t arg2, - __psunsigned_t arg3, - __psunsigned_t arg4, - __psunsigned_t arg5) -{ - ktrace_enter(xfs_filestreams_trace_buf, - (void *)(__psint_t)(type | (line << 16)), - (void *)func, - (void *)(__psunsigned_t)current_pid(), - (void *)mp, - (void *)(__psunsigned_t)arg0, - (void *)(__psunsigned_t)arg1, - (void *)(__psunsigned_t)arg2, - (void *)(__psunsigned_t)arg3, - (void *)(__psunsigned_t)arg4, - (void *)(__psunsigned_t)arg5, - NULL, NULL, NULL, NULL, NULL, NULL); -} - -#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0) -#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0) -#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0) -#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0) -#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) -#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) -#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ - xfs_filestreams_trace(mp, t, __func__, __LINE__, \ - (__psunsigned_t)a0, (__psunsigned_t)a1, \ - (__psunsigned_t)a2, (__psunsigned_t)a3, \ - (__psunsigned_t)a4, (__psunsigned_t)a5) - -#define TRACE_AG_SCAN(mp, ag, ag2) \ - TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); -#define TRACE_AG_PICK1(mp, max_ag, maxfree) \ - TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); -#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ - TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ - cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ - TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) -#define TRACE_FREE(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) -#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ - TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) \ - TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); - - -#else -#define TRACE_AG_SCAN(mp, ag, ag2) -#define TRACE_AG_PICK1(mp, max_ag, maxfree) -#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) -#define TRACE_FREE(mp, ip, pip, ag, cnt) -#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) -#endif - -static kmem_zone_t *item_zone; +struct xfs_fstrm_item { + struct xfs_mru_cache_elem mru; + struct xfs_inode *ip; + xfs_agnumber_t ag; /* AG in use for this directory */ +}; -/* - * Structure for associating a file or a directory with an allocation group. - * The parent directory pointer is only needed for files, but since there will - * generally be vastly more files than directories in the cache, using the same - * data structure simplifies the code with very little memory overhead. - */ -typedef struct fstrm_item -{ - xfs_agnumber_t ag; /* AG currently in use for the file/directory. */ - xfs_inode_t *ip; /* inode self-pointer. */ - xfs_inode_t *pip; /* Parent directory inode pointer. */ -} fstrm_item_t; +enum xfs_fstrm_alloc { + XFS_PICK_USERDATA = 1, + XFS_PICK_LOWSPACE = 2, +}; /* * Allocation group filestream associations are tracked with per-ag atomic - * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a + * counters. These counters allow xfs_filestream_pick_ag() to tell whether a * particular AG already has active filestreams associated with it. The mount * point's m_peraglock is used to protect these counters from per-ag array * re-allocation during a growfs operation. When xfs_growfs_data_private() is @@ -160,7 +81,7 @@ typedef struct fstrm_item * the cache that reference per-ag array elements that have since been * reallocated. */ -static int +int xfs_filestream_peek_ag( xfs_mount_t *mp, xfs_agnumber_t agno) @@ -200,23 +121,40 @@ xfs_filestream_put_ag( xfs_perag_put(pag); } +static void +xfs_fstrm_free_func( + struct xfs_mru_cache_elem *mru) +{ + struct xfs_fstrm_item *item = + container_of(mru, struct xfs_fstrm_item, mru); + + xfs_filestream_put_ag(item->ip->i_mount, item->ag); + + trace_xfs_filestream_free(item->ip, item->ag); + + kmem_free(item); +} + /* * Scan the AGs starting at startag looking for an AG that isn't in use and has * at least minlen blocks free. */ static int -_xfs_filestream_pick_ag( - xfs_mount_t *mp, - xfs_agnumber_t startag, - xfs_agnumber_t *agp, - int flags, - xfs_extlen_t minlen) +xfs_filestream_pick_ag( + struct xfs_inode *ip, + xfs_agnumber_t startag, + xfs_agnumber_t *agp, + int flags, + xfs_extlen_t minlen) { - int streams, max_streams; - int err, trylock, nscan; - xfs_extlen_t longest, free, minfree, maxfree = 0; - xfs_agnumber_t ag, max_ag = NULLAGNUMBER; - struct xfs_perag *pag; + struct xfs_mount *mp = ip->i_mount; + struct xfs_fstrm_item *item; + struct xfs_perag *pag; + xfs_extlen_t longest, free = 0, minfree, maxfree = 0; + xfs_agnumber_t ag, max_ag = NULLAGNUMBER; + int err, trylock, nscan; + + ASSERT(S_ISDIR(ip->i_d.di_mode)); /* 2% of an AG's blocks must be free for it to be chosen. */ minfree = mp->m_sb.sb_agblocks / 50; @@ -228,8 +166,9 @@ _xfs_filestream_pick_ag( trylock = XFS_ALLOC_FLAG_TRYLOCK; for (nscan = 0; 1; nscan++) { + trace_xfs_filestream_scan(ip, ag); + pag = xfs_perag_get(mp, ag); - TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms)); if (!pag->pagf_init) { err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); @@ -246,7 +185,6 @@ _xfs_filestream_pick_ag( /* Keep track of the AG with the most free blocks. */ if (pag->pagf_freeblks > maxfree) { maxfree = pag->pagf_freeblks; - max_streams = atomic_read(&pag->pagf_fstrms); max_ag = ag; } @@ -269,7 +207,6 @@ _xfs_filestream_pick_ag( /* Break out, retaining the reference on the AG. */ free = pag->pagf_freeblks; - streams = atomic_read(&pag->pagf_fstrms); xfs_perag_put(pag); *agp = ag; break; @@ -305,317 +242,98 @@ next_ag: */ if (max_ag != NULLAGNUMBER) { xfs_filestream_get_ag(mp, max_ag); - TRACE_AG_PICK1(mp, max_ag, maxfree); - streams = max_streams; free = maxfree; *agp = max_ag; break; } /* take AG 0 if none matched */ - TRACE_AG_PICK1(mp, max_ag, maxfree); + trace_xfs_filestream_pick(ip, *agp, free, nscan); *agp = 0; return 0; } - TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); - - return 0; -} + trace_xfs_filestream_pick(ip, *agp, free, nscan); -/* - * Set the allocation group number for a file or a directory, updating inode - * references and per-AG references as appropriate. - */ -static int -_xfs_filestream_update_ag( - xfs_inode_t *ip, - xfs_inode_t *pip, - xfs_agnumber_t ag) -{ - int err = 0; - xfs_mount_t *mp; - xfs_mru_cache_t *cache; - fstrm_item_t *item; - xfs_agnumber_t old_ag; - xfs_inode_t *old_pip; - - /* - * Either ip is a regular file and pip is a directory, or ip is a - * directory and pip is NULL. - */ - ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && - S_ISDIR(pip->i_d.di_mode)) || - (S_ISDIR(ip->i_d.di_mode) && !pip))); - - mp = ip->i_mount; - cache = mp->m_filestream; - - item = xfs_mru_cache_lookup(cache, ip->i_ino); - if (item) { - ASSERT(item->ip == ip); - old_ag = item->ag; - item->ag = ag; - old_pip = item->pip; - item->pip = pip; - xfs_mru_cache_done(cache); - - /* - * If the AG has changed, drop the old ref and take a new one, - * effectively transferring the reference from old to new AG. - */ - if (ag != old_ag) { - xfs_filestream_put_ag(mp, old_ag); - xfs_filestream_get_ag(mp, ag); - } - - /* - * If ip is a file and its pip has changed, drop the old ref and - * take a new one. - */ - if (pip && pip != old_pip) { - IRELE(old_pip); - IHOLD(pip); - } - - TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), - ag, xfs_filestream_peek_ag(mp, ag)); + if (*agp == NULLAGNUMBER) return 0; - } - item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); + err = ENOMEM; + item = kmem_alloc(sizeof(*item), KM_MAYFAIL); if (!item) - return ENOMEM; + goto out_put_ag; - item->ag = ag; + item->ag = *agp; item->ip = ip; - item->pip = pip; - err = xfs_mru_cache_insert(cache, ip->i_ino, item); + err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); if (err) { - kmem_zone_free(item_zone, item); - return err; + if (err == EEXIST) + err = 0; + goto out_free_item; } - /* Take a reference on the AG. */ - xfs_filestream_get_ag(mp, ag); - - /* - * Take a reference on the inode itself regardless of whether it's a - * regular file or a directory. - */ - IHOLD(ip); - - /* - * In the case of a regular file, take a reference on the parent inode - * as well to ensure it remains in-core. - */ - if (pip) - IHOLD(pip); - - TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), - ag, xfs_filestream_peek_ag(mp, ag)); - return 0; -} - -/* xfs_fstrm_free_func(): callback for freeing cached stream items. */ -STATIC void -xfs_fstrm_free_func( - unsigned long ino, - void *data) -{ - fstrm_item_t *item = (fstrm_item_t *)data; - xfs_inode_t *ip = item->ip; - - ASSERT(ip->i_ino == ino); - - xfs_iflags_clear(ip, XFS_IFILESTREAM); - - /* Drop the reference taken on the AG when the item was added. */ - xfs_filestream_put_ag(ip->i_mount, item->ag); - - TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, - xfs_filestream_peek_ag(ip->i_mount, item->ag)); - - /* - * _xfs_filestream_update_ag() always takes a reference on the inode - * itself, whether it's a file or a directory. Release it here. - * This can result in the inode being freed and so we must - * not hold any inode locks when freeing filesstreams objects - * otherwise we can deadlock here. - */ - IRELE(ip); - - /* - * In the case of a regular file, _xfs_filestream_update_ag() also - * takes a ref on the parent inode to keep it in-core. Release that - * too. - */ - if (item->pip) - IRELE(item->pip); - - /* Finally, free the memory allocated for the item. */ - kmem_zone_free(item_zone, item); -} - -/* - * xfs_filestream_init() is called at xfs initialisation time to set up the - * memory zone that will be used for filestream data structure allocation. - */ -int -xfs_filestream_init(void) -{ - item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); - if (!item_zone) - return -ENOMEM; - - return 0; -} - -/* - * xfs_filestream_uninit() is called at xfs termination time to destroy the - * memory zone that was used for filestream data structure allocation. - */ -void -xfs_filestream_uninit(void) -{ - kmem_zone_destroy(item_zone); -} - -/* - * xfs_filestream_mount() is called when a file system is mounted with the - * filestream option. It is responsible for allocating the data structures - * needed to track the new file system's file streams. - */ -int -xfs_filestream_mount( - xfs_mount_t *mp) -{ - int err; - unsigned int lifetime, grp_count; - - /* - * The filestream timer tunable is currently fixed within the range of - * one second to four minutes, with five seconds being the default. The - * group count is somewhat arbitrary, but it'd be nice to adhere to the - * timer tunable to within about 10 percent. This requires at least 10 - * groups. - */ - lifetime = xfs_fstrm_centisecs * 10; - grp_count = 10; - - err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, - xfs_fstrm_free_func); +out_free_item: + kmem_free(item); +out_put_ag: + xfs_filestream_put_ag(mp, *agp); return err; } -/* - * xfs_filestream_unmount() is called when a file system that was mounted with - * the filestream option is unmounted. It drains the data structures created - * to track the file system's file streams and frees all the memory that was - * allocated. - */ -void -xfs_filestream_unmount( - xfs_mount_t *mp) +static struct xfs_inode * +xfs_filestream_get_parent( + struct xfs_inode *ip) { - xfs_mru_cache_destroy(mp->m_filestream); -} + struct inode *inode = VFS_I(ip), *dir = NULL; + struct dentry *dentry, *parent; -/* - * Return the AG of the filestream the file or directory belongs to, or - * NULLAGNUMBER otherwise. - */ -xfs_agnumber_t -xfs_filestream_lookup_ag( - xfs_inode_t *ip) -{ - xfs_mru_cache_t *cache; - fstrm_item_t *item; - xfs_agnumber_t ag; - int ref; - - if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { - ASSERT(0); - return NULLAGNUMBER; - } + dentry = d_find_alias(inode); + if (!dentry) + goto out; - cache = ip->i_mount->m_filestream; - item = xfs_mru_cache_lookup(cache, ip->i_ino); - if (!item) { - TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); - return NULLAGNUMBER; - } + parent = dget_parent(dentry); + if (!parent) + goto out_dput; - ASSERT(ip == item->ip); - ag = item->ag; - ref = xfs_filestream_peek_ag(ip->i_mount, ag); - xfs_mru_cache_done(cache); + dir = igrab(parent->d_inode); + dput(parent); - TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); - return ag; +out_dput: + dput(dentry); +out: + return dir ? XFS_I(dir) : NULL; } /* - * xfs_filestream_associate() should only be called to associate a regular file - * with its parent directory. Calling it with a child directory isn't - * appropriate because filestreams don't apply to entire directory hierarchies. - * Creating a file in a child directory of an existing filestream directory - * starts a new filestream with its own allocation group association. + * Find the right allocation group for a file, either by finding an + * existing file stream or creating a new one. * - * Returns < 0 on error, 0 if successful association occurred, > 0 if - * we failed to get an association because of locking issues. + * Returns NULLAGNUMBER in case of an error. */ -int -xfs_filestream_associate( - xfs_inode_t *pip, - xfs_inode_t *ip) +xfs_agnumber_t +xfs_filestream_lookup_ag( + struct xfs_inode *ip) { - xfs_mount_t *mp; - xfs_mru_cache_t *cache; - fstrm_item_t *item; - xfs_agnumber_t ag, rotorstep, startag; - int err = 0; + struct xfs_mount *mp = ip->i_mount; + struct xfs_inode *pip = NULL; + xfs_agnumber_t startag, ag = NULLAGNUMBER; + struct xfs_mru_cache_elem *mru; - ASSERT(S_ISDIR(pip->i_d.di_mode)); ASSERT(S_ISREG(ip->i_d.di_mode)); - if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) - return -EINVAL; - mp = pip->i_mount; - cache = mp->m_filestream; + pip = xfs_filestream_get_parent(ip); + if (!pip) + goto out; - /* - * We have a problem, Houston. - * - * Taking the iolock here violates inode locking order - we already - * hold the ilock. Hence if we block getting this lock we may never - * wake. Unfortunately, that means if we can't get the lock, we're - * screwed in terms of getting a stream association - we can't spin - * waiting for the lock because someone else is waiting on the lock we - * hold and we cannot drop that as we are in a transaction here. - * - * Lucky for us, this inversion is not a problem because it's a - * directory inode that we are trying to lock here. - * - * So, if we can't get the iolock without sleeping then just give up - */ - if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) - return 1; - - /* If the parent directory is already in the cache, use its AG. */ - item = xfs_mru_cache_lookup(cache, pip->i_ino); - if (item) { - ASSERT(item->ip == pip); - ag = item->ag; - xfs_mru_cache_done(cache); - - TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); - err = _xfs_filestream_update_ag(ip, pip, ag); + mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); + if (mru) { + ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; + xfs_mru_cache_done(mp->m_filestream); - goto exit; + trace_xfs_filestream_lookup(ip, ag); + goto out; } /* @@ -623,202 +341,94 @@ xfs_filestream_associate( * use the directory inode's AG. */ if (mp->m_flags & XFS_MOUNT_32BITINODES) { - rotorstep = xfs_rotorstep; + xfs_agnumber_t rotorstep = xfs_rotorstep; startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; mp->m_agfrotor = (mp->m_agfrotor + 1) % (mp->m_sb.sb_agcount * rotorstep); } else startag = XFS_INO_TO_AGNO(mp, pip->i_ino); - /* Pick a new AG for the parent inode starting at startag. */ - err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); - if (err || ag == NULLAGNUMBER) - goto exit_did_pick; - - /* Associate the parent inode with the AG. */ - err = _xfs_filestream_update_ag(pip, NULL, ag); - if (err) - goto exit_did_pick; - - /* Associate the file inode with the AG. */ - err = _xfs_filestream_update_ag(ip, pip, ag); - if (err) - goto exit_did_pick; - - TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); - -exit_did_pick: - /* - * If _xfs_filestream_pick_ag() returned a valid AG, remove the - * reference it took on it, since the file and directory will have taken - * their own now if they were successfully cached. - */ - if (ag != NULLAGNUMBER) - xfs_filestream_put_ag(mp, ag); - -exit: - xfs_iunlock(pip, XFS_IOLOCK_EXCL); - return -err; + if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0)) + ag = NULLAGNUMBER; +out: + IRELE(pip); + return ag; } /* - * Pick a new allocation group for the current file and its file stream. This - * function is called by xfs_bmap_filestreams() with the mount point's per-ag - * lock held. + * Pick a new allocation group for the current file and its file stream. + * + * This is called when the allocator can't find a suitable extent in the + * current AG, and we have to move the stream into a new AG with more space. */ int xfs_filestream_new_ag( struct xfs_bmalloca *ap, xfs_agnumber_t *agp) { - int flags, err; - xfs_inode_t *ip, *pip = NULL; - xfs_mount_t *mp; - xfs_mru_cache_t *cache; - xfs_extlen_t minlen; - fstrm_item_t *dir, *file; - xfs_agnumber_t ag = NULLAGNUMBER; - - ip = ap->ip; - mp = ip->i_mount; - cache = mp->m_filestream; - minlen = ap->length; - *agp = NULLAGNUMBER; + struct xfs_inode *ip = ap->ip, *pip; + struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t minlen = ap->length; + xfs_agnumber_t startag = 0; + int flags, err = 0; + struct xfs_mru_cache_elem *mru; - /* - * Look for the file in the cache, removing it if it's found. Doing - * this allows it to be held across the dir lookup that follows. - */ - file = xfs_mru_cache_remove(cache, ip->i_ino); - if (file) { - ASSERT(ip == file->ip); - - /* Save the file's parent inode and old AG number for later. */ - pip = file->pip; - ag = file->ag; - - /* Look for the file's directory in the cache. */ - dir = xfs_mru_cache_lookup(cache, pip->i_ino); - if (dir) { - ASSERT(pip == dir->ip); - - /* - * If the directory has already moved on to a new AG, - * use that AG as the new AG for the file. Don't - * forget to twiddle the AG refcounts to match the - * movement. - */ - if (dir->ag != file->ag) { - xfs_filestream_put_ag(mp, file->ag); - xfs_filestream_get_ag(mp, dir->ag); - *agp = file->ag = dir->ag; - } - - xfs_mru_cache_done(cache); - } + *agp = NULLAGNUMBER; - /* - * Put the file back in the cache. If this fails, the free - * function needs to be called to tidy up in the same way as if - * the item had simply expired from the cache. - */ - err = xfs_mru_cache_insert(cache, ip->i_ino, file); - if (err) { - xfs_fstrm_free_func(ip->i_ino, file); - return err; - } + pip = xfs_filestream_get_parent(ip); + if (!pip) + goto exit; - /* - * If the file's AG was moved to the directory's new AG, there's - * nothing more to be done. - */ - if (*agp != NULLAGNUMBER) { - TRACE_MOVEAG(mp, ip, pip, - ag, xfs_filestream_peek_ag(mp, ag), - *agp, xfs_filestream_peek_ag(mp, *agp)); - return 0; - } + mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); + if (mru) { + struct xfs_fstrm_item *item = + container_of(mru, struct xfs_fstrm_item, mru); + startag = (item->ag + 1) % mp->m_sb.sb_agcount; } - /* - * If the file's parent directory is known, take its iolock in exclusive - * mode to prevent two sibling files from racing each other to migrate - * themselves and their parent to different AGs. - * - * Note that we lock the parent directory iolock inside the child - * iolock here. That's fine as we never hold both parent and child - * iolock in any other place. This is different from the ilock, - * which requires locking of the child after the parent for namespace - * operations. - */ - if (pip) - xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); - - /* - * A new AG needs to be found for the file. If the file's parent - * directory is also known, it will be moved to the new AG as well to - * ensure that files created inside it in future use the new AG. - */ - ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); - err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); - if (err || *agp == NULLAGNUMBER) - goto exit; + err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); /* - * If the file wasn't found in the file cache, then its parent directory - * inode isn't known. For this to have happened, the file must either - * be pre-existing, or it was created long enough ago that its cache - * entry has expired. This isn't the sort of usage that the filestreams - * allocator is trying to optimise, so there's no point trying to track - * its new AG somehow in the filestream data structures. + * Only free the item here so we skip over the old AG earlier. */ - if (!pip) { - TRACE_ORPHAN(mp, ip, *agp); - goto exit; - } - - /* Associate the parent inode with the AG. */ - err = _xfs_filestream_update_ag(pip, NULL, *agp); - if (err) - goto exit; - - /* Associate the file inode with the AG. */ - err = _xfs_filestream_update_ag(ip, pip, *agp); - if (err) - goto exit; - - TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, - *agp, xfs_filestream_peek_ag(mp, *agp)); + if (mru) + xfs_fstrm_free_func(mru); + IRELE(pip); exit: - /* - * If _xfs_filestream_pick_ag() returned a valid AG, remove the - * reference it took on it, since the file and directory will have taken - * their own now if they were successfully cached. - */ - if (*agp != NULLAGNUMBER) - xfs_filestream_put_ag(mp, *agp); - else + if (*agp == NULLAGNUMBER) *agp = 0; - - if (pip) - xfs_iunlock(pip, XFS_IOLOCK_EXCL); - return err; } -/* - * Remove an association between an inode and a filestream object. - * Typically this is done on last close of an unlinked file. - */ void xfs_filestream_deassociate( - xfs_inode_t *ip) + struct xfs_inode *ip) { - xfs_mru_cache_t *cache = ip->i_mount->m_filestream; + xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); +} + +int +xfs_filestream_mount( + xfs_mount_t *mp) +{ + /* + * The filestream timer tunable is currently fixed within the range of + * one second to four minutes, with five seconds being the default. The + * group count is somewhat arbitrary, but it'd be nice to adhere to the + * timer tunable to within about 10 percent. This requires at least 10 + * groups. + */ + return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10, + 10, xfs_fstrm_free_func); +} - xfs_mru_cache_delete(cache, ip->i_ino); +void +xfs_filestream_unmount( + xfs_mount_t *mp) +{ + xfs_mru_cache_destroy(mp->m_filestream); } diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 6d61dbee8564..2ef43406e53b 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -20,50 +20,20 @@ struct xfs_mount; struct xfs_inode; -struct xfs_perag; struct xfs_bmalloca; -#ifdef XFS_FILESTREAMS_TRACE -#define XFS_FSTRM_KTRACE_INFO 1 -#define XFS_FSTRM_KTRACE_AGSCAN 2 -#define XFS_FSTRM_KTRACE_AGPICK1 3 -#define XFS_FSTRM_KTRACE_AGPICK2 4 -#define XFS_FSTRM_KTRACE_UPDATE 5 -#define XFS_FSTRM_KTRACE_FREE 6 -#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7 -#define XFS_FSTRM_KTRACE_ASSOCIATE 8 -#define XFS_FSTRM_KTRACE_MOVEAG 9 -#define XFS_FSTRM_KTRACE_ORPHAN 10 - -#define XFS_FSTRM_KTRACE_SIZE 16384 -extern ktrace_t *xfs_filestreams_trace_buf; - -#endif - -/* allocation selection flags */ -typedef enum xfs_fstrm_alloc { - XFS_PICK_USERDATA = 1, - XFS_PICK_LOWSPACE = 2, -} xfs_fstrm_alloc_t; - -/* prototypes for filestream.c */ -int xfs_filestream_init(void); -void xfs_filestream_uninit(void); int xfs_filestream_mount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp); -xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); -int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); void xfs_filestream_deassociate(struct xfs_inode *ip); +xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); +int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno); - -/* filestreams for the inode? */ static inline int xfs_inode_is_filestream( struct xfs_inode *ip) { return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || - xfs_iflags_test(ip, XFS_IFILESTREAM) || (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM); } diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index 9898f31d05d8..34d85aca3058 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h @@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t; */ #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ +#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */ +#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */ typedef __uint64_t xfs_inofree_t; #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) @@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t; * block numbers in the AG. */ #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) -#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) +#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) + +/* + * The first data block of an AG depends on whether the filesystem was formatted + * with the finobt feature. If so, account for the finobt reserved root btree + * block. + */ +#define XFS_PREALLOC_BLOCKS(mp) \ + (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ + XFS_FIBT_BLOCK(mp) + 1 : \ + XFS_IBT_BLOCK(mp) + 1) diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c5fc116dfaa3..d34703dbcb42 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ +#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 02fb943cbf22..d2295561570a 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -24,6 +24,8 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" @@ -74,23 +76,18 @@ xfs_fs_geometry( } if (new_version >= 3) { geo->version = XFS_FSOP_GEOM_VERSION; - geo->flags = + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | + XFS_FSOP_GEOM_FLAGS_DIRV2 | (xfs_sb_version_hasattr(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_ATTR : 0) | - (xfs_sb_version_hasnlink(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_NLINK : 0) | (xfs_sb_version_hasquota(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | (xfs_sb_version_hasalign(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | (xfs_sb_version_hasdalign(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | - (xfs_sb_version_hasshared(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SHARED : 0) | (xfs_sb_version_hasextflgbit(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | - (xfs_sb_version_hasdirv2(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | (xfs_sb_version_hassector(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | (xfs_sb_version_hasasciici(&mp->m_sb) ? @@ -104,11 +101,13 @@ xfs_fs_geometry( (xfs_sb_version_hascrc(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_V5SB : 0) | (xfs_sb_version_hasftype(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FTYPE : 0); + XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | + (xfs_sb_version_hasfinobt(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_FINOBT : 0); geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? mp->m_sb.sb_logsectsize : BBSIZE; geo->rtsectsize = mp->m_sb.sb_blocksize; - geo->dirblocksize = mp->m_dirblksize; + geo->dirblocksize = mp->m_dir_geo->blksize; } if (new_version >= 4) { geo->flags |= @@ -316,6 +315,10 @@ xfs_growfs_data_private( agi->agi_dirino = cpu_to_be32(NULLAGINO); if (xfs_sb_version_hascrc(&mp->m_sb)) uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); + agi->agi_free_level = cpu_to_be32(1); + } for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); @@ -407,6 +410,34 @@ xfs_growfs_data_private( xfs_buf_relse(bp); if (error) goto error0; + + /* + * FINO btree root block + */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + bp = xfs_growfs_get_hdr_buf(mp, + XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)), + BTOBB(mp->m_sb.sb_blocksize), 0, + &xfs_inobt_buf_ops); + if (!bp) { + error = ENOMEM; + goto error0; + } + + if (xfs_sb_version_hascrc(&mp->m_sb)) + xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, + 0, 0, agno, + XFS_BTREE_CRC_BLOCKS); + else + xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, + 0, agno, 0); + + error = xfs_bwrite(bp); + xfs_buf_relse(bp); + if (error) + goto error0; + } + } xfs_trans_agblocks_delta(tp, nfree); /* diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 8f711db61a0c..5960e5593fe0 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -112,6 +112,66 @@ xfs_inobt_get_rec( } /* + * Insert a single inobt record. Cursor must already point to desired location. + */ +STATIC int +xfs_inobt_insert_rec( + struct xfs_btree_cur *cur, + __int32_t freecount, + xfs_inofree_t free, + int *stat) +{ + cur->bc_rec.i.ir_freecount = freecount; + cur->bc_rec.i.ir_free = free; + return xfs_btree_insert(cur, stat); +} + +/* + * Insert records describing a newly allocated inode chunk into the inobt. + */ +STATIC int +xfs_inobt_insert( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t newino, + xfs_agino_t newlen, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agino_t thisino; + int i; + int error; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + + for (thisino = newino; + thisino < newino + newlen; + thisino += XFS_INODES_PER_CHUNK) { + error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 0); + + error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, + XFS_INOBT_ALL_FREE, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 1); + } + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + + return 0; +} + +/* * Verify that the number of free inodes in the AGI is correct. */ #ifdef DEBUG @@ -220,10 +280,8 @@ xfs_ialloc_inode_init( if (tp) xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, mp->m_sb.sb_inodesize, length, gen); - } else if (xfs_sb_version_hasnlink(&mp->m_sb)) + } else version = 2; - else - version = 1; for (j = 0; j < nbufs; j++) { /* @@ -303,13 +361,10 @@ xfs_ialloc_ag_alloc( { xfs_agi_t *agi; /* allocation group header */ xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_btree_cur_t *cur; /* inode btree cursor */ xfs_agnumber_t agno; int error; - int i; xfs_agino_t newino; /* new first inode's number */ xfs_agino_t newlen; /* new number of inodes */ - xfs_agino_t thisino; /* current inode number, for loop */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ struct xfs_perag *pag; @@ -459,29 +514,19 @@ xfs_ialloc_ag_alloc( agi->agi_newino = cpu_to_be32(newino); /* - * Insert records describing the new inode chunk into the btree. + * Insert records describing the new inode chunk into the btrees. */ - cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); - for (thisino = newino; - thisino < newino + newlen; - thisino += XFS_INODES_PER_CHUNK) { - cur->bc_rec.i.ir_startino = thisino; - cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; - cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; - error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; - } - ASSERT(i == 0); - error = xfs_btree_insert(cur, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_INO); + if (error) + return error; + + if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_FINO); + if (error) return error; - } - ASSERT(i == 1); } - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); /* * Log allocation group header fields */ @@ -675,13 +720,10 @@ xfs_ialloc_get_rec( } /* - * Allocate an inode. - * - * The caller selected an AG for us, and made sure that free inodes are - * available. + * Allocate an inode using the inobt-only algorithm. */ STATIC int -xfs_dialloc_ag( +xfs_dialloc_ag_inobt( struct xfs_trans *tp, struct xfs_buf *agbp, xfs_ino_t parent, @@ -707,7 +749,7 @@ xfs_dialloc_ag( ASSERT(pag->pagi_freecount > 0); restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -940,6 +982,294 @@ error0: } /* + * Use the free inode btree to allocate an inode based on distance from the + * parent. Note that the provided cursor may be deleted and replaced. + */ +STATIC int +xfs_dialloc_ag_finobt_near( + xfs_agino_t pagino, + struct xfs_btree_cur **ocur, + struct xfs_inobt_rec_incore *rec) +{ + struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ + struct xfs_btree_cur *rcur; /* right search cursor */ + struct xfs_inobt_rec_incore rrec; + int error; + int i, j; + + error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); + if (error) + return error; + + if (i == 1) { + error = xfs_inobt_get_rec(lcur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + /* + * See if we've landed in the parent inode record. The finobt + * only tracks chunks with at least one free inode, so record + * existence is enough. + */ + if (pagino >= rec->ir_startino && + pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) + return 0; + } + + error = xfs_btree_dup_cursor(lcur, &rcur); + if (error) + return error; + + error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); + if (error) + goto error_rcur; + if (j == 1) { + error = xfs_inobt_get_rec(rcur, &rrec, &j); + if (error) + goto error_rcur; + XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); + } + + XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); + if (i == 1 && j == 1) { + /* + * Both the left and right records are valid. Choose the closer + * inode chunk to the target. + */ + if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > + (rrec.ir_startino - pagino)) { + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else { + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + } else if (j == 1) { + /* only the right record is valid */ + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else if (i == 1) { + /* only the left record is valid */ + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + + return 0; + +error_rcur: + xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Use the free inode btree to find a free inode based on a newino hint. If + * the hint is NULL, find the first free inode in the AG. + */ +STATIC int +xfs_dialloc_ag_finobt_newino( + struct xfs_agi *agi, + struct xfs_btree_cur *cur, + struct xfs_inobt_rec_incore *rec) +{ + int error; + int i; + + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { + error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, + &i); + if (error) + return error; + if (i == 1) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; + } + } + + /* + * Find the first inode available in the AG. + */ + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; +} + +/* + * Update the inobt based on a modification made to the finobt. Also ensure that + * the records from both trees are equivalent post-modification. + */ +STATIC int +xfs_dialloc_ag_update_inobt( + struct xfs_btree_cur *cur, /* inobt cursor */ + struct xfs_inobt_rec_incore *frec, /* finobt record */ + int offset) /* inode offset */ +{ + struct xfs_inobt_rec_incore rec; + int error; + int i; + + error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + + XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && + (rec.ir_freecount == frec->ir_freecount)); + + error = xfs_inobt_update(cur, &rec); + if (error) + return error; + + return 0; +} + +/* + * Allocate an inode using the free inode btree, if available. Otherwise, fall + * back to the inobt search algorithm. + * + * The caller selected an AG for us, and made sure that free inodes are + * available. + */ +STATIC int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; /* finobt cursor */ + struct xfs_btree_cur *icur; /* inobt cursor */ + struct xfs_inobt_rec_incore rec; + xfs_ino_t ino; + int error; + int offset; + int i; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); + + pag = xfs_perag_get(mp, agno); + + /* + * If pagino is 0 (this is the root inode allocation) use newino. + * This must work because we've just allocated some. + */ + if (!pagino) + pagino = be32_to_cpu(agi->agi_newino); + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_cur; + + /* + * The search algorithm depends on whether we're in the same AG as the + * parent. If so, find the closest available inode to the parent. If + * not, consider the agi hint or find the first free inode in the AG. + */ + if (agno == pagno) + error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); + else + error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); + if (error) + goto error_cur; + + offset = xfs_lowbit64(rec.ir_free); + ASSERT(offset >= 0); + ASSERT(offset < XFS_INODES_PER_CHUNK); + ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + + /* + * Modify or remove the finobt record. + */ + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + if (rec.ir_freecount) + error = xfs_inobt_update(cur, &rec); + else + error = xfs_btree_delete(cur, &i); + if (error) + goto error_cur; + + /* + * The finobt has now been updated appropriately. We haven't updated the + * agi and superblock yet, so we can create an inobt cursor and validate + * the original freecount. If all is well, make the equivalent update to + * the inobt using the finobt record and offset information. + */ + icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + + error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); + if (error) + goto error_icur; + + /* + * Both trees have now been updated. We must update the perag and + * superblock before we can check the freecount for each btree. + */ + be32_add_cpu(&agi->agi_freecount, -1); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); + pag->pagi_freecount--; + + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_icur; + + xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_perag_put(pag); + *inop = ino; + return 0; + +error_icur: + xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); +error_cur: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); + return error; +} + +/* * Allocate an inode on disk. * * Mode is used to tell whether the new inode will need space, and whether it @@ -1098,78 +1428,34 @@ out_error: return XFS_ERROR(error); } -/* - * Free disk inode. Carefully avoids touching the incore inode, all - * manipulations incore are the caller's responsibility. - * The on-disk inode is not changed by this operation, only the - * btree (free inode mask) is changed. - */ -int -xfs_difree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t inode, /* inode to be freed */ - xfs_bmap_free_t *flist, /* extents to free */ - int *delete, /* set if inode cluster was deleted */ - xfs_ino_t *first_ino) /* first inode in deleted cluster */ +STATIC int +xfs_difree_inobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_bmap_free *flist, + int *deleted, + xfs_ino_t *first_ino, + struct xfs_inobt_rec_incore *orec) { - /* REFERENCED */ - xfs_agblock_t agbno; /* block number containing inode */ - xfs_buf_t *agbp; /* buffer containing allocation group header */ - xfs_agino_t agino; /* inode number relative to allocation group */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_agi_t *agi; /* allocation group header */ - xfs_btree_cur_t *cur; /* inode btree cursor */ - int error; /* error return value */ - int i; /* result code */ - int ilen; /* inodes in an inode cluster */ - xfs_mount_t *mp; /* mount structure for filesystem */ - int off; /* offset of inode in inode chunk */ - xfs_inobt_rec_incore_t rec; /* btree record */ - struct xfs_perag *pag; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int ilen; + int error; + int i; + int off; - mp = tp->t_mountp; - - /* - * Break up inode number into its components. - */ - agno = XFS_INO_TO_AGNO(mp, inode); - if (agno >= mp->m_sb.sb_agcount) { - xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", - __func__, agno, mp->m_sb.sb_agcount); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - agino = XFS_INO_TO_AGINO(mp, inode); - if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { - xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", - __func__, (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks) { - xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", - __func__, agbno, mp->m_sb.sb_agblocks); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - /* - * Get the allocation group header. - */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", - __func__, error); - return error; - } - agi = XFS_BUF_TO_AGI(agbp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - ASSERT(agbno < be32_to_cpu(agi->agi_length)); + ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length)); + /* * Initialize the cursor. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); error = xfs_check_agi_freecount(cur, agi); if (error) @@ -1209,7 +1495,7 @@ xfs_difree( if (!(mp->m_flags & XFS_MOUNT_IKEEP) && (rec.ir_freecount == mp->m_ialloc_inos)) { - *delete = 1; + *deleted = 1; *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); /* @@ -1237,7 +1523,7 @@ xfs_difree( XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), mp->m_ialloc_blks, flist, mp); } else { - *delete = 0; + *deleted = 0; error = xfs_inobt_update(cur, &rec); if (error) { @@ -1261,6 +1547,7 @@ xfs_difree( if (error) goto error0; + *orec = rec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return 0; @@ -1269,6 +1556,182 @@ error0: return error; } +/* + * Free an inode in the free inode btree. + */ +STATIC int +xfs_difree_finobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int offset = agino - ibtrec->ir_startino; + int error; + int i; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + goto error; + if (i == 0) { + /* + * If the record does not exist in the finobt, we must have just + * freed an inode in a previously fully allocated chunk. If not, + * something is out of sync. + */ + XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); + + error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, + ibtrec->ir_free, &i); + if (error) + goto error; + ASSERT(i == 1); + + goto out; + } + + /* + * Read and update the existing record. We could just copy the ibtrec + * across here, but that would defeat the purpose of having redundant + * metadata. By making the modifications independently, we can catch + * corruptions that we wouldn't see if we just copied from one record + * to another. + */ + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(i == 1, error); + + rec.ir_free |= XFS_INOBT_MASK(offset); + rec.ir_freecount++; + + XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && + (rec.ir_freecount == ibtrec->ir_freecount), + error); + + /* + * The content of inobt records should always match between the inobt + * and finobt. The lifecycle of records in the finobt is different from + * the inobt in that the finobt only tracks records with at least one + * free inode. Hence, if all of the inodes are free and we aren't + * keeping inode chunks permanently on disk, remove the record. + * Otherwise, update the record with the new information. + */ + if (rec.ir_freecount == mp->m_ialloc_inos && + !(mp->m_flags & XFS_MOUNT_IKEEP)) { + error = xfs_btree_delete(cur, &i); + if (error) + goto error; + ASSERT(i == 1); + } else { + error = xfs_inobt_update(cur, &rec); + if (error) + goto error; + } + +out: + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error; + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; + +error: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Free disk inode. Carefully avoids touching the incore inode, all + * manipulations incore are the caller's responsibility. + * The on-disk inode is not changed by this operation, only the + * btree (free inode mask) is changed. + */ +int +xfs_difree( + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t inode, /* inode to be freed */ + struct xfs_bmap_free *flist, /* extents to free */ + int *deleted,/* set if inode cluster was deleted */ + xfs_ino_t *first_ino)/* first inode in deleted cluster */ +{ + /* REFERENCED */ + xfs_agblock_t agbno; /* block number containing inode */ + struct xfs_buf *agbp; /* buffer for allocation group header */ + xfs_agino_t agino; /* allocation group inode number */ + xfs_agnumber_t agno; /* allocation group number */ + int error; /* error return value */ + struct xfs_mount *mp; /* mount structure for filesystem */ + struct xfs_inobt_rec_incore rec;/* btree record */ + + mp = tp->t_mountp; + + /* + * Break up inode number into its components. + */ + agno = XFS_INO_TO_AGNO(mp, inode); + if (agno >= mp->m_sb.sb_agcount) { + xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", + __func__, agno, mp->m_sb.sb_agcount); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + agino = XFS_INO_TO_AGINO(mp, inode); + if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { + xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", + __func__, (unsigned long long)inode, + (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + if (agbno >= mp->m_sb.sb_agblocks) { + xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", + __func__, agbno, mp->m_sb.sb_agblocks); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + /* + * Get the allocation group header. + */ + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", + __func__, error); + return error; + } + + /* + * Fix up the inode allocation btree. + */ + error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, + &rec); + if (error) + goto error0; + + /* + * Fix up the free inode btree. + */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); + if (error) + goto error0; + } + + return 0; + +error0: + return error; +} + STATIC int xfs_imap_lookup( struct xfs_mount *mp, @@ -1300,7 +1763,7 @@ xfs_imap_lookup( * we have a record, we need to ensure it contains the inode number * we are looking up. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) @@ -1488,7 +1951,16 @@ xfs_ialloc_compute_maxlevels( } /* - * Log specified fields for the ag hdr (inode section) + * Log specified fields for the ag hdr (inode section). The growth of the agi + * structure over time requires that we interpret the buffer as two logical + * regions delineated by the end of the unlinked list. This is due to the size + * of the hash table and its location in the middle of the agi. + * + * For example, a request to log a field before agi_unlinked and a field after + * agi_unlinked could cause us to log the entire hash table and use an excessive + * amount of log space. To avoid this behavior, log the region up through + * agi_unlinked in one call and the region after agi_unlinked through the end of + * the structure in another. */ void xfs_ialloc_log_agi( @@ -1511,6 +1983,8 @@ xfs_ialloc_log_agi( offsetof(xfs_agi_t, agi_newino), offsetof(xfs_agi_t, agi_dirino), offsetof(xfs_agi_t, agi_unlinked), + offsetof(xfs_agi_t, agi_free_root), + offsetof(xfs_agi_t, agi_free_level), sizeof(xfs_agi_t) }; #ifdef DEBUG @@ -1519,15 +1993,30 @@ xfs_ialloc_log_agi( agi = XFS_BUF_TO_AGI(bp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif + + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); + /* - * Compute byte offsets for the first and last fields. + * Compute byte offsets for the first and last fields in the first + * region and log the agi buffer. This only logs up through + * agi_unlinked. */ - xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); + if (fields & XFS_AGI_ALL_BITS_R1) { + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, + &first, &last); + xfs_trans_log_buf(tp, bp, first, last); + } + /* - * Log the allocation group inode header buffer. + * Mask off the bits in the first region and calculate the first and + * last field offsets for any bits in the second region. */ - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); - xfs_trans_log_buf(tp, bp, first, last); + fields &= ~XFS_AGI_ALL_BITS_R1; + if (fields) { + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, + &first, &last); + xfs_trans_log_buf(tp, bp, first, last); + } } #ifdef DEBUG @@ -1640,7 +2129,6 @@ xfs_read_agi( if (error) return error; - ASSERT(!xfs_buf_geterror(*bpp)); xfs_buf_set_ref(*bpp, XFS_AGI_REF); return 0; } diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 812365d17e67..95ad1c002d60 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -90,7 +90,7 @@ xfs_difree( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t inode, /* inode to be freed */ struct xfs_bmap_free *flist, /* extents to free */ - int *delete, /* set if inode cluster was deleted */ + int *deleted, /* set if inode cluster was deleted */ xfs_ino_t *first_ino); /* first inode in deleted cluster */ /* diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 7e309b11e87d..726f83a681a5 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -49,7 +49,8 @@ xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno); + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); } STATIC void @@ -66,12 +67,26 @@ xfs_inobt_set_root( xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); } +STATIC void +xfs_finobt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, + int inc) /* level change */ +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + + agi->agi_free_root = nptr->s; + be32_add_cpu(&agi->agi_free_level, inc); + xfs_ialloc_log_agi(cur->bc_tp, agbp, + XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); +} + STATIC int xfs_inobt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int length, int *stat) { xfs_alloc_arg_t args; /* block allocation args */ @@ -173,6 +188,17 @@ xfs_inobt_init_ptr_from_cur( ptr->s = agi->agi_root; } +STATIC void +xfs_finobt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + ptr->s = agi->agi_free_root; +} + STATIC __int64_t xfs_inobt_key_diff( struct xfs_btree_cur *cur, @@ -203,6 +229,7 @@ xfs_inobt_verify( */ switch (block->bb_magic) { case cpu_to_be32(XFS_IBT_CRC_MAGIC): + case cpu_to_be32(XFS_FIBT_CRC_MAGIC): if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) @@ -214,6 +241,7 @@ xfs_inobt_verify( return false; /* fall through */ case cpu_to_be32(XFS_IBT_MAGIC): + case cpu_to_be32(XFS_FIBT_MAGIC): break; default: return 0; @@ -317,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = { #endif }; +static const struct xfs_btree_ops xfs_finobt_ops = { + .rec_len = sizeof(xfs_inobt_rec_t), + .key_len = sizeof(xfs_inobt_key_t), + + .dup_cursor = xfs_inobt_dup_cursor, + .set_root = xfs_finobt_set_root, + .alloc_block = xfs_inobt_alloc_block, + .free_block = xfs_inobt_free_block, + .get_minrecs = xfs_inobt_get_minrecs, + .get_maxrecs = xfs_inobt_get_maxrecs, + .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_rec_from_key = xfs_inobt_init_rec_from_key, + .init_rec_from_cur = xfs_inobt_init_rec_from_cur, + .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, + .key_diff = xfs_inobt_key_diff, + .buf_ops = &xfs_inobt_buf_ops, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_inobt_keys_inorder, + .recs_inorder = xfs_inobt_recs_inorder, +#endif +}; + /* * Allocate a new inode btree cursor. */ @@ -325,7 +375,8 @@ xfs_inobt_init_cursor( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ struct xfs_buf *agbp, /* buffer for agi structure */ - xfs_agnumber_t agno) /* allocation group number */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* ialloc or free ino btree */ { struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_btree_cur *cur; @@ -334,11 +385,17 @@ xfs_inobt_init_cursor( cur->bc_tp = tp; cur->bc_mp = mp; - cur->bc_nlevels = be32_to_cpu(agi->agi_level); - cur->bc_btnum = XFS_BTNUM_INO; + cur->bc_btnum = btnum; + if (btnum == XFS_BTNUM_INO) { + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + cur->bc_ops = &xfs_inobt_ops; + } else { + cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); + cur->bc_ops = &xfs_finobt_ops; + } + cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_inobt_ops; if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index f38b22011c4e..d7ebea72c2d0 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -58,7 +58,8 @@ struct xfs_mount; ((index) - 1) * sizeof(xfs_inobt_ptr_t))) extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, - struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); + struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, + xfs_btnum_t); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 98d35244eecc..c48df5f25b9f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -507,8 +507,7 @@ STATIC int xfs_inode_ag_walk( struct xfs_mount *mp, struct xfs_perag *pag, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags, + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, @@ -582,7 +581,7 @@ restart: for (i = 0; i < nr_found; i++) { if (!batch[i]) continue; - error = execute(batch[i], pag, flags, args); + error = execute(batch[i], flags, args); IRELE(batch[i]); if (error == EAGAIN) { skipped++; @@ -636,8 +635,7 @@ xfs_eofblocks_worker( int xfs_inode_ag_iterator( struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags, + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args) @@ -664,8 +662,7 @@ xfs_inode_ag_iterator( int xfs_inode_ag_iterator_tag( struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags, + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, @@ -1209,7 +1206,6 @@ xfs_inode_match_id( STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, - struct xfs_perag *pag, int flags, void *args) { diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 9ed68bb750f5..9cf017b899be 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -60,12 +60,10 @@ int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); void xfs_eofblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, - int flags, void *args), + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, - int flags, void *args), + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, int tag); static inline int diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 768087bedbac..a6115fe1ac94 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -655,7 +655,6 @@ xfs_ialloc( uint flags; int error; timespec_t tv; - int filestreams = 0; /* * Call the space management code to pick @@ -682,6 +681,14 @@ xfs_ialloc( return error; ASSERT(ip != NULL); + /* + * We always convert v1 inodes to v2 now - we only support filesystems + * with >= v2 inode capability, so there is no reason for ever leaving + * an inode in v1 format. + */ + if (ip->i_d.di_version == 1) + ip->i_d.di_version = 2; + ip->i_d.di_mode = mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; @@ -691,27 +698,6 @@ xfs_ialloc( xfs_set_projid(ip, prid); memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - /* - * If the superblock version is up to where we support new format - * inodes and this is currently an old format inode, then change - * the inode version number now. This way we only do the conversion - * here rather than here and in the flush/logging code. - */ - if (xfs_sb_version_hasnlink(&mp->m_sb) && - ip->i_d.di_version == 1) { - ip->i_d.di_version = 2; - /* - * We've already zeroed the old link count, the projid field, - * and the pad field. - */ - } - - /* - * Project ids won't be stored on disk if we are using a version 1 inode. - */ - if ((prid != 0) && (ip->i_d.di_version == 1)) - xfs_bump_ino_vers2(tp, ip); - if (pip && XFS_INHERIT_GID(pip)) { ip->i_d.di_gid = pip->i_d.di_gid; if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { @@ -772,13 +758,6 @@ xfs_ialloc( flags |= XFS_ILOG_DEV; break; case S_IFREG: - /* - * we can't set up filestreams until after the VFS inode - * is set up properly. - */ - if (pip && xfs_inode_is_filestream(pip)) - filestreams = 1; - /* fall through */ case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { uint di_flags = 0; @@ -844,15 +823,6 @@ xfs_ialloc( /* now that we have an i_mode we can setup inode ops and unlock */ xfs_setup_inode(ip); - /* now we have set up the vfs inode we can associate the filestream */ - if (filestreams) { - error = xfs_filestream_associate(pip, ip); - if (error < 0) - return -error; - if (!error) - xfs_iflags_set(ip, XFS_IFILESTREAM); - } - *ipp = ip; return 0; } @@ -1073,40 +1043,6 @@ xfs_droplink( } /* - * This gets called when the inode's version needs to be changed from 1 to 2. - * Currently this happens when the nlink field overflows the old 16-bit value - * or when chproj is called to change the project for the first time. - * As a side effect the superblock version will also get rev'd - * to contain the NLINK bit. - */ -void -xfs_bump_ino_vers2( - xfs_trans_t *tp, - xfs_inode_t *ip) -{ - xfs_mount_t *mp; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_d.di_version == 1); - - ip->i_d.di_version = 2; - ip->i_d.di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - mp = tp->t_mountp; - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - spin_lock(&mp->m_sb_lock); - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - xfs_sb_version_addnlink(&mp->m_sb); - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_VERSIONNUM); - } else { - spin_unlock(&mp->m_sb_lock); - } - } - /* Caller must log the inode */ -} - -/* * Increment the link count on an inode & log the change. */ int @@ -1116,22 +1052,10 @@ xfs_bumplink( { xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + ASSERT(ip->i_d.di_version > 1); ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE)); ip->i_d.di_nlink++; inc_nlink(VFS_I(ip)); - if ((ip->i_d.di_version == 1) && - (ip->i_d.di_nlink > XFS_MAXLINK_1)) { - /* - * The inode has increased its number of links beyond - * what can fit in an old format inode. It now needs - * to be converted to a version 2 inode with a 32 bit - * link count. If this is the first inode in the file - * system to do this, then we need to bump the superblock - * version number as well. - */ - xfs_bump_ino_vers2(tp, ip); - } - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); return 0; } @@ -1699,16 +1623,6 @@ xfs_release( int truncated; /* - * If we are using filestreams, and we have an unlinked - * file that we are processing the last close on, then nothing - * will be able to reopen and write to this file. Purge this - * inode from the filestreams cache so that it doesn't delay - * teardown of the inode. - */ - if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) - xfs_filestream_deassociate(ip); - - /* * If we previously truncated this file and removed old data * in the process, we want to initiate "early" writeout on * the last close. This is an attempt to combat the notorious @@ -1838,9 +1752,33 @@ xfs_inactive_ifree( int error; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); + + /* + * The ifree transaction might need to allocate blocks for record + * insertion to the finobt. We don't want to fail here at ENOSPC, so + * allow ifree to dip into the reserved block pool if necessary. + * + * Freeing large sets of inodes generally means freeing inode chunks, + * directory and file data blocks, so this should be relatively safe. + * Only under severe circumstances should it be possible to free enough + * inodes to exhaust the reserve block pool via finobt expansion while + * at the same time not creating free space in the filesystem. + * + * Send a warning if the reservation does happen to fail, as the inode + * now remains allocated and sits on the unlinked list until the fs is + * repaired. + */ + tp->t_flags |= XFS_TRANS_RESERVE; + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, + XFS_IFREE_SPACE_RES(mp), 0); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + if (error == ENOSPC) { + xfs_warn_ratelimited(mp, + "Failed to remove inode(s) from unlinked list. " + "Please free space, unmount and run xfs_repair."); + } else { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); return error; } @@ -2664,13 +2602,7 @@ xfs_remove( if (error) goto std_return; - /* - * If we are using filestreams, kill the stream association. - * If the file is still open it may get a new one but that - * will get killed on last close in xfs_close() so we don't - * have to worry about that. - */ - if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) + if (is_dir && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); return 0; @@ -3258,6 +3190,7 @@ xfs_iflush_int( ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); ASSERT(iip != NULL && iip->ili_fields != 0); + ASSERT(ip->i_d.di_version > 1); /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -3318,7 +3251,7 @@ xfs_iflush_int( } /* - * Inode item log recovery for v1/v2 inodes are dependent on the + * Inode item log recovery for v2 inodes are dependent on the * di_flushiter count for correct sequencing. We bump the flush * iteration count so we can detect flushes which postdate a log record * during recovery. This is redundant as we now log every change and @@ -3341,40 +3274,9 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - /* - * If this is really an old format inode and the superblock version - * has not been updated to support only new format inodes, then - * convert back to the old inode format. If the superblock version - * has been updated, then make the conversion permanent. - */ - ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == 1) { - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - /* - * Convert it back. - */ - ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); - } else { - /* - * The superblock version has already been bumped, - * so just make the conversion to the new inode - * format permanent. - */ - ip->i_d.di_version = 2; - dip->di_version = 2; - ip->i_d.di_onlink = 0; - dip->di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - memset(&(dip->di_pad[0]), 0, - sizeof(dip->di_pad)); - ASSERT(xfs_get_projid(ip) == 0); - } - } - - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); + xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); if (XFS_IFORK_Q(ip)) - xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); xfs_inobp_check(mp, bp); /* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f2fcde52b66d..f72bffa67266 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -209,7 +209,6 @@ xfs_get_initial_prid(struct xfs_inode *dp) #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ #define XFS_INEW (1 << 3) /* inode has just been allocated */ -#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -225,8 +224,7 @@ xfs_get_initial_prid(struct xfs_inode *dp) */ #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ - XFS_IFILESTREAM); + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED) /* * Synchronize processes attempting to flush the in-core inode back to disk. @@ -379,7 +377,6 @@ int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, struct xfs_inode **, int *); int xfs_droplink(struct xfs_trans *, struct xfs_inode *); int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); -void xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *); /* from xfs_file.c */ int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index 24e993996bdc..cb35ae41d4a1 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c @@ -437,17 +437,16 @@ xfs_iread( } /* - * The inode format changed when we moved the link count and - * made it 32 bits long. If this is an old format inode, - * convert it in memory to look like a new one. If it gets - * flushed to disk we will convert back before flushing or - * logging it. We zero out the new projid field and the old link - * count field. We'll handle clearing the pad field (the remains - * of the old uuid field) when we actually convert the inode to - * the new format. We don't change the version number so that we - * can distinguish this from a real new format inode. + * Automatically convert version 1 inode formats in memory to version 2 + * inode format. If the inode is modified, it will get logged and + * rewritten as a version 2 inode. We can do this because we set the + * superblock feature bit for v2 inodes unconditionally during mount + * and it means the reast of the code can assume the inode version is 2 + * or higher. */ if (ip->i_d.di_version == 1) { + ip->i_d.di_version = 2; + memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; xfs_set_projid(ip, 0); diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index 73514c0486b7..b031e8d0d928 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c @@ -798,8 +798,7 @@ xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, xfs_inode_log_item_t *iip, - int whichfork, - xfs_buf_t *bp) + int whichfork) { char *cp; xfs_ifork_t *ifp; diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h index eb329a1ea888..7d3b1ed6dcbe 100644 --- a/fs/xfs/xfs_inode_fork.h +++ b/fs/xfs/xfs_inode_fork.h @@ -127,8 +127,7 @@ typedef struct xfs_ifork { int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, - struct xfs_inode_log_item *, int, - struct xfs_buf *); + struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); void xfs_iroot_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 686889b4a1e5..a640137b3573 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -145,34 +145,6 @@ xfs_inode_item_size( xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); } -/* - * If this is a v1 format inode, then we need to log it as such. This means - * that we have to copy the link count from the new field to the old. We - * don't have to worry about the new fields, because nothing trusts them as - * long as the old inode version number is there. - */ -STATIC void -xfs_inode_item_format_v1_inode( - struct xfs_inode *ip) -{ - if (!xfs_sb_version_hasnlink(&ip->i_mount->m_sb)) { - /* - * Convert it back. - */ - ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - ip->i_d.di_onlink = ip->i_d.di_nlink; - } else { - /* - * The superblock version has already been bumped, - * so just make the conversion to the new inode - * format permanent. - */ - ip->i_d.di_version = 2; - ip->i_d.di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - } -} - STATIC void xfs_inode_item_format_data_fork( struct xfs_inode_log_item *iip, @@ -370,6 +342,8 @@ xfs_inode_item_format( struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; + ASSERT(ip->i_d.di_version > 1); + ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); ilf->ilf_type = XFS_LI_INODE; ilf->ilf_ino = ip->i_ino; @@ -380,8 +354,6 @@ xfs_inode_item_format( ilf->ilf_size = 2; /* format + core */ xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); - if (ip->i_d.di_version == 1) - xfs_inode_item_format_v1_inode(ip); xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE, &ip->i_d, xfs_icdinode_size(ip->i_d.di_version)); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6152cbe353e8..8bc1bbce7451 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -543,10 +543,11 @@ xfs_attrmulti_by_handle( ops = memdup_user(am_hreq.ops, size); if (IS_ERR(ops)) { - error = PTR_ERR(ops); + error = -PTR_ERR(ops); goto out_dput; } + error = ENOMEM; attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; @@ -556,7 +557,7 @@ xfs_attrmulti_by_handle( ops[i].am_error = strncpy_from_user((char *)attr_name, ops[i].am_attrname, MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; + error = ERANGE; if (ops[i].am_error < 0) break; @@ -1227,15 +1228,8 @@ xfs_ioctl_setattr( olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } + ASSERT(ip->i_d.di_version > 1); xfs_set_projid(ip, fa->fsx_projid); - - /* - * We may have to rev the inode as well as - * the superblock version number since projids didn't - * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. - */ - if (ip->i_d.di_version == 1) - xfs_bump_ino_vers2(tp, ip); } } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index a7992f8de9d3..944d5baa710a 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -424,10 +424,11 @@ xfs_compat_attrmulti_by_handle( ops = memdup_user(compat_ptr(am_hreq.ops), size); if (IS_ERR(ops)) { - error = PTR_ERR(ops); + error = -PTR_ERR(ops); goto out_dput; } + error = ENOMEM; attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; @@ -438,7 +439,7 @@ xfs_compat_attrmulti_by_handle( compat_ptr(ops[i].am_attrname), MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; + error = ERANGE; if (ops[i].am_error < 0) break; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 3b80ebae05f5..6c5eb4c551e3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -730,7 +730,7 @@ xfs_iomap_write_allocate( */ nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - error = xfs_bmap_last_offset(NULL, ip, &last_block, + error = xfs_bmap_last_offset(ip, &last_block, XFS_DATA_FORK); if (error) goto trans_cancel; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 36d630319a27..205613a06068 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -829,22 +829,34 @@ xfs_setattr_size( */ inode_dio_wait(inode); + /* + * Do all the page cache truncate work outside the transaction context + * as the "lock" order is page lock->log space reservation. i.e. + * locking pages inside the transaction can ABBA deadlock with + * writeback. We have to do the VFS inode size update before we truncate + * the pagecache, however, to avoid racing with page faults beyond the + * new EOF they are not serialised against truncate operations except by + * page locks and size updates. + * + * Hence we are in a situation where a truncate can fail with ENOMEM + * from xfs_trans_reserve(), but having already truncated the in-memory + * version of the file (i.e. made user visible changes). There's not + * much we can do about this, except to hope that the caller sees ENOMEM + * and retries the truncate operation. + */ error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) return error; + truncate_setsize(inode, newsize); tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) goto out_trans_cancel; - truncate_setsize(inode, newsize); - commit_flags = XFS_TRANS_RELEASE_LOG_RES; lock_flags |= XFS_ILOCK_EXCL; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); /* diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f46338285152..cb64f222d607 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -270,7 +270,8 @@ xfs_bulkstat( /* * Allocate and initialize a btree cursor for ialloc btree. */ - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, + XFS_BTNUM_INO); irbp = irbuf; irbufend = irbuf + nirbuf; end_of_ag = 0; @@ -621,7 +622,8 @@ xfs_inumbers( agino = 0; continue; } - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, + XFS_BTNUM_INO); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, &tmp); if (error) { diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a5f8bd9899d3..292308dede6d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1165,7 +1165,7 @@ xlog_iodone(xfs_buf_t *bp) /* * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, + if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { xfs_buf_ioerror_alert(bp, __func__); xfs_buf_stale(bp); @@ -3952,11 +3952,14 @@ xfs_log_force_umount( retval = xlog_state_ioerror(log); spin_unlock(&log->l_icloglock); } + /* - * Wake up everybody waiting on xfs_log_force. - * Callback all log item committed functions as if the - * log writes were completed. + * Wake up everybody waiting on xfs_log_force. Wake the CIL push first + * as if the log writes were completed. The abort handling in the log + * item committed callback functions will do this again under lock to + * avoid races. */ + wake_up_all(&log->l_cilp->xc_commit_wait); xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); #ifdef XFSERRORDEBUG diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 2c4004475e71..84e0deb95abd 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -24,7 +24,8 @@ struct xfs_log_vec { struct xfs_log_iovec *lv_iovecp; /* iovec array */ struct xfs_log_item *lv_item; /* owner */ char *lv_buf; /* formatted buffer */ - int lv_buf_len; /* size of formatted buffer */ + int lv_bytes; /* accounted space in buffer */ + int lv_buf_len; /* aligned size of buffer */ int lv_size; /* size of allocated lv */ }; @@ -52,15 +53,21 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, return vec->i_addr; } +/* + * We need to make sure the next buffer is naturally aligned for the biggest + * basic data type we put into it. We already accounted for this padding when + * sizing the buffer. + * + * However, this padding does not get written into the log, and hence we have to + * track the space used by the log vectors separately to prevent log space hangs + * due to inaccurate accounting (i.e. a leak) of the used log space through the + * CIL context ticket. + */ static inline void xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) { - /* - * We need to make sure the next buffer is naturally aligned for the - * biggest basic data type we put into it. We already accounted for - * this when sizing the buffer. - */ lv->lv_buf_len += round_up(len, sizeof(uint64_t)); + lv->lv_bytes += len; vec->i_len = len; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 7e5455391176..b3425b34e3d5 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -97,7 +97,7 @@ xfs_cil_prepare_item( { /* Account for the new LV being passed in */ if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { - *diff_len += lv->lv_buf_len; + *diff_len += lv->lv_bytes; *diff_iovecs += lv->lv_niovecs; } @@ -111,7 +111,7 @@ xfs_cil_prepare_item( else if (old_lv != lv) { ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); - *diff_len -= old_lv->lv_buf_len; + *diff_len -= old_lv->lv_bytes; *diff_iovecs -= old_lv->lv_niovecs; kmem_free(old_lv); } @@ -239,7 +239,7 @@ xlog_cil_insert_format_items( * that the space reservation accounting is correct. */ *diff_iovecs -= lv->lv_niovecs; - *diff_len -= lv->lv_buf_len; + *diff_len -= lv->lv_bytes; } else { /* allocate new data chunk */ lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); @@ -259,6 +259,7 @@ xlog_cil_insert_format_items( /* The allocated data region lies beyond the iovec region */ lv->lv_buf_len = 0; + lv->lv_bytes = 0; lv->lv_buf = (char *)lv + buf_size - nbytes; ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); @@ -385,7 +386,15 @@ xlog_cil_committed( xfs_extent_busy_clear(mp, &ctx->busy_extents, (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); + /* + * If we are aborting the commit, wake up anyone waiting on the + * committing list. If we don't, then a shutdown we can leave processes + * waiting in xlog_cil_force_lsn() waiting on a sequence commit that + * will never happen because we aborted it. + */ spin_lock(&ctx->cil->xc_push_lock); + if (abort) + wake_up_all(&ctx->cil->xc_commit_wait); list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_push_lock); @@ -564,8 +573,18 @@ restart: spin_lock(&cil->xc_push_lock); list_for_each_entry(new_ctx, &cil->xc_committing, committing) { /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (XLOG_FORCED_SHUTDOWN(log)) { + spin_unlock(&cil->xc_push_lock); + goto out_abort_free_ticket; + } + + /* * Higher sequences will wait for this one so skip them. - * Don't wait for own own sequence, either. + * Don't wait for our own sequence, either. */ if (new_ctx->sequence >= ctx->sequence) continue; @@ -810,6 +829,13 @@ restart: */ spin_lock(&cil->xc_push_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { + /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (XLOG_FORCED_SHUTDOWN(log)) + goto out_shutdown; if (ctx->sequence > sequence) continue; if (!ctx->commit_lsn) { @@ -833,14 +859,12 @@ restart: * push sequence after the above wait loop and the CIL still contains * dirty objects. * - * When the push occurs, it will empty the CIL and - * atomically increment the currect sequence past the push sequence and - * move it into the committing list. Of course, if the CIL is clean at - * the time of the push, it won't have pushed the CIL at all, so in that - * case we should try the push for this sequence again from the start - * just in case. + * When the push occurs, it will empty the CIL and atomically increment + * the currect sequence past the push sequence and move it into the + * committing list. Of course, if the CIL is clean at the time of the + * push, it won't have pushed the CIL at all, so in that case we should + * try the push for this sequence again from the start just in case. */ - if (sequence == cil->xc_current_sequence && !list_empty(&cil->xc_cil)) { spin_unlock(&cil->xc_push_lock); @@ -849,6 +873,17 @@ restart: spin_unlock(&cil->xc_push_lock); return commit_lsn; + + /* + * We detected a shutdown in progress. We need to trigger the log force + * to pass through it's iclog state machine error handling, even though + * we are already in a shutdown state. Hence we can't return + * NULLCOMMITLSN here as that has special meaning to log forces (i.e. + * LSN is already stable), so we return a zero LSN instead. + */ +out_shutdown: + spin_unlock(&cil->xc_push_lock); + return 0; } /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index bce53ac81096..981af0f6504b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2138,7 +2138,9 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_allocbt_buf_ops; break; case XFS_IBT_CRC_MAGIC: + case XFS_FIBT_CRC_MAGIC: case XFS_IBT_MAGIC: + case XFS_FIBT_MAGIC: bp->b_ops = &xfs_inobt_buf_ops; break; case XFS_BMAP_CRC_MAGIC: @@ -3145,7 +3147,7 @@ xlog_recover_efd_pass2( } lip = xfs_trans_ail_cursor_next(ailp, &cur); } - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); return 0; @@ -3520,8 +3522,7 @@ out: STATIC int xlog_recover_unmount_trans( - struct xlog *log, - struct xlog_recover *trans) + struct xlog *log) { /* Do nothing now */ xfs_warn(log->l_mp, "%s: Unmount LR", __func__); @@ -3595,7 +3596,7 @@ xlog_recover_process_data( trans, pass); break; case XLOG_UNMOUNT_TRANS: - error = xlog_recover_unmount_trans(log, trans); + error = xlog_recover_unmount_trans(log); break; case XLOG_WAS_CONT_TRANS: error = xlog_recover_add_to_cont_trans(log, @@ -3757,7 +3758,7 @@ xlog_recover_process_efis( lip = xfs_trans_ail_cursor_next(ailp, &cur); } out: - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); return error; } diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c index 2af1a0a4d0f1..ee7e0e80246b 100644 --- a/fs/xfs/xfs_log_rlimit.c +++ b/fs/xfs/xfs_log_rlimit.c @@ -42,7 +42,7 @@ xfs_log_calc_max_attrsetm_res( int size; int nblks; - size = xfs_attr_leaf_entsize_local_max(mp->m_sb.sb_blocksize) - + size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) - MAXNAMELEN - 1; nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); nblks += XFS_B_TO_FSB(mp, size); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 944f3d9456a8..3507cd0ec400 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -323,8 +323,19 @@ reread: /* * Initialize the mount structure from the superblock. */ - xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); - xfs_sb_quota_from_disk(&mp->m_sb); + xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); + xfs_sb_quota_from_disk(sbp); + + /* + * If we haven't validated the superblock, do so now before we try + * to check the sector size and reread the superblock appropriately. + */ + if (sbp->sb_magicnum != XFS_SB_MAGIC) { + if (loud) + xfs_warn(mp, "Invalid superblock magic number"); + error = EINVAL; + goto release_buf; + } /* * We must be able to do sector-sized and sector-aligned IO. @@ -337,11 +348,11 @@ reread: goto release_buf; } - /* - * Re-read the superblock so the buffer is correctly sized, - * and properly verified. - */ if (buf_ops == NULL) { + /* + * Re-read the superblock so the buffer is correctly sized, + * and properly verified. + */ xfs_buf_relse(bp); sector_size = sbp->sb_sectsize; buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops; @@ -697,6 +708,12 @@ xfs_mountfs( mp->m_update_flags |= XFS_SB_VERSIONNUM; } + /* always use v2 inodes by default now */ + if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { + mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; + mp->m_update_flags |= XFS_SB_VERSIONNUM; + } + /* * Check if sb_agblocks is aligned at stripe boundary * If sb_agblocks is NOT aligned turn off m_dalign since @@ -774,12 +791,11 @@ xfs_mountfs( mp->m_dmevmask = 0; /* not persistent; set after each mount */ - xfs_dir_mount(mp); - - /* - * Initialize the attribute manager's entries. - */ - mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100; + error = xfs_da_mount(mp); + if (error) { + xfs_warn(mp, "Failed dir/attr init: %d", error); + goto out_remove_uuid; + } /* * Initialize the precomputed transaction reservations values. @@ -794,7 +810,7 @@ xfs_mountfs( error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed per-ag init: %d", error); - goto out_remove_uuid; + goto out_free_dir; } if (!sbp->sb_logblocks) { @@ -969,6 +985,8 @@ xfs_mountfs( xfs_wait_buftarg(mp->m_ddev_targp); out_free_perag: xfs_free_perag(mp); + out_free_dir: + xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); out: @@ -1046,6 +1064,7 @@ xfs_unmountfs( "Freespace may not be correct on next mount."); xfs_log_unmount(mp); + xfs_da_unmount(mp); xfs_uuid_unmount(mp); #if defined(DEBUG) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a466c5e5826e..7295a0b7c343 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -27,6 +27,7 @@ struct xfs_nameops; struct xfs_ail; struct xfs_quotainfo; struct xfs_dir_ops; +struct xfs_da_geometry; #ifdef HAVE_PERCPU_SB @@ -96,6 +97,8 @@ typedef struct xfs_mount { uint m_readio_blocks; /* min read size blocks */ uint m_writeio_log; /* min write size log bytes */ uint m_writeio_blocks; /* min write size blocks */ + struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ + struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ struct xlog *m_log; /* log specific stuff */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ @@ -131,8 +134,6 @@ typedef struct xfs_mount { int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ __uint64_t m_flags; /* global mount flags */ - uint m_dir_node_ents; /* #entries in a dir danode */ - uint m_attr_node_ents; /* #entries in attr danode */ int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */ int m_inoalign_mask;/* mask sb_inoalignmt if used */ @@ -145,17 +146,10 @@ typedef struct xfs_mount { int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ int m_sinoalign; /* stripe unit inode alignment */ - int m_attr_magicpct;/* 37% of the blocksize */ - int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ - int m_dirblksize; /* directory block sz--bytes */ - int m_dirblkfsbs; /* directory block sz--fsbs */ - xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ - xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ - xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ uint m_chsize; /* size of next field */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 4aff56395732..f99b4933dc22 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -100,14 +100,20 @@ * likely result in a loop in one of the lists. That's a sure-fire recipe for * an infinite loop in the code. */ -typedef struct xfs_mru_cache_elem -{ - struct list_head list_node; - unsigned long key; - void *value; -} xfs_mru_cache_elem_t; +struct xfs_mru_cache { + struct radix_tree_root store; /* Core storage data structure. */ + struct list_head *lists; /* Array of lists, one per grp. */ + struct list_head reap_list; /* Elements overdue for reaping. */ + spinlock_t lock; /* Lock to protect this struct. */ + unsigned int grp_count; /* Number of discrete groups. */ + unsigned int grp_time; /* Time period spanned by grps. */ + unsigned int lru_grp; /* Group containing time zero. */ + unsigned long time_zero; /* Time first element was added. */ + xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ + struct delayed_work work; /* Workqueue data for reaping. */ + unsigned int queued; /* work has been queued */ +}; -static kmem_zone_t *xfs_mru_elem_zone; static struct workqueue_struct *xfs_mru_reap_wq; /* @@ -129,12 +135,12 @@ static struct workqueue_struct *xfs_mru_reap_wq; */ STATIC unsigned long _xfs_mru_cache_migrate( - xfs_mru_cache_t *mru, - unsigned long now) + struct xfs_mru_cache *mru, + unsigned long now) { - unsigned int grp; - unsigned int migrated = 0; - struct list_head *lru_list; + unsigned int grp; + unsigned int migrated = 0; + struct list_head *lru_list; /* Nothing to do if the data store is empty. */ if (!mru->time_zero) @@ -193,11 +199,11 @@ _xfs_mru_cache_migrate( */ STATIC void _xfs_mru_cache_list_insert( - xfs_mru_cache_t *mru, - xfs_mru_cache_elem_t *elem) + struct xfs_mru_cache *mru, + struct xfs_mru_cache_elem *elem) { - unsigned int grp = 0; - unsigned long now = jiffies; + unsigned int grp = 0; + unsigned long now = jiffies; /* * If the data store is empty, initialise time zero, leave grp set to @@ -231,10 +237,10 @@ _xfs_mru_cache_list_insert( */ STATIC void _xfs_mru_cache_clear_reap_list( - xfs_mru_cache_t *mru) __releases(mru->lock) __acquires(mru->lock) - + struct xfs_mru_cache *mru) + __releases(mru->lock) __acquires(mru->lock) { - xfs_mru_cache_elem_t *elem, *next; + struct xfs_mru_cache_elem *elem, *next; struct list_head tmp; INIT_LIST_HEAD(&tmp); @@ -252,15 +258,8 @@ _xfs_mru_cache_clear_reap_list( spin_unlock(&mru->lock); list_for_each_entry_safe(elem, next, &tmp, list_node) { - - /* Remove the element from the reap list. */ list_del_init(&elem->list_node); - - /* Call the client's free function with the key and value pointer. */ - mru->free_func(elem->key, elem->value); - - /* Free the element structure. */ - kmem_zone_free(xfs_mru_elem_zone, elem); + mru->free_func(elem); } spin_lock(&mru->lock); @@ -277,7 +276,8 @@ STATIC void _xfs_mru_cache_reap( struct work_struct *work) { - xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); + struct xfs_mru_cache *mru = + container_of(work, struct xfs_mru_cache, work.work); unsigned long now, next; ASSERT(mru && mru->lists); @@ -304,28 +304,16 @@ _xfs_mru_cache_reap( int xfs_mru_cache_init(void) { - xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), - "xfs_mru_cache_elem"); - if (!xfs_mru_elem_zone) - goto out; - xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); if (!xfs_mru_reap_wq) - goto out_destroy_mru_elem_zone; - + return -ENOMEM; return 0; - - out_destroy_mru_elem_zone: - kmem_zone_destroy(xfs_mru_elem_zone); - out: - return -ENOMEM; } void xfs_mru_cache_uninit(void) { destroy_workqueue(xfs_mru_reap_wq); - kmem_zone_destroy(xfs_mru_elem_zone); } /* @@ -336,14 +324,14 @@ xfs_mru_cache_uninit(void) */ int xfs_mru_cache_create( - xfs_mru_cache_t **mrup, + struct xfs_mru_cache **mrup, unsigned int lifetime_ms, unsigned int grp_count, xfs_mru_cache_free_func_t free_func) { - xfs_mru_cache_t *mru = NULL; - int err = 0, grp; - unsigned int grp_time; + struct xfs_mru_cache *mru = NULL; + int err = 0, grp; + unsigned int grp_time; if (mrup) *mrup = NULL; @@ -400,7 +388,7 @@ exit: */ static void xfs_mru_cache_flush( - xfs_mru_cache_t *mru) + struct xfs_mru_cache *mru) { if (!mru || !mru->lists) return; @@ -420,7 +408,7 @@ xfs_mru_cache_flush( void xfs_mru_cache_destroy( - xfs_mru_cache_t *mru) + struct xfs_mru_cache *mru) { if (!mru || !mru->lists) return; @@ -438,38 +426,30 @@ xfs_mru_cache_destroy( */ int xfs_mru_cache_insert( - xfs_mru_cache_t *mru, - unsigned long key, - void *value) + struct xfs_mru_cache *mru, + unsigned long key, + struct xfs_mru_cache_elem *elem) { - xfs_mru_cache_elem_t *elem; + int error; ASSERT(mru && mru->lists); if (!mru || !mru->lists) return EINVAL; - elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP); - if (!elem) + if (radix_tree_preload(GFP_KERNEL)) return ENOMEM; - if (radix_tree_preload(GFP_KERNEL)) { - kmem_zone_free(xfs_mru_elem_zone, elem); - return ENOMEM; - } - INIT_LIST_HEAD(&elem->list_node); elem->key = key; - elem->value = value; spin_lock(&mru->lock); - - radix_tree_insert(&mru->store, key, elem); + error = -radix_tree_insert(&mru->store, key, elem); radix_tree_preload_end(); - _xfs_mru_cache_list_insert(mru, elem); - + if (!error) + _xfs_mru_cache_list_insert(mru, elem); spin_unlock(&mru->lock); - return 0; + return error; } /* @@ -478,13 +458,12 @@ xfs_mru_cache_insert( * the client data pointer for the removed element is returned, otherwise this * function will return a NULL pointer. */ -void * +struct xfs_mru_cache_elem * xfs_mru_cache_remove( - xfs_mru_cache_t *mru, - unsigned long key) + struct xfs_mru_cache *mru, + unsigned long key) { - xfs_mru_cache_elem_t *elem; - void *value = NULL; + struct xfs_mru_cache_elem *elem; ASSERT(mru && mru->lists); if (!mru || !mru->lists) @@ -492,17 +471,11 @@ xfs_mru_cache_remove( spin_lock(&mru->lock); elem = radix_tree_delete(&mru->store, key); - if (elem) { - value = elem->value; + if (elem) list_del(&elem->list_node); - } - spin_unlock(&mru->lock); - if (elem) - kmem_zone_free(xfs_mru_elem_zone, elem); - - return value; + return elem; } /* @@ -511,13 +484,14 @@ xfs_mru_cache_remove( */ void xfs_mru_cache_delete( - xfs_mru_cache_t *mru, - unsigned long key) + struct xfs_mru_cache *mru, + unsigned long key) { - void *value = xfs_mru_cache_remove(mru, key); + struct xfs_mru_cache_elem *elem; - if (value) - mru->free_func(key, value); + elem = xfs_mru_cache_remove(mru, key); + if (elem) + mru->free_func(elem); } /* @@ -540,12 +514,12 @@ xfs_mru_cache_delete( * status, we need to help it get it right by annotating the path that does * not release the lock. */ -void * +struct xfs_mru_cache_elem * xfs_mru_cache_lookup( - xfs_mru_cache_t *mru, - unsigned long key) + struct xfs_mru_cache *mru, + unsigned long key) { - xfs_mru_cache_elem_t *elem; + struct xfs_mru_cache_elem *elem; ASSERT(mru && mru->lists); if (!mru || !mru->lists) @@ -560,7 +534,7 @@ xfs_mru_cache_lookup( } else spin_unlock(&mru->lock); - return elem ? elem->value : NULL; + return elem; } /* @@ -570,7 +544,8 @@ xfs_mru_cache_lookup( */ void xfs_mru_cache_done( - xfs_mru_cache_t *mru) __releases(mru->lock) + struct xfs_mru_cache *mru) + __releases(mru->lock) { spin_unlock(&mru->lock); } diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 36dd3ec8b4eb..fb5245ba5ff7 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -18,24 +18,15 @@ #ifndef __XFS_MRU_CACHE_H__ #define __XFS_MRU_CACHE_H__ +struct xfs_mru_cache; -/* Function pointer type for callback to free a client's data pointer. */ -typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*); +struct xfs_mru_cache_elem { + struct list_head list_node; + unsigned long key; +}; -typedef struct xfs_mru_cache -{ - struct radix_tree_root store; /* Core storage data structure. */ - struct list_head *lists; /* Array of lists, one per grp. */ - struct list_head reap_list; /* Elements overdue for reaping. */ - spinlock_t lock; /* Lock to protect this struct. */ - unsigned int grp_count; /* Number of discrete groups. */ - unsigned int grp_time; /* Time period spanned by grps. */ - unsigned int lru_grp; /* Group containing time zero. */ - unsigned long time_zero; /* Time first element was added. */ - xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ - struct delayed_work work; /* Workqueue data for reaping. */ - unsigned int queued; /* work has been queued */ -} xfs_mru_cache_t; +/* Function pointer type for callback to free a client's data pointer. */ +typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem); int xfs_mru_cache_init(void); void xfs_mru_cache_uninit(void); @@ -44,10 +35,12 @@ int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, xfs_mru_cache_free_func_t free_func); void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, - void *value); -void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); + struct xfs_mru_cache_elem *elem); +struct xfs_mru_cache_elem * +xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); -void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); +struct xfs_mru_cache_elem * +xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_done(struct xfs_mru_cache *mru); #endif /* __XFS_MRU_CACHE_H__ */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index dc977b6e6a36..6d26759c779a 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -193,47 +193,6 @@ xfs_qm_dqpurge( } /* - * Release the group or project dquot pointers the user dquots maybe carrying - * around as a hint, and proceed to purge the user dquot cache if requested. -*/ -STATIC int -xfs_qm_dqpurge_hints( - struct xfs_dquot *dqp, - void *data) -{ - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; - uint flags = *((uint *)data); - - xfs_dqlock(dqp); - if (dqp->dq_flags & XFS_DQ_FREEING) { - xfs_dqunlock(dqp); - return EAGAIN; - } - - /* If this quota has a hint attached, prepare for releasing it now */ - gdqp = dqp->q_gdquot; - if (gdqp) - dqp->q_gdquot = NULL; - - pdqp = dqp->q_pdquot; - if (pdqp) - dqp->q_pdquot = NULL; - - xfs_dqunlock(dqp); - - if (gdqp) - xfs_qm_dqrele(gdqp); - if (pdqp) - xfs_qm_dqrele(pdqp); - - if (flags & XFS_QMOPT_UQUOTA) - return xfs_qm_dqpurge(dqp, NULL); - - return 0; -} - -/* * Purge the dquot cache. */ void @@ -241,18 +200,8 @@ xfs_qm_dqpurge_all( struct xfs_mount *mp, uint flags) { - /* - * We have to release group/project dquot hint(s) from the user dquot - * at first if they are there, otherwise we would run into an infinite - * loop while walking through radix tree to purge other type of dquots - * since their refcount is not zero if the user dquot refers to them - * as hint. - * - * Call the special xfs_qm_dqpurge_hints() will end up go through the - * general xfs_qm_dqpurge() against user dquot cache if requested. - */ - xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags); - + if (flags & XFS_QMOPT_UQUOTA) + xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); if (flags & XFS_QMOPT_GQUOTA) xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); if (flags & XFS_QMOPT_PQUOTA) @@ -409,7 +358,6 @@ xfs_qm_dqattach_one( xfs_dqid_t id, uint type, uint doalloc, - xfs_dquot_t *udqhint, /* hint */ xfs_dquot_t **IO_idqpp) { xfs_dquot_t *dqp; @@ -419,9 +367,9 @@ xfs_qm_dqattach_one( error = 0; /* - * See if we already have it in the inode itself. IO_idqpp is - * &i_udquot or &i_gdquot. This made the code look weird, but - * made the logic a lot simpler. + * See if we already have it in the inode itself. IO_idqpp is &i_udquot + * or &i_gdquot. This made the code look weird, but made the logic a lot + * simpler. */ dqp = *IO_idqpp; if (dqp) { @@ -430,49 +378,10 @@ xfs_qm_dqattach_one( } /* - * udqhint is the i_udquot field in inode, and is non-NULL only - * when the type arg is group/project. Its purpose is to save a - * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside - * the user dquot. - */ - if (udqhint) { - ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); - xfs_dqlock(udqhint); - - /* - * No need to take dqlock to look at the id. - * - * The ID can't change until it gets reclaimed, and it won't - * be reclaimed as long as we have a ref from inode and we - * hold the ilock. - */ - if (type == XFS_DQ_GROUP) - dqp = udqhint->q_gdquot; - else - dqp = udqhint->q_pdquot; - if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { - ASSERT(*IO_idqpp == NULL); - - *IO_idqpp = xfs_qm_dqhold(dqp); - xfs_dqunlock(udqhint); - return 0; - } - - /* - * We can't hold a dquot lock when we call the dqget code. - * We'll deadlock in no time, because of (not conforming to) - * lock ordering - the inodelock comes before any dquot lock, - * and we may drop and reacquire the ilock in xfs_qm_dqget(). - */ - xfs_dqunlock(udqhint); - } - - /* - * Find the dquot from somewhere. This bumps the - * reference count of dquot and returns it locked. - * This can return ENOENT if dquot didn't exist on - * disk and we didn't ask it to allocate; - * ESRCH if quotas got turned off suddenly. + * Find the dquot from somewhere. This bumps the reference count of + * dquot and returns it locked. This can return ENOENT if dquot didn't + * exist on disk and we didn't ask it to allocate; ESRCH if quotas got + * turned off suddenly. */ error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc | XFS_QMOPT_DOWARN, &dqp); @@ -490,48 +399,6 @@ xfs_qm_dqattach_one( return 0; } - -/* - * Given a udquot and group/project type, attach the group/project - * dquot pointer to the udquot as a hint for future lookups. - */ -STATIC void -xfs_qm_dqattach_hint( - struct xfs_inode *ip, - int type) -{ - struct xfs_dquot **dqhintp; - struct xfs_dquot *dqp; - struct xfs_dquot *udq = ip->i_udquot; - - ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); - - xfs_dqlock(udq); - - if (type == XFS_DQ_GROUP) { - dqp = ip->i_gdquot; - dqhintp = &udq->q_gdquot; - } else { - dqp = ip->i_pdquot; - dqhintp = &udq->q_pdquot; - } - - if (*dqhintp) { - struct xfs_dquot *tmp; - - if (*dqhintp == dqp) - goto done; - - tmp = *dqhintp; - *dqhintp = NULL; - xfs_qm_dqrele(tmp); - } - - *dqhintp = xfs_qm_dqhold(dqp); -done: - xfs_dqunlock(udq); -} - static bool xfs_qm_need_dqattach( struct xfs_inode *ip) @@ -562,7 +429,6 @@ xfs_qm_dqattach_locked( uint flags) { xfs_mount_t *mp = ip->i_mount; - uint nquotas = 0; int error = 0; if (!xfs_qm_need_dqattach(ip)) @@ -570,77 +436,39 @@ xfs_qm_dqattach_locked( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_UQUOTA_ON(mp)) { + if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, flags & XFS_QMOPT_DQALLOC, - NULL, &ip->i_udquot); + &ip->i_udquot); if (error) goto done; - nquotas++; + ASSERT(ip->i_udquot); } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_gdquot); - /* - * Don't worry about the udquot that we may have - * attached above. It'll get detached, if not already. - */ + &ip->i_gdquot); if (error) goto done; - nquotas++; + ASSERT(ip->i_gdquot); } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_PQUOTA_ON(mp)) { + if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) { error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_pdquot); - /* - * Don't worry about the udquot that we may have - * attached above. It'll get detached, if not already. - */ + &ip->i_pdquot); if (error) goto done; - nquotas++; + ASSERT(ip->i_pdquot); } +done: /* - * Attach this group/project quota to the user quota as a hint. - * This WON'T, in general, result in a thrash. + * Don't worry about the dquots that we may have attached before any + * error - they'll get detached later if it has not already been done. */ - if (nquotas > 1 && ip->i_udquot) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp)); - ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp)); - - /* - * We do not have i_udquot locked at this point, but this check - * is OK since we don't depend on the i_gdquot to be accurate - * 100% all the time. It is just a hint, and this will - * succeed in general. - */ - if (ip->i_udquot->q_gdquot != ip->i_gdquot) - xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP); - - if (ip->i_udquot->q_pdquot != ip->i_pdquot) - xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ); - } - - done: -#ifdef DEBUG - if (!error) { - if (XFS_IS_UQUOTA_ON(mp)) - ASSERT(ip->i_udquot); - if (XFS_IS_GQUOTA_ON(mp)) - ASSERT(ip->i_gdquot); - if (XFS_IS_PQUOTA_ON(mp)) - ASSERT(ip->i_pdquot); - } ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -#endif return error; } @@ -865,8 +693,7 @@ xfs_qm_init_quotainfo( /* Precalc some constants */ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp, - qinf->qi_dqchunklen); + qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen); mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 3daf5ea1eb8d..bbc813caba4c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -278,9 +278,10 @@ xfs_qm_scall_trunc_qfiles( xfs_mount_t *mp, uint flags) { - int error; + int error = EINVAL; - if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { + if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 || + (flags & ~XFS_DQ_ALLTYPES)) { xfs_debug(mp, "%s: flags=%x m_qflags=%x", __func__, flags, mp->m_qflags); return XFS_ERROR(EINVAL); @@ -959,7 +960,6 @@ xfs_qm_export_flags( STATIC int xfs_dqrele_inode( struct xfs_inode *ip, - struct xfs_perag *pag, int flags, void *args) { diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h index b3b2b1065c0f..137e20937077 100644 --- a/fs/xfs/xfs_quota_defs.h +++ b/fs/xfs/xfs_quota_defs.h @@ -156,6 +156,6 @@ typedef __uint16_t xfs_qwarncnt_t; extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, uint flags, char *str); -extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks); +extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index af33cafe69b6..2ad1b9822e92 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -100,16 +100,36 @@ xfs_fs_set_xstate( if (!XFS_IS_QUOTA_ON(mp)) return -EINVAL; return -xfs_qm_scall_quotaoff(mp, flags); - case Q_XQUOTARM: - if (XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return -xfs_qm_scall_trunc_qfiles(mp, flags); } return -EINVAL; } STATIC int +xfs_fs_rm_xquota( + struct super_block *sb, + unsigned int uflags) +{ + struct xfs_mount *mp = XFS_M(sb); + unsigned int flags = 0; + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + + if (XFS_IS_QUOTA_ON(mp)) + return -EINVAL; + + if (uflags & FS_USER_QUOTA) + flags |= XFS_DQ_USER; + if (uflags & FS_GROUP_QUOTA) + flags |= XFS_DQ_GROUP; + if (uflags & FS_USER_QUOTA) + flags |= XFS_DQ_PROJ; + + return -xfs_qm_scall_trunc_qfiles(mp, flags); +} + +STATIC int xfs_fs_get_dqblk( struct super_block *sb, struct kqid qid, @@ -149,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = { .get_xstatev = xfs_fs_get_xstatev, .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, + .rm_xquota = xfs_fs_rm_xquota, .get_dqblk = xfs_fs_get_dqblk, .set_dqblk = xfs_fs_set_dqblk, }; diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c index b1f2fe8af4a8..f4dd697cac08 100644 --- a/fs/xfs/xfs_rtbitmap.c +++ b/fs/xfs/xfs_rtbitmap.c @@ -74,7 +74,6 @@ xfs_rtbuf_get( mp->m_bsize, 0, &bp, NULL); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); *bpp = bp; return 0; } diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index 8baf61afae1d..c3453b11f563 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -291,7 +291,8 @@ xfs_mount_validate_sb( (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || sbp->sb_dblocks == 0 || sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || - sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { + sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || + sbp->sb_shared_vn != 0)) { xfs_notice(mp, "SB sanity check failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -333,15 +334,6 @@ xfs_mount_validate_sb( xfs_warn(mp, "Offline file system operation in progress!"); return XFS_ERROR(EFSCORRUPTED); } - - /* - * Version 1 directory format has never worked on Linux. - */ - if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { - xfs_warn(mp, "file system using version 1 directory format"); - return XFS_ERROR(ENOSYS); - } - return 0; } diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index f7b2fe77c5a5..c43c2d609a24 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -36,8 +36,6 @@ struct xfs_trans; #define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */ #define XFS_SB_VERSION_NUMBITS 0x000f #define XFS_SB_VERSION_ALLFBITS 0xfff0 -#define XFS_SB_VERSION_SASHFBITS 0xf000 -#define XFS_SB_VERSION_REALFBITS 0x0ff0 #define XFS_SB_VERSION_ATTRBIT 0x0010 #define XFS_SB_VERSION_NLINKBIT 0x0020 #define XFS_SB_VERSION_QUOTABIT 0x0040 @@ -50,24 +48,15 @@ struct xfs_trans; #define XFS_SB_VERSION_DIRV2BIT 0x2000 #define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */ #define XFS_SB_VERSION_MOREBITSBIT 0x8000 -#define XFS_SB_VERSION_OKSASHFBITS \ - (XFS_SB_VERSION_EXTFLGBIT | \ - XFS_SB_VERSION_DIRV2BIT | \ - XFS_SB_VERSION_BORGBIT) -#define XFS_SB_VERSION_OKREALFBITS \ - (XFS_SB_VERSION_ATTRBIT | \ - XFS_SB_VERSION_NLINKBIT | \ - XFS_SB_VERSION_QUOTABIT | \ - XFS_SB_VERSION_ALIGNBIT | \ - XFS_SB_VERSION_DALIGNBIT | \ - XFS_SB_VERSION_SHAREDBIT | \ - XFS_SB_VERSION_LOGV2BIT | \ - XFS_SB_VERSION_SECTORBIT | \ - XFS_SB_VERSION_MOREBITSBIT) -#define XFS_SB_VERSION_OKREALBITS \ - (XFS_SB_VERSION_NUMBITS | \ - XFS_SB_VERSION_OKREALFBITS | \ - XFS_SB_VERSION_OKSASHFBITS) + +/* + * Supported feature bit list is just all bits in the versionnum field because + * we've used them all up and understand them all. Except, of course, for the + * shared superblock bit, which nobody knows what it does and so is unsupported. + */ +#define XFS_SB_VERSION_OKBITS \ + ((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \ + ~XFS_SB_VERSION_SHAREDBIT) /* * There are two words to hold XFS "feature" bits: the original @@ -76,7 +65,6 @@ struct xfs_trans; * * These defines represent bits in sb_features2. */ -#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */ #define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 #define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 @@ -86,16 +74,11 @@ struct xfs_trans; #define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */ #define XFS_SB_VERSION2_FTYPE 0x00000200 /* inode type in dir */ -#define XFS_SB_VERSION2_OKREALFBITS \ +#define XFS_SB_VERSION2_OKBITS \ (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ XFS_SB_VERSION2_ATTR2BIT | \ XFS_SB_VERSION2_PROJID32BIT | \ XFS_SB_VERSION2_FTYPE) -#define XFS_SB_VERSION2_OKSASHFBITS \ - (0) -#define XFS_SB_VERSION2_OKREALBITS \ - (XFS_SB_VERSION2_OKREALFBITS | \ - XFS_SB_VERSION2_OKSASHFBITS ) /* * Superblock - in core version. Must match the ondisk version below. @@ -345,214 +328,140 @@ typedef enum { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -static inline int xfs_sb_good_version(xfs_sb_t *sbp) -{ - /* We always support version 1-3 */ - if (sbp->sb_versionnum >= XFS_SB_VERSION_1 && - sbp->sb_versionnum <= XFS_SB_VERSION_3) - return 1; - - /* We support version 4 if all feature bits are supported */ - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) { - if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || - ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && - (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) - return 0; - - if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN) - return 0; - return 1; - } - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) - return 1; - - return 0; -} - /* - * Detect a mismatched features2 field. Older kernels read/wrote - * this into the wrong slot, so to be safe we keep them in sync. + * The first XFS version we support is a v4 superblock with V2 directories. */ -static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp) +static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp) { - return (sbp->sb_bad_features2 != sbp->sb_features2); -} - -static inline unsigned xfs_sb_version_tonew(unsigned v) -{ - if (v == XFS_SB_VERSION_1) - return XFS_SB_VERSION_4; + if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) + return false; - if (v == XFS_SB_VERSION_2) - return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; + /* check for unknown features in the fs */ + if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || + ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && + (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) + return false; - return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT | - XFS_SB_VERSION_NLINKBIT; + return true; } -static inline unsigned xfs_sb_version_toold(unsigned v) +static inline bool xfs_sb_good_version(struct xfs_sb *sbp) { - if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) - return 0; - if (v & XFS_SB_VERSION_NLINKBIT) - return XFS_SB_VERSION_3; - if (v & XFS_SB_VERSION_ATTRBIT) - return XFS_SB_VERSION_2; - return XFS_SB_VERSION_1; -} - -static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) -{ - return sbp->sb_versionnum == XFS_SB_VERSION_2 || - sbp->sb_versionnum == XFS_SB_VERSION_3 || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) + return true; + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) + return xfs_sb_good_v4_features(sbp); + return false; } -static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) +/* + * Detect a mismatched features2 field. Older kernels read/wrote + * this into the wrong slot, so to be safe we keep them in sync. + */ +static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) { - if (sbp->sb_versionnum == XFS_SB_VERSION_1) - sbp->sb_versionnum = XFS_SB_VERSION_2; - else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4) - sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; - else - sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; + return sbp->sb_bad_features2 != sbp->sb_features2; } -static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp) { - return sbp->sb_versionnum == XFS_SB_VERSION_3 || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); + return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT); } -static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) +static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) { - if (sbp->sb_versionnum <= XFS_SB_VERSION_2) - sbp->sb_versionnum = XFS_SB_VERSION_3; - else - sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT; + sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; } -static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); + return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); } -static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) +static inline void xfs_sb_version_addquota(struct xfs_sb *sbp) { - if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4) - sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; - else - sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) | - XFS_SB_VERSION_QUOTABIT; + sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; } -static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)); } -static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); -} - -static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); + return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); } -static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || + (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); } -static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT)); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || + (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); } -static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)); + return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); } -static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); + return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); } -static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); -} - -static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT)); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || + (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); } /* * sb_features2 bit version macros. - * - * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro: - * - * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp) - * ((xfs_sb_version_hasmorebits(sbp) && - * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT) */ - -static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) +static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || (xfs_sb_version_hasmorebits(sbp) && (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); } -static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || (xfs_sb_version_hasmorebits(sbp) && (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)); } -static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) +static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; + sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; } -static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) +static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) { sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; + sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; if (!sbp->sb_features2) sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; } -static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || (xfs_sb_version_hasmorebits(sbp) && (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)); } -static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp) +static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; @@ -587,7 +496,9 @@ xfs_sb_has_compat_feature( return (sbp->sb_features_compat & feature) != 0; } -#define XFS_SB_FEAT_RO_COMPAT_ALL 0 +#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ +#define XFS_SB_FEAT_RO_COMPAT_ALL \ + (XFS_SB_FEAT_RO_COMPAT_FINOBT) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( @@ -623,12 +534,12 @@ xfs_sb_has_incompat_log_feature( /* * V5 superblock specific feature checks */ -static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp) +static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } -static inline int xfs_sb_version_has_pquotino(xfs_sb_t *sbp) +static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } @@ -641,6 +552,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); } +static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) +{ + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && + (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); +} + /* * end of superblock version macros */ diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h index 4484e5151395..82404da2ca67 100644 --- a/fs/xfs/xfs_shared.h +++ b/fs/xfs/xfs_shared.h @@ -238,7 +238,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *); int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); -bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, +bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index ce372b7d5644..f2240383d4bb 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) { "abtc2", XFSSTAT_END_ABTC_V2 }, { "bmbt2", XFSSTAT_END_BMBT_V2 }, { "ibt2", XFSSTAT_END_IBT_V2 }, + { "fibt2", XFSSTAT_END_FIBT_V2 }, /* we print both series of quota information together */ { "qm", XFSSTAT_END_QM }, }; diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index c03ad38ceaeb..c8f238b8299a 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -183,7 +183,23 @@ struct xfsstats { __uint32_t xs_ibt_2_alloc; __uint32_t xs_ibt_2_free; __uint32_t xs_ibt_2_moves; -#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) +#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15) + __uint32_t xs_fibt_2_lookup; + __uint32_t xs_fibt_2_compare; + __uint32_t xs_fibt_2_insrec; + __uint32_t xs_fibt_2_delrec; + __uint32_t xs_fibt_2_newroot; + __uint32_t xs_fibt_2_killroot; + __uint32_t xs_fibt_2_increment; + __uint32_t xs_fibt_2_decrement; + __uint32_t xs_fibt_2_lshift; + __uint32_t xs_fibt_2_rshift; + __uint32_t xs_fibt_2_split; + __uint32_t xs_fibt_2_join; + __uint32_t xs_fibt_2_alloc; + __uint32_t xs_fibt_2_free; + __uint32_t xs_fibt_2_moves; +#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6) __uint32_t xs_qm_dqreclaims; __uint32_t xs_qm_dqreclaim_misses; __uint32_t xs_qm_dquot_dups; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3494eff8e4eb..8f0333b3f7a0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -765,20 +765,18 @@ xfs_open_devices( * Setup xfs_mount buffer target pointers */ error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); + mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev); if (!mp->m_ddev_targp) goto out_close_rtdev; if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, - mp->m_fsname); + mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev); if (!mp->m_rtdev_targp) goto out_free_ddev_targ; } if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, - mp->m_fsname); + mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev); if (!mp->m_logdev_targp) goto out_free_rtdev_targ; } else { @@ -811,8 +809,7 @@ xfs_setup_devices( { int error; - error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); + error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize); if (error) return error; @@ -822,14 +819,12 @@ xfs_setup_devices( if (xfs_sb_version_hassector(&mp->m_sb)) log_sector_size = mp->m_sb.sb_logsectsize; error = xfs_setsize_buftarg(mp->m_logdev_targp, - mp->m_sb.sb_blocksize, log_sector_size); if (error) return error; } if (mp->m_rtdev_targp) { error = xfs_setsize_buftarg(mp->m_rtdev_targp, - mp->m_sb.sb_blocksize, mp->m_sb.sb_sectsize); if (error) return error; @@ -1754,13 +1749,9 @@ init_xfs_fs(void) if (error) goto out_destroy_wq; - error = xfs_filestream_init(); - if (error) - goto out_mru_cache_uninit; - error = xfs_buf_init(); if (error) - goto out_filestream_uninit; + goto out_mru_cache_uninit; error = xfs_init_procfs(); if (error) @@ -1787,8 +1778,6 @@ init_xfs_fs(void) xfs_cleanup_procfs(); out_buf_terminate: xfs_buf_terminate(); - out_filestream_uninit: - xfs_filestream_uninit(); out_mru_cache_uninit: xfs_mru_cache_uninit(); out_destroy_wq: @@ -1807,7 +1796,6 @@ exit_xfs_fs(void) xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); - xfs_filestream_uninit(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); xfs_destroy_zones(); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 52979aa90986..d69363c833e1 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -27,6 +27,7 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_inode.h" #include "xfs_ialloc.h" @@ -92,7 +93,7 @@ xfs_readlink_bmap( cur_chunk = bp->b_addr; if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset, + if (!xfs_symlink_hdr_ok(ip->i_ino, offset, byte_cnt, bp)) { error = EFSCORRUPTED; xfs_alert(mp, diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c index 9b32052ff65e..23c2f2577c8d 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/xfs_symlink_remote.c @@ -80,7 +80,6 @@ xfs_symlink_hdr_set( */ bool xfs_symlink_hdr_ok( - struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, uint32_t size, diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index dee3279c095e..1e85bcd0e418 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -46,6 +46,7 @@ #include "xfs_log_recover.h" #include "xfs_inode_item.h" #include "xfs_bmap_btree.h" +#include "xfs_filestream.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 65d8c793a25c..6910458915cf 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -538,6 +538,64 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); +DECLARE_EVENT_CLASS(xfs_filestream_class, + TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), + TP_ARGS(ip, agno), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_agnumber_t, agno) + __field(int, streams) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->agno = agno; + __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); + ), + TP_printk("dev %d:%d ino 0x%llx agno %u streams %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->agno, + __entry->streams) +) +#define DEFINE_FILESTREAM_EVENT(name) \ +DEFINE_EVENT(xfs_filestream_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \ + TP_ARGS(ip, agno)) +DEFINE_FILESTREAM_EVENT(xfs_filestream_free); +DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); +DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); + +TRACE_EVENT(xfs_filestream_pick, + TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno, + xfs_extlen_t free, int nscan), + TP_ARGS(ip, agno, free, nscan), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_agnumber_t, agno) + __field(int, streams) + __field(xfs_extlen_t, free) + __field(int, nscan) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->agno = agno; + __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); + __entry->free = free; + __entry->nscan = nscan; + ), + TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->agno, + __entry->streams, + __entry->free, + __entry->nscan) +); + DECLARE_EVENT_CLASS(xfs_lock_class, TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, unsigned long caller_ip), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 54a57326d85b..d03932564ccb 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -827,7 +827,7 @@ xfs_trans_committed_bulk( xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); spin_lock(&ailp->xa_lock); - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index a7287354e535..cb0f3a84cc68 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -173,7 +173,6 @@ xfs_trans_ail_cursor_next( */ void xfs_trans_ail_cursor_done( - struct xfs_ail *ailp, struct xfs_ail_cursor *cur) { cur->item = NULL; @@ -368,7 +367,7 @@ xfsaild_push( * If the AIL is empty or our push has reached the end we are * done now. */ - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); goto out_done; } @@ -453,7 +452,7 @@ xfsaild_push( break; lsn = lip->li_lsn; } - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 12e86af9d9b9..bd1281862ad7 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -133,8 +133,7 @@ struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, xfs_lsn_t lsn); struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, - struct xfs_ail_cursor *cur); +void xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur); #if BITS_PER_LONG != 64 static inline void diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index ae368165244d..f2bda7c76b8a 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -26,6 +26,7 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap_btree.h" #include "xfs_ialloc.h" @@ -106,6 +107,47 @@ xfs_calc_inode_res( } /* + * The free inode btree is a conditional feature and the log reservation + * requirements differ slightly from that of the traditional inode allocation + * btree. The finobt tracks records for inode chunks with at least one free + * inode. A record can be removed from the tree for an inode allocation + * or free and thus the finobt reservation is unconditional across: + * + * - inode allocation + * - inode free + * - inode chunk allocation + * + * The 'modify' param indicates to include the record modification scenario. The + * 'alloc' param indicates to include the reservation for free space btree + * modifications on behalf of finobt modifications. This is required only for + * transactions that do not already account for free space btree modifications. + * + * the free inode btree: max depth * block size + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the free inode btree entry: block size + */ +STATIC uint +xfs_calc_finobt_res( + struct xfs_mount *mp, + int alloc, + int modify) +{ + uint res; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); + if (alloc) + res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (modify) + res += (uint)XFS_FSB_TO_B(mp, 1); + + return res; +} + +/* * Various log reservation values. * * These are based on the size of the file system block because that is what @@ -302,6 +344,7 @@ xfs_calc_remove_reservation( * the superblock for the nlink flag: sector size * the directory btree: (max depth + v2) * dir block size * the directory inode's bmap btree: (max depth + v2) * block size + * the finobt (record modification and allocation btrees) */ STATIC uint xfs_calc_create_resv_modify( @@ -310,7 +353,8 @@ xfs_calc_create_resv_modify( return xfs_calc_inode_res(mp, 2) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + (uint)XFS_FSB_TO_B(mp, 1) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 1, 1); } /* @@ -348,6 +392,7 @@ __xfs_calc_create_reservation( * the superblock for the nlink flag: sector size * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( @@ -357,7 +402,8 @@ xfs_calc_icreate_resv_alloc( mp->m_sb.sb_sectsize + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 0); } STATIC uint @@ -425,6 +471,7 @@ xfs_calc_symlink_reservation( * the on disk inode before ours in the agi hash list: inode cluster size * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion, removal or modification) */ STATIC uint xfs_calc_ifree_reservation( @@ -439,7 +486,8 @@ xfs_calc_ifree_reservation( xfs_calc_buf_res(2 + mp->m_ialloc_blks + mp->m_in_maxlevels, 0) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 1); } /* @@ -562,7 +610,7 @@ xfs_calc_addafork_reservation( return XFS_DQUOT_LOGRES(mp) + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, mp->m_dirblksize) + + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h index af5dbe06cb65..bf9c4579334d 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/xfs_trans_space.h @@ -28,7 +28,8 @@ (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ XFS_EXTENTADD_SPACE_RES(mp,w)) -#define XFS_DAENTER_1B(mp,w) ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1) +#define XFS_DAENTER_1B(mp,w) \ + ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1) #define XFS_DAENTER_DBS(mp,w) \ (XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0)) #define XFS_DAENTER_BLOCKS(mp,w) \ @@ -47,13 +48,15 @@ #define XFS_DIRREMOVE_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ - ((mp)->m_ialloc_blks + (mp)->m_in_maxlevels - 1) + ((mp)->m_ialloc_blks + \ + (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ + ((mp)->m_in_maxlevels - 1))) /* * Space reservation values for various transactions. */ #define XFS_ADDAFORK_SPACE_RES(mp) \ - ((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)) + ((mp)->m_dir_geo->fsbcount + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)) #define XFS_ATTRRM_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK) /* This macro is not used - see inline code in xfs_attr_set */ @@ -82,5 +85,8 @@ (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) +#define XFS_IFREE_SPACE_RES(mp) \ + (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) + #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 82bbc34d54a3..65c6e6650b1a 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -134,7 +134,7 @@ typedef enum { typedef enum { XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, - XFS_BTNUM_MAX + XFS_BTNUM_FINOi, XFS_BTNUM_MAX } xfs_btnum_t; struct xfs_name { diff --git a/include/linux/quota.h b/include/linux/quota.h index cc7494a35429..0f3c5d38da1f 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -329,6 +329,7 @@ struct quotactl_ops { int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); + int (*rm_xquota)(struct super_block *, unsigned int); }; struct quota_format_type { |