diff options
Diffstat (limited to 'fs/xfs')
46 files changed, 767 insertions, 1048 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index aecf2519db76..c2e30eea74dc 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -186,19 +186,37 @@ xfs_destroy_ioend( } /* + * If the end of the current ioend is beyond the current EOF, + * return the new EOF value, otherwise zero. + */ +STATIC xfs_fsize_t +xfs_ioend_new_eof( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip = XFS_I(ioend->io_inode); + xfs_fsize_t isize; + xfs_fsize_t bsize; + + bsize = ioend->io_offset + ioend->io_size; + isize = MAX(ip->i_size, ip->i_new_size); + isize = MIN(isize, bsize); + return isize > ip->i_d.di_size ? isize : 0; +} + +/* * Update on-disk file size now that data has been written to disk. * The current in-memory file size is i_size. If a write is beyond * eof i_new_size will be the intended file size until i_size is * updated. If this write does not extend all the way to the valid * file size then restrict this update to the end of the write. */ + STATIC void xfs_setfilesize( xfs_ioend_t *ioend) { xfs_inode_t *ip = XFS_I(ioend->io_inode); xfs_fsize_t isize; - xfs_fsize_t bsize; ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); ASSERT(ioend->io_type != IOMAP_READ); @@ -206,17 +224,10 @@ xfs_setfilesize( if (unlikely(ioend->io_error)) return; - bsize = ioend->io_offset + ioend->io_size; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - isize = MAX(ip->i_size, ip->i_new_size); - isize = MIN(isize, bsize); - - if (ip->i_d.di_size < isize) { + isize = xfs_ioend_new_eof(ioend); + if (isize) { ip->i_d.di_size = isize; - ip->i_update_core = 1; - ip->i_update_size = 1; xfs_mark_inode_dirty_sync(ip); } @@ -405,10 +416,16 @@ xfs_submit_ioend_bio( struct bio *bio) { atomic_inc(&ioend->io_remaining); - bio->bi_private = ioend; bio->bi_end_io = xfs_end_bio; + /* + * If the I/O is beyond EOF we mark the inode dirty immediately + * but don't update the inode size until I/O completion. + */ + if (xfs_ioend_new_eof(ioend)) + xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); + submit_bio(WRITE, bio); ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); bio_put(bio); @@ -1636,4 +1653,5 @@ const struct address_space_operations xfs_address_space_operations = { .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 0542fd507649..eff61e2732af 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -42,7 +42,7 @@ #include <linux/dcache.h> -static struct vm_operations_struct xfs_file_vm_ops; +static const struct vm_operations_struct xfs_file_vm_ops; STATIC ssize_t xfs_file_aio_read( @@ -172,12 +172,14 @@ xfs_file_release( */ STATIC int xfs_file_fsync( - struct file *filp, - struct dentry *dentry, - int datasync) + struct file *file, + struct dentry *dentry, + int datasync) { - xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); - return -xfs_fsync(XFS_I(dentry->d_inode)); + struct xfs_inode *ip = XFS_I(dentry->d_inode); + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + return -xfs_fsync(ip); } STATIC int @@ -271,7 +273,7 @@ const struct file_operations xfs_dir_file_operations = { .fsync = xfs_file_fsync, }; -static struct vm_operations_struct xfs_file_vm_ops = { +static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = xfs_vm_page_mkwrite, }; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 8070b34cc287..cd42ef78f6b5 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -43,7 +43,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -58,19 +57,22 @@ #include <linux/fiemap.h> /* - * Bring the atime in the XFS inode uptodate. - * Used before logging the inode to disk or when the Linux inode goes away. + * Bring the timestamps in the XFS inode uptodate. + * + * Used before writing the inode to disk. */ void -xfs_synchronize_atime( +xfs_synchronize_times( xfs_inode_t *ip) { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & I_CLEAR)) { - ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; - } + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; + ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; + ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; } /* @@ -107,32 +109,20 @@ xfs_ichgtime( if ((flags & XFS_ICHGTIME_MOD) && !timespec_equal(&inode->i_mtime, &tv)) { inode->i_mtime = tv; - ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; sync_it = 1; } if ((flags & XFS_ICHGTIME_CHG) && !timespec_equal(&inode->i_ctime, &tv)) { inode->i_ctime = tv; - ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; sync_it = 1; } /* - * We update the i_update_core field _after_ changing - * the timestamps in order to coordinate properly with - * xfs_iflush() so that we don't lose timestamp updates. - * This keeps us from having to hold the inode lock - * while doing this. We use the SYNCHRONIZE macro to - * ensure that the compiler does not reorder the update - * of i_update_core above the timestamp updates above. + * Update complete - now make sure everyone knows that the inode + * is dirty. */ - if (sync_it) { - SYNCHRONIZE(); - ip->i_update_core = 1; + if (sync_it) xfs_mark_inode_dirty_sync(ip); - } } /* @@ -485,14 +475,6 @@ xfs_vn_put_link( } STATIC int -xfs_vn_permission( - struct inode *inode, - int mask) -{ - return generic_permission(inode, mask, xfs_check_acl); -} - -STATIC int xfs_vn_getattr( struct vfsmount *mnt, struct dentry *dentry, @@ -515,10 +497,8 @@ xfs_vn_getattr( stat->gid = ip->i_d.di_gid; stat->ino = ip->i_ino; stat->atime = inode->i_atime; - stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; - stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; - stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec; - stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); @@ -696,7 +676,7 @@ xfs_vn_fiemap( } static const struct inode_operations xfs_inode_operations = { - .permission = xfs_vn_permission, + .check_acl = xfs_check_acl, .truncate = xfs_vn_truncate, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, @@ -724,7 +704,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .permission = xfs_vn_permission, + .check_acl = xfs_check_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -749,7 +729,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .permission = xfs_vn_permission, + .check_acl = xfs_check_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -762,7 +742,7 @@ static const struct inode_operations xfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = xfs_vn_follow_link, .put_link = xfs_vn_put_link, - .permission = xfs_vn_permission, + .check_acl = xfs_check_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 7078974a6eee..072050f8d346 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -667,7 +667,7 @@ start: xip->i_new_size = new_size; if (likely(!(ioflags & IO_INVIS))) - xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + file_update_time(file); /* * If the offset is beyond the size of the file, we have a couple @@ -812,18 +812,21 @@ write_retry: /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + loff_t end = pos + ret - 1; int error2; xfs_iunlock(xip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); - error2 = sync_page_range(inode, mapping, pos, ret); + + error2 = filemap_write_and_wait_range(mapping, pos, end); if (!error) error = error2; if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_ilock(xip, iolock); - error2 = xfs_write_sync_logforce(mp, xip); + + error2 = xfs_fsync(xip); if (!error) error = error2; } diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index cb6e2cca214f..3d4a0c84d634 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -80,7 +80,7 @@ xfs_fs_set_xstate( if (sb->s_flags & MS_RDONLY) return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) + if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -150,7 +150,7 @@ xfs_fs_set_xquota( return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); } -struct quotactl_ops xfs_quotactl_operations = { +const struct quotactl_ops xfs_quotactl_operations = { .quota_sync = xfs_fs_quota_sync, .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index c3526d445f6a..76fdc5861932 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -20,16 +20,9 @@ DEFINE_PER_CPU(struct xfsstats, xfsstats); -STATIC int -xfs_read_xfsstats( - char *buffer, - char **start, - off_t offset, - int count, - int *eof, - void *data) +static int xfs_stat_proc_show(struct seq_file *m, void *v) { - int c, i, j, len, val; + int c, i, j, val; __uint64_t xs_xstrat_bytes = 0; __uint64_t xs_write_bytes = 0; __uint64_t xs_read_bytes = 0; @@ -60,18 +53,18 @@ xfs_read_xfsstats( }; /* Loop over all stats groups */ - for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { - len += sprintf(buffer + len, "%s", xstats[i].desc); + for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { + seq_printf(m, "%s", xstats[i].desc); /* inner loop does each group */ while (j < xstats[i].endpoint) { val = 0; /* sum over all cpus */ for_each_possible_cpu(c) val += *(((__u32*)&per_cpu(xfsstats, c) + j)); - len += sprintf(buffer + len, " %u", val); + seq_printf(m, " %u", val); j++; } - buffer[len++] = '\n'; + seq_putc(m, '\n'); } /* extra precision counters */ for_each_possible_cpu(i) { @@ -80,36 +73,38 @@ xfs_read_xfsstats( xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; } - len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", + seq_printf(m, "xpc %Lu %Lu %Lu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - len += sprintf(buffer + len, "debug %u\n", + seq_printf(m, "debug %u\n", #if defined(DEBUG) 1); #else 0); #endif + return 0; +} - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - - return len; +static int xfs_stat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xfs_stat_proc_show, NULL); } +static const struct file_operations xfs_stat_proc_fops = { + .owner = THIS_MODULE, + .open = xfs_stat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + int xfs_init_procfs(void) { if (!proc_mkdir("fs/xfs", NULL)) goto out; - if (!create_proc_read_entry("fs/xfs/stat", 0, NULL, - xfs_read_xfsstats, NULL)) + if (!proc_create("fs/xfs/stat", 0, NULL, + &xfs_stat_proc_fops)) goto out_remove_entry; return 0; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a220d36f789b..18a4b8e11df2 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -67,7 +67,7 @@ #include <linux/freezer.h> #include <linux/parser.h> -static struct super_operations xfs_super_operations; +static const struct super_operations xfs_super_operations; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; @@ -579,15 +579,19 @@ xfs_showargs( else if (mp->m_qflags & XFS_UQUOTA_ACCT) seq_puts(m, "," MNTOPT_UQUOTANOENF); - if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - seq_puts(m, "," MNTOPT_PRJQUOTA); - else if (mp->m_qflags & XFS_PQUOTA_ACCT) - seq_puts(m, "," MNTOPT_PQUOTANOENF); - - if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) - seq_puts(m, "," MNTOPT_GRPQUOTA); - else if (mp->m_qflags & XFS_GQUOTA_ACCT) - seq_puts(m, "," MNTOPT_GQUOTANOENF); + /* Either project or group quotas can be active, not both */ + + if (mp->m_qflags & XFS_PQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_PRJQUOTA); + else + seq_puts(m, "," MNTOPT_PQUOTANOENF); + } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_GRPQUOTA); + else + seq_puts(m, "," MNTOPT_GQUOTANOENF); + } if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, "," MNTOPT_NOQUOTA); @@ -687,7 +691,7 @@ xfs_barrier_test( return error; } -void +STATIC void xfs_mountfs_check_barriers(xfs_mount_t *mp) { int error; @@ -973,6 +977,28 @@ xfs_fs_inode_init_once( } /* + * Dirty the XFS inode when mark_inode_dirty_sync() is called so that + * we catch unlogged VFS level updates to the inode. Care must be taken + * here - the transaction code calls mark_inode_dirty_sync() to mark the + * VFS inode dirty in a transaction and clears the i_update_core field; + * it must clear the field after calling mark_inode_dirty_sync() to + * correctly indicate that the dirty state has been propagated into the + * inode log item. + * + * We need the barrier() to maintain correct ordering between unlogged + * updates and the transaction commit code that clears the i_update_core + * field. This requires all updates to be completed before marking the + * inode dirty. + */ +STATIC void +xfs_fs_dirty_inode( + struct inode *inode) +{ + barrier(); + XFS_I(inode)->i_update_core = 1; +} + +/* * Attempt to flush the inode, this will actually fail * if the inode is pinned, but we dirty the inode again * at the point when it is unpinned after a log write, @@ -1122,7 +1148,7 @@ xfs_fs_put_super( } STATIC int -xfs_fs_sync_super( +xfs_fs_sync_fs( struct super_block *sb, int wait) { @@ -1130,23 +1156,23 @@ xfs_fs_sync_super( int error; /* - * Treat a sync operation like a freeze. This is to work - * around a race in sync_inodes() which works in two phases - * - an asynchronous flush, which can write out an inode - * without waiting for file size updates to complete, and a - * synchronous flush, which wont do anything because the - * async flush removed the inode's dirty flag. Also - * sync_inodes() will not see any files that just have - * outstanding transactions to be flushed because we don't - * dirty the Linux inode until after the transaction I/O - * completes. + * Not much we can do for the first async pass. Writing out the + * superblock would be counter-productive as we are going to redirty + * when writing out other data and metadata (and writing out a single + * block is quite fast anyway). + * + * Try to asynchronously kick off quota syncing at least. */ - if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) - error = xfs_quiesce_data(mp); - else - error = xfs_sync_fsdata(mp, 0); + if (!wait) { + xfs_qm_sync(mp, SYNC_TRYLOCK); + return 0; + } - if (unlikely(laptop_mode)) { + error = xfs_quiesce_data(mp); + if (error) + return -error; + + if (laptop_mode) { int prev_sync_seq = mp->m_sync_seq; /* @@ -1165,7 +1191,7 @@ xfs_fs_sync_super( mp->m_sync_seq != prev_sync_seq); } - return -error; + return 0; } STATIC int @@ -1532,13 +1558,14 @@ xfs_fs_get_sb( mnt); } -static struct super_operations xfs_super_operations = { +static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, + .dirty_inode = xfs_fs_dirty_inode, .write_inode = xfs_fs_write_inode, .clear_inode = xfs_fs_clear_inode, .put_super = xfs_fs_put_super, - .sync_fs = xfs_fs_sync_super, + .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 5a2ea3a21781..18175ebd58ed 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -93,7 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); extern const struct export_operations xfs_export_operations; extern struct xattr_handler *xfs_xattr_handlers[]; -extern struct quotactl_ops xfs_quotactl_operations; +extern const struct quotactl_ops xfs_quotactl_operations; #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 98ef624d9baf..961df0a22c78 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -309,11 +309,15 @@ xfs_sync_attr( STATIC int xfs_commit_dummy_trans( struct xfs_mount *mp, - uint log_flags) + uint flags) { struct xfs_inode *ip = mp->m_rootip; struct xfs_trans *tp; int error; + int log_flags = XFS_LOG_FORCE; + + if (flags & SYNC_WAIT) + log_flags |= XFS_LOG_SYNC; /* * Put a dummy transaction in the log to tell recovery @@ -331,13 +335,12 @@ xfs_commit_dummy_trans( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - /* XXX(hch): ignoring the error here.. */ error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* the log force ensures this transaction is pushed to disk */ xfs_log_force(mp, 0, log_flags); - return 0; + return error; } int @@ -385,7 +388,20 @@ xfs_sync_fsdata( else XFS_BUF_ASYNC(bp); - return xfs_bwrite(mp, bp); + error = xfs_bwrite(mp, bp); + if (error) + return error; + + /* + * If this is a data integrity sync make sure all pending buffers + * are flushed out for the log coverage check below. + */ + if (flags & SYNC_WAIT) + xfs_flush_buftarg(mp->m_ddev_targp, 1); + + if (xfs_log_need_covered(mp)) + error = xfs_commit_dummy_trans(mp, flags); + return error; out_brelse: xfs_buf_relse(bp); @@ -419,14 +435,16 @@ xfs_quiesce_data( /* push non-blocking */ xfs_sync_data(mp, 0); xfs_qm_sync(mp, SYNC_TRYLOCK); - xfs_filestream_flush(mp); - /* push and block */ + /* push and block till complete */ xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); + /* drop inode references pinned by filestreams */ + xfs_filestream_flush(mp); + /* write superblock and hoover up shutdown errors */ - error = xfs_sync_fsdata(mp, 0); + error = xfs_sync_fsdata(mp, SYNC_WAIT); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -570,8 +588,6 @@ xfs_sync_worker( /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); - if (xfs_log_need_covered(mp)) - error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); @@ -749,21 +765,6 @@ __xfs_inode_clear_reclaim_tag( XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); } -void -xfs_inode_clear_reclaim_tag( - xfs_inode_t *ip) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); - - read_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - __xfs_inode_clear_reclaim_tag(mp, pag, ip); - spin_unlock(&ip->i_flags_lock); - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); -} - STATIC int xfs_reclaim_inode_now( struct xfs_inode *ip, diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 59120602588a..27920eb7a820 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -49,7 +49,6 @@ int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); -void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 916c0ffb6083..c5bc67c4e3bb 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -26,7 +26,6 @@ STATIC int xfs_stats_clear_proc_handler( ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler( int c, ret, *valp = ctl->data; __uint32_t vn_active; - ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { printk("XFS Clearing xfsstats\n"); diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 21b08c0396a1..83e7ea3e25fa 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -48,50 +48,34 @@ struct xqmstats xqmstats; -STATIC int -xfs_qm_read_xfsquota( - char *buffer, - char **start, - off_t offset, - int count, - int *eof, - void *data) +static int xqm_proc_show(struct seq_file *m, void *v) { - int len; - /* maximum; incore; ratio free to inuse; freelist */ - len = sprintf(buffer, "%d\t%d\t%d\t%u\n", + seq_printf(m, "%d\t%d\t%d\t%u\n", ndquot, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); - - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - - return len; + return 0; } -STATIC int -xfs_qm_read_stats( - char *buffer, - char **start, - off_t offset, - int count, - int *eof, - void *data) +static int xqm_proc_open(struct inode *inode, struct file *file) { - int len; + return single_open(file, xqm_proc_show, NULL); +} + +static const struct file_operations xqm_proc_fops = { + .owner = THIS_MODULE, + .open = xqm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +static int xqmstat_proc_show(struct seq_file *m, void *v) +{ /* quota performance statistics */ - len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n", + seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", xqmstats.xs_qm_dqreclaims, xqmstats.xs_qm_dqreclaim_misses, xqmstats.xs_qm_dquot_dups, @@ -100,25 +84,27 @@ xfs_qm_read_stats( xqmstats.xs_qm_dqwants, xqmstats.xs_qm_dqshake_reclaims, xqmstats.xs_qm_dqinact_reclaims); + return 0; +} - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - - return len; +static int xqmstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqmstat_proc_show, NULL); } +static const struct file_operations xqmstat_proc_fops = { + .owner = THIS_MODULE, + .open = xqmstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + void xfs_qm_init_procfs(void) { - create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL); - create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL); + proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); + proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); } void diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 4e4276b956e8..5d1a3b98a6e6 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -876,7 +876,6 @@ xfs_dqrele_inode( ip->i_gdquot = NULL; } xfs_iput(ip, XFS_ILOCK_EXCL); - IRELE(ip); return 0; } diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index f24b50b68d03..a5d54bf4931b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -198,6 +198,15 @@ typedef struct xfs_perag xfs_agino_t pagi_count; /* number of allocated inodes */ int pagb_count; /* pagb slots in use */ xfs_perag_busy_t *pagb_list; /* unstable blocks */ + + /* + * Inode allocation search lookup optimisation. + * If the pagino matches, the search for new inodes + * doesn't need to search the near ones again straight away + */ + xfs_agino_t pagl_pagino; + xfs_agino_t pagl_leftrec; + xfs_agino_t pagl_rightrec; #ifdef __KERNEL__ spinlock_t pagb_lock; /* lock for pagb_list */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8ee5b5a76a2a..8971fb09d387 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3713,7 +3713,7 @@ done: * entry (null if none). Else, *lastxp will be set to the index * of the found entry; *gotp will contain the entry. */ -xfs_bmbt_rec_host_t * /* pointer to found extent entry */ +STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ xfs_bmap_search_multi_extents( xfs_ifork_t *ifp, /* inode fork pointer */ xfs_fileoff_t bno, /* block number searched for */ diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 1b8ff9256bd0..56f62d2edc35 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -392,17 +392,6 @@ xfs_bmap_count_blocks( int whichfork, int *count); -/* - * Search the extent records for the entry containing block bno. - * If bno lies in a hole, point to the next entry. If bno lies - * past eof, *eofp will be set, and *prevp will contain the last - * entry (null if none). Else, *lastxp will be set to the index - * of the found entry; *gotp will contain the entry. - */ -xfs_bmbt_rec_host_t * -xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, - xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); - #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 5c1ade06578e..eb7b702d0690 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -202,16 +202,6 @@ xfs_bmbt_get_state( ext_flag); } -/* Endian flipping versions of the bmbt extraction functions */ -void -xfs_bmbt_disk_get_all( - xfs_bmbt_rec_t *r, - xfs_bmbt_irec_t *s) -{ - __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), - get_unaligned_be64(&r->l1), s); -} - /* * Extract the blockcount field from an on disk bmap extent record. */ @@ -816,6 +806,16 @@ xfs_bmbt_trace_key( *l1 = 0; } +/* Endian flipping versions of the bmbt extraction functions */ +STATIC void +xfs_bmbt_disk_get_all( + xfs_bmbt_rec_t *r, + xfs_bmbt_irec_t *s) +{ + __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), + get_unaligned_be64(&r->l1), s); +} + STATIC void xfs_bmbt_trace_record( struct xfs_btree_cur *cur, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 0e8df007615e..5549d495947f 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -220,7 +220,6 @@ extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); -extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 26717388acf5..52b5f14d0c32 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -646,46 +646,6 @@ xfs_btree_read_bufl( } /* - * Get a buffer for the block, return it read in. - * Short-form addressing. - */ -int /* error */ -xfs_btree_read_bufs( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock, /* lock flags for read_buf */ - xfs_buf_t **bpp, /* buffer for agno/agbno */ - int refval) /* ref count value for buffer */ -{ - xfs_buf_t *bp; /* return value */ - xfs_daddr_t d; /* real disk block address */ - int error; - - ASSERT(agno != NULLAGNUMBER); - ASSERT(agbno != NULLAGBLOCK); - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp))) { - return error; - } - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (bp != NULL) { - switch (refval) { - case XFS_ALLOC_BTREE_REF: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); - break; - case XFS_INO_BTREE_REF: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval); - break; - } - } - *bpp = bp; - return 0; -} - -/* * Read-ahead the block, don't wait for it, don't return a buffer. * Long-form addressing. */ @@ -2951,7 +2911,7 @@ error0: * inode we have to copy the single block it was pointing to into the * inode. */ -int +STATIC int xfs_btree_kill_iroot( struct xfs_btree_cur *cur) { diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 4f852b735b96..7fa07062bdda 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -379,20 +379,6 @@ xfs_btree_read_bufl( int refval);/* ref count value for buffer */ /* - * Get a buffer for the block, return it read in. - * Short-form addressing. - */ -int /* error */ -xfs_btree_read_bufs( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock, /* lock flags for read_buf */ - struct xfs_buf **bpp, /* buffer for agno/agbno */ - int refval);/* ref count value for buffer */ - -/* * Read-ahead the block, don't wait for it, don't return a buffer. * Long-form addressing. */ @@ -432,7 +418,6 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); -int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); int xfs_btree_delete(struct xfs_btree_cur *, int *); int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 7465f9ee125f..ab89a7e94a0f 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -206,10 +206,10 @@ xfs_swap_extents( * process that the file was not changed out from * under it. */ - if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || - (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || - (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || - (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { + if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || + (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || + (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || + (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { error = XFS_ERROR(EBUSY); goto out_unlock; } diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index fa913e459442..41ad537c49e9 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -854,6 +854,7 @@ xfs_dir2_leaf_getdents( */ ra_want = howmany(bufsize + mp->m_dirblksize, mp->m_sb.sb_blocksize) - 1; + ASSERT(ra_want >= 0); /* * If we don't have as many as we want, and we haven't @@ -1088,7 +1089,8 @@ xfs_dir2_leaf_getdents( */ ptr += length; curoff += length; - bufsize -= length; + /* bufsize may have just been a guess; don't go negative */ + bufsize = bufsize > length ? bufsize - length : 0; } /* diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c4ea51b55dce..f52ac276277e 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -117,7 +117,7 @@ struct getbmapx { #define BMV_IF_VALID \ (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) -/* bmv_oflags values - returned for for each non-header segment */ +/* bmv_oflags values - returned for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ #define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ #define BMV_OF_LAST 0x4 /* segment is the last in the file */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3120a3a5e20f..0785797db828 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -57,75 +57,35 @@ xfs_ialloc_cluster_alignment( } /* - * Lookup the record equal to ino in the btree given by cur. - */ -STATIC int /* error */ -xfs_inobt_lookup_eq( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -/* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. + * Lookup a record by ino in the btree given by cur. */ int /* error */ -xfs_inobt_lookup_ge( +xfs_inobt_lookup( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ + xfs_lookup_t dir, /* <=, >=, == */ int *stat) /* success/failure */ { cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); + cur->bc_rec.i.ir_freecount = 0; + cur->bc_rec.i.ir_free = 0; + return xfs_btree_lookup(cur, dir, stat); } /* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup_le( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); -} - -/* - * Update the record referred to by cur to the value given - * by [ino, fcnt, free]. + * Update the record referred to by cur to the value given. * This either works (return 0) or gets an EFSCORRUPTED error. */ STATIC int /* error */ xfs_inobt_update( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free) /* free inode mask */ + xfs_inobt_rec_incore_t *irec) /* btree record */ { union xfs_btree_rec rec; - rec.inobt.ir_startino = cpu_to_be32(ino); - rec.inobt.ir_freecount = cpu_to_be32(fcnt); - rec.inobt.ir_free = cpu_to_be64(free); + rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); + rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); + rec.inobt.ir_free = cpu_to_be64(irec->ir_free); return xfs_btree_update(cur, &rec); } @@ -135,9 +95,7 @@ xfs_inobt_update( int /* error */ xfs_inobt_get_rec( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t *ino, /* output: starting inode of chunk */ - __int32_t *fcnt, /* output: number of free inodes */ - xfs_inofree_t *free, /* output: free inode mask */ + xfs_inobt_rec_incore_t *irec, /* btree record */ int *stat) /* output: success/failure */ { union xfs_btree_rec *rec; @@ -145,14 +103,136 @@ xfs_inobt_get_rec( error = xfs_btree_get_rec(cur, &rec, stat); if (!error && *stat == 1) { - *ino = be32_to_cpu(rec->inobt.ir_startino); - *fcnt = be32_to_cpu(rec->inobt.ir_freecount); - *free = be64_to_cpu(rec->inobt.ir_free); + irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); + irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); + irec->ir_free = be64_to_cpu(rec->inobt.ir_free); } return error; } /* + * Verify that the number of free inodes in the AGI is correct. + */ +#ifdef DEBUG +STATIC int +xfs_check_agi_freecount( + struct xfs_btree_cur *cur, + struct xfs_agi *agi) +{ + if (cur->bc_nlevels == 1) { + xfs_inobt_rec_incore_t rec; + int freecount = 0; + int error; + int i; + + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + return error; + + do { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + return error; + + if (i) { + freecount += rec.ir_freecount; + error = xfs_btree_increment(cur, 0, &i); + if (error) + return error; + } + } while (i == 1); + + if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) + ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); + } + return 0; +} +#else +#define xfs_check_agi_freecount(cur, agi) 0 +#endif + +/* + * Initialise a new set of inodes. + */ +STATIC void +xfs_ialloc_inode_init( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_agblock_t length, + unsigned int gen) +{ + struct xfs_buf *fbuf; + struct xfs_dinode *free; + int blks_per_cluster, nbufs, ninodes; + int version; + int i, j; + xfs_daddr_t d; + + /* + * Loop over the new block(s), filling in the inodes. + * For small block sizes, manipulate the inodes in buffers + * which are multiples of the blocks size. + */ + if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { + blks_per_cluster = 1; + nbufs = length; + ninodes = mp->m_sb.sb_inopblock; + } else { + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / + mp->m_sb.sb_blocksize; + nbufs = length / blks_per_cluster; + ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; + } + + /* + * Figure out what version number to use in the inodes we create. + * If the superblock version has caught up to the one that supports + * the new inode format, then use the new inode version. Otherwise + * use the old version so that old kernels will continue to be + * able to use the file system. + */ + if (xfs_sb_version_hasnlink(&mp->m_sb)) + version = 2; + else + version = 1; + + for (j = 0; j < nbufs; j++) { + /* + * Get the block. + */ + d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); + fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + mp->m_bsize * blks_per_cluster, + XFS_BUF_LOCK); + ASSERT(fbuf); + ASSERT(!XFS_BUF_GETERROR(fbuf)); + + /* + * Initialize all inodes in this buffer and then log them. + * + * XXX: It would be much better if we had just one transaction + * to log a whole cluster of inodes instead of all the + * individual transactions causing a lot of log traffic. + */ + xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); + for (i = 0; i < ninodes; i++) { + int ioffset = i << mp->m_sb.sb_inodelog; + uint isize = sizeof(struct xfs_dinode); + + free = xfs_make_iptr(mp, fbuf, i); + free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); + free->di_version = version; + free->di_gen = cpu_to_be32(gen); + free->di_next_unlinked = cpu_to_be32(NULLAGINO); + xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); + } + xfs_trans_inode_alloc_buf(tp, fbuf); + } +} + +/* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. */ @@ -164,24 +244,15 @@ xfs_ialloc_ag_alloc( { xfs_agi_t *agi; /* allocation group header */ xfs_alloc_arg_t args; /* allocation argument structure */ - int blks_per_cluster; /* fs blocks per inode cluster */ xfs_btree_cur_t *cur; /* inode btree cursor */ - xfs_daddr_t d; /* disk addr of buffer */ xfs_agnumber_t agno; int error; - xfs_buf_t *fbuf; /* new free inodes' buffer */ - xfs_dinode_t *free; /* new free inode structure */ - int i; /* inode counter */ - int j; /* block counter */ - int nbufs; /* num bufs of new inodes */ + int i; xfs_agino_t newino; /* new first inode's number */ xfs_agino_t newlen; /* new number of inodes */ - int ninodes; /* num inodes per buf */ xfs_agino_t thisino; /* current inode number, for loop */ - int version; /* inode version number to use */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ - unsigned int gen; args.tp = tp; args.mp = tp->t_mountp; @@ -202,12 +273,12 @@ xfs_ialloc_ag_alloc( */ agi = XFS_BUF_TO_AGI(agbp); newino = be32_to_cpu(agi->agi_newino); + agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + XFS_IALLOC_BLOCKS(args.mp); if (likely(newino != NULLAGINO && (args.agbno < be32_to_cpu(agi->agi_length)))) { - args.fsbno = XFS_AGB_TO_FSB(args.mp, - be32_to_cpu(agi->agi_seqno), args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.type = XFS_ALLOCTYPE_THIS_BNO; args.mod = args.total = args.wasdel = args.isfl = args.userdata = args.minalignslop = 0; @@ -258,8 +329,7 @@ xfs_ialloc_ag_alloc( * For now, just allocate blocks up front. */ args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, - be32_to_cpu(agi->agi_seqno), args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); /* * Allocate a fixed-size extent of inodes. */ @@ -282,8 +352,7 @@ xfs_ialloc_ag_alloc( if (isaligned && args.fsbno == NULLFSBLOCK) { args.type = XFS_ALLOCTYPE_NEAR_BNO; args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, - be32_to_cpu(agi->agi_seqno), args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.alignment = xfs_ialloc_cluster_alignment(&args); if ((error = xfs_alloc_vextent(&args))) return error; @@ -294,85 +363,30 @@ xfs_ialloc_ag_alloc( return 0; } ASSERT(args.len == args.minlen); - /* - * Convert the results. - */ - newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); - /* - * Loop over the new block(s), filling in the inodes. - * For small block sizes, manipulate the inodes in buffers - * which are multiples of the blocks size. - */ - if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { - blks_per_cluster = 1; - nbufs = (int)args.len; - ninodes = args.mp->m_sb.sb_inopblock; - } else { - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / - args.mp->m_sb.sb_blocksize; - nbufs = (int)args.len / blks_per_cluster; - ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; - } - /* - * Figure out what version number to use in the inodes we create. - * If the superblock version has caught up to the one that supports - * the new inode format, then use the new inode version. Otherwise - * use the old version so that old kernels will continue to be - * able to use the file system. - */ - if (xfs_sb_version_hasnlink(&args.mp->m_sb)) - version = 2; - else - version = 1; /* + * Stamp and write the inode buffers. + * * Seed the new inode cluster with a random generation number. This * prevents short-term reuse of generation numbers if a chunk is * freed and then immediately reallocated. We use random numbers * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - gen = random32(); - for (j = 0; j < nbufs; j++) { - /* - * Get the block. - */ - d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno), - args.agbno + (j * blks_per_cluster)); - fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, - args.mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); - ASSERT(fbuf); - ASSERT(!XFS_BUF_GETERROR(fbuf)); + xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len, + random32()); - /* - * Initialize all inodes in this buffer and then log them. - * - * XXX: It would be much better if we had just one transaction to - * log a whole cluster of inodes instead of all the individual - * transactions causing a lot of log traffic. - */ - xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); - for (i = 0; i < ninodes; i++) { - int ioffset = i << args.mp->m_sb.sb_inodelog; - uint isize = sizeof(struct xfs_dinode); - - free = xfs_make_iptr(args.mp, fbuf, i); - free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - free->di_version = version; - free->di_gen = cpu_to_be32(gen); - free->di_next_unlinked = cpu_to_be32(NULLAGINO); - xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); - } - xfs_trans_inode_alloc_buf(tp, fbuf); - } + /* + * Convert the results. + */ + newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - agno = be32_to_cpu(agi->agi_seqno); down_read(&args.mp->m_peraglock); args.mp->m_perag[agno].pagi_freecount += newlen; up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); + /* * Insert records describing the new inode chunk into the btree. */ @@ -380,13 +394,17 @@ xfs_ialloc_ag_alloc( for (thisino = newino; thisino < newino + newlen; thisino += XFS_INODES_PER_CHUNK) { - if ((error = xfs_inobt_lookup_eq(cur, thisino, - XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { + cur->bc_rec.i.ir_startino = thisino; + cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; + cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; + error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); + if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; } ASSERT(i == 0); - if ((error = xfs_btree_insert(cur, &i))) { + error = xfs_btree_insert(cur, &i); + if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; } @@ -539,6 +557,62 @@ nextag: } /* + * Try to retrieve the next record to the left/right from the current one. + */ +STATIC int +xfs_ialloc_next_rec( + struct xfs_btree_cur *cur, + xfs_inobt_rec_incore_t *rec, + int *done, + int left) +{ + int error; + int i; + + if (left) + error = xfs_btree_decrement(cur, 0, &i); + else + error = xfs_btree_increment(cur, 0, &i); + + if (error) + return error; + *done = !i; + if (i) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + + return 0; +} + +STATIC int +xfs_ialloc_get_rec( + struct xfs_btree_cur *cur, + xfs_agino_t agino, + xfs_inobt_rec_incore_t *rec, + int *done, + int left) +{ + int error; + int i; + + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); + if (error) + return error; + *done = !i; + if (i) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + + return 0; +} + +/* * Visible inode allocation functions. */ @@ -592,8 +666,8 @@ xfs_dialloc( int j; /* result code */ xfs_mount_t *mp; /* file system mount structure */ int offset; /* index of inode in chunk */ - xfs_agino_t pagino; /* parent's a.g. relative inode # */ - xfs_agnumber_t pagno; /* parent's allocation group number */ + xfs_agino_t pagino; /* parent's AG relative inode # */ + xfs_agnumber_t pagno; /* parent's AG number */ xfs_inobt_rec_incore_t rec; /* inode allocation record */ xfs_agnumber_t tagno; /* testing allocation group number */ xfs_btree_cur_t *tcur; /* temp cursor */ @@ -716,6 +790,8 @@ nextag: */ agno = tagno; *IO_agbp = NULL; + + restart_pagno: cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); /* * If pagino is 0 (this is the root inode allocation) use newino. @@ -723,220 +799,200 @@ nextag: */ if (!pagino) pagino = be32_to_cpu(agi->agi_newino); -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - do { - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } while (i == 1); + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif /* - * If in the same a.g. as the parent, try to get near the parent. + * If in the same AG as the parent, try to get near the parent. */ if (pagno == agno) { - if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) + xfs_perag_t *pag = &mp->m_perag[agno]; + int doneleft; /* done, to the left */ + int doneright; /* done, to the right */ + int searchdistance = 10; + + error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_inobt_get_rec(cur, &rec, &j); + if (error) goto error0; - if (i != 0 && - (error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &j)) == 0 && - j == 1 && - rec.ir_freecount > 0) { + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + if (rec.ir_freecount > 0) { /* * Found a free inode in the same chunk - * as parent, done. + * as the parent, done. */ + goto alloc_inode; } + + + /* + * In the same AG as parent, but parent's chunk is full. + */ + + /* duplicate the cursor, search left & right simultaneously */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + /* - * In the same a.g. as parent, but parent's chunk is full. + * Skip to last blocks looked up if same parent inode. */ - else { - int doneleft; /* done, to the left */ - int doneright; /* done, to the right */ + if (pagino != NULLAGINO && + pag->pagl_pagino == pagino && + pag->pagl_leftrec != NULLAGINO && + pag->pagl_rightrec != NULLAGINO) { + error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, + &trec, &doneleft, 1); + if (error) + goto error1; + error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, + &rec, &doneright, 0); if (error) - goto error0; - ASSERT(i == 1); - ASSERT(j == 1); - /* - * Duplicate the cursor, search left & right - * simultaneously. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - goto error0; - /* - * Search left with tcur, back up 1 record. - */ - if ((error = xfs_btree_decrement(tcur, 0, &i))) goto error1; - doneleft = !i; - if (!doneleft) { - if ((error = xfs_inobt_get_rec(tcur, - &trec.ir_startino, - &trec.ir_freecount, - &trec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, error1); - } - /* - * Search right with cur, go forward 1 record. - */ - if ((error = xfs_btree_increment(cur, 0, &i))) + } else { + /* search left with tcur, back up 1 record */ + error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); + if (error) goto error1; - doneright = !i; - if (!doneright) { - if ((error = xfs_inobt_get_rec(cur, - &rec.ir_startino, - &rec.ir_freecount, - &rec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, error1); - } - /* - * Loop until we find the closest inode chunk - * with a free one. - */ - while (!doneleft || !doneright) { - int useleft; /* using left inode - chunk this time */ + /* search right with cur, go forward 1 record. */ + error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); + if (error) + goto error1; + } + + /* + * Loop until we find an inode chunk with a free inode. + */ + while (!doneleft || !doneright) { + int useleft; /* using left inode chunk this time */ + + if (!--searchdistance) { /* - * Figure out which block is closer, - * if both are valid. - */ - if (!doneleft && !doneright) - useleft = - pagino - - (trec.ir_startino + - XFS_INODES_PER_CHUNK - 1) < - rec.ir_startino - pagino; - else - useleft = !doneleft; - /* - * If checking the left, does it have - * free inodes? - */ - if (useleft && trec.ir_freecount) { - /* - * Yes, set it up as the chunk to use. - */ - rec = trec; - xfs_btree_del_cursor(cur, - XFS_BTREE_NOERROR); - cur = tcur; - break; - } - /* - * If checking the right, does it have - * free inodes? - */ - if (!useleft && rec.ir_freecount) { - /* - * Yes, it's already set up. - */ - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - break; - } - /* - * If used the left, get another one - * further left. - */ - if (useleft) { - if ((error = xfs_btree_decrement(tcur, 0, - &i))) - goto error1; - doneleft = !i; - if (!doneleft) { - if ((error = xfs_inobt_get_rec( - tcur, - &trec.ir_startino, - &trec.ir_freecount, - &trec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, - error1); - } - } - /* - * If used the right, get another one - * further right. + * Not in range - save last search + * location and allocate a new inode */ - else { - if ((error = xfs_btree_increment(cur, 0, - &i))) - goto error1; - doneright = !i; - if (!doneright) { - if ((error = xfs_inobt_get_rec( - cur, - &rec.ir_startino, - &rec.ir_freecount, - &rec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, - error1); - } - } + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto newino; + } + + /* figure out the closer block if both are valid. */ + if (!doneleft && !doneright) { + useleft = pagino - + (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < + rec.ir_startino - pagino; + } else { + useleft = !doneleft; } - ASSERT(!doneleft || !doneright); + + /* free inodes to the left? */ + if (useleft && trec.ir_freecount) { + rec = trec; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + cur = tcur; + + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto alloc_inode; + } + + /* free inodes to the right? */ + if (!useleft && rec.ir_freecount) { + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto alloc_inode; + } + + /* get next record to check */ + if (useleft) { + error = xfs_ialloc_next_rec(tcur, &trec, + &doneleft, 1); + } else { + error = xfs_ialloc_next_rec(cur, &rec, + &doneright, 0); + } + if (error) + goto error1; } + + /* + * We've reached the end of the btree. because + * we are only searching a small chunk of the + * btree each search, there is obviously free + * inodes closer to the parent inode than we + * are now. restart the search again. + */ + pag->pagl_pagino = NULLAGINO; + pag->pagl_leftrec = NULLAGINO; + pag->pagl_rightrec = NULLAGINO; + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + goto restart_pagno; } + /* - * In a different a.g. from the parent. + * In a different AG from the parent. * See if the most recently allocated block has any free. */ - else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { - if ((error = xfs_inobt_lookup_eq(cur, - be32_to_cpu(agi->agi_newino), 0, 0, &i))) +newino: + if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { + error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), + XFS_LOOKUP_EQ, &i); + if (error) goto error0; - if (i == 1 && - (error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &j)) == 0 && - j == 1 && - rec.ir_freecount > 0) { - /* - * The last chunk allocated in the group still has - * a free inode. - */ - } - /* - * None left in the last group, search the whole a.g. - */ - else { + + if (i == 1) { + error = xfs_inobt_get_rec(cur, &rec, &j); if (error) goto error0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - ASSERT(i == 1); - for (;;) { - if ((error = xfs_inobt_get_rec(cur, - &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, - &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (rec.ir_freecount > 0) - break; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + if (j == 1 && rec.ir_freecount > 0) { + /* + * The last chunk allocated in the group + * still has a free inode. + */ + goto alloc_inode; } } } + + /* + * None left in the last group, search the whole AG + */ + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + for (;;) { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (rec.ir_freecount > 0) + break; + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + +alloc_inode: offset = xfs_ialloc_find_free(&rec.ir_free); ASSERT(offset >= 0); ASSERT(offset < XFS_INODES_PER_CHUNK); @@ -945,33 +1001,19 @@ nextag: ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; - if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, - rec.ir_free))) + error = xfs_inobt_update(cur, &rec); + if (error) goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[tagno].pagi_freecount--; up_read(&mp->m_peraglock); -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - do { - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } while (i == 1); - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); *inop = ino; @@ -1062,38 +1104,23 @@ xfs_difree( * Initialize the cursor. */ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - do { - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &i))) - goto error0; - if (i) { - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } - } while (i == 1); - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + /* * Look for the entry describing this inode. */ - if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { + if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { cmn_err(CE_WARN, - "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", + "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", error, mp->m_fsname); goto error0; } XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, - &rec.ir_free, &i))) { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) { cmn_err(CE_WARN, "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", error, mp->m_fsname); @@ -1148,12 +1175,14 @@ xfs_difree( } else { *delete = 0; - if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { + error = xfs_inobt_update(cur, &rec); + if (error) { cmn_err(CE_WARN, - "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", + "xfs_difree: xfs_inobt_update returned an error %d on %s.", error, mp->m_fsname); goto error0; } + /* * Change the inode free counts and log the ag/sb changes. */ @@ -1165,28 +1194,10 @@ xfs_difree( xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - do { - if ((error = xfs_inobt_get_rec(cur, - &rec.ir_startino, - &rec.ir_freecount, - &rec.ir_free, &i))) - goto error0; - if (i) { - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } - } while (i == 1); - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return 0; @@ -1297,9 +1308,7 @@ xfs_imap( chunk_agbno = agbno - offset_agbno; } else { xfs_btree_cur_t *cur; /* inode btree cursor */ - xfs_agino_t chunk_agino; /* first agino in inode chunk */ - __int32_t chunk_cnt; /* count of free inodes in chunk */ - xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ + xfs_inobt_rec_incore_t chunk_rec; xfs_buf_t *agbp; /* agi buffer */ int i; /* temp state */ @@ -1315,15 +1324,14 @@ xfs_imap( } cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_lookup_le() failed"); + "xfs_inobt_lookup() failed"); goto error0; } - error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, - &chunk_free, &i); + error = xfs_inobt_get_rec(cur, &chunk_rec, &i); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_get_rec() failed"); @@ -1341,7 +1349,7 @@ xfs_imap( xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); if (error) return error; - chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); + chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino); offset_agbno = agbno - chunk_agbno; } diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index aeee8278f92c..bb5385475e1f 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -150,23 +150,15 @@ xfs_ialloc_pagi_init( xfs_agnumber_t agno); /* allocation group number */ /* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. + * Lookup a record by ino in the btree given by cur. */ -int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - -/* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); +int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, + xfs_lookup_t dir, int *stat); /* * Get the data from the pointed-to record. */ -extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, - __int32_t *fcnt, xfs_inofree_t *free, int *stat); +extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, + xfs_inobt_rec_incore_t *rec, int *stat); #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index ecbf8b4d2e2e..80e526489be5 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -82,7 +82,6 @@ xfs_inode_alloc( memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; ip->i_update_core = 0; - ip->i_update_size = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; @@ -456,32 +455,6 @@ out_error_or_again: return error; } - -/* - * Look for the inode corresponding to the given ino in the hash table. - * If it is there and its i_transp pointer matches tp, return it. - * Otherwise, return NULL. - */ -xfs_inode_t * -xfs_inode_incore(xfs_mount_t *mp, - xfs_ino_t ino, - xfs_trans_t *tp) -{ - xfs_inode_t *ip; - xfs_perag_t *pag; - - pag = xfs_get_perag(mp, ino); - read_lock(&pag->pag_ici_lock); - ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)); - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); - - /* the returned inode must match the transaction */ - if (ip && (ip->i_transp != tp)) - return NULL; - return ip; -} - /* * Decrement reference count of an inode structure and unlock it. * diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index da428b3fe0f5..b92a4fa2a0a1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -651,7 +651,7 @@ xfs_iformat_btree( return 0; } -void +STATIC void xfs_dinode_from_disk( xfs_icdinode_t *to, xfs_dinode_t *from) @@ -1247,7 +1247,7 @@ xfs_isize_check( * In that case the pages will still be in memory, but the inode size * will never have been updated. */ -xfs_fsize_t +STATIC xfs_fsize_t xfs_file_last_byte( xfs_inode_t *ip) { @@ -3068,9 +3068,9 @@ xfs_iflush_int( SYNCHRONIZE(); /* - * Make sure to get the latest atime from the Linux inode. + * Make sure to get the latest timestamps from the Linux inode. */ - xfs_synchronize_atime(ip); + xfs_synchronize_times(ip); if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { @@ -3837,7 +3837,7 @@ xfs_iext_inline_to_direct( /* * Resize an extent indirection array to new_size bytes. */ -void +STATIC void xfs_iext_realloc_indirect( xfs_ifork_t *ifp, /* inode fork pointer */ int new_size) /* new indirection array size */ @@ -3862,7 +3862,7 @@ xfs_iext_realloc_indirect( /* * Switch from indirection array to linear (direct) extent allocations. */ -void +STATIC void xfs_iext_indirect_to_direct( xfs_ifork_t *ifp) /* inode fork pointer */ { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 65f24a3cc992..41555de1d1db 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -261,7 +261,6 @@ typedef struct xfs_inode { /* Miscellaneous state. */ unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ - unsigned char i_update_size; /* di_size field is dirty */ unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ @@ -468,8 +467,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * xfs_iget.c prototypes. */ -xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, - struct xfs_trans *); int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, uint, uint, xfs_inode_t **, xfs_daddr_t); void xfs_iput(xfs_inode_t *, uint); @@ -504,11 +501,10 @@ void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); void xfs_ichgtime(xfs_inode_t *, int); -xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, uint); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); -void xfs_synchronize_atime(xfs_inode_t *); +void xfs_synchronize_times(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); #if defined(XFS_INODE_TRACE) @@ -572,8 +568,6 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_buf **, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, xfs_daddr_t, uint); -void xfs_dinode_from_disk(struct xfs_icdinode *, - struct xfs_dinode *); void xfs_dinode_to_disk(struct xfs_dinode *, struct xfs_icdinode *); void xfs_idestroy_fork(struct xfs_inode *, int); @@ -592,8 +586,6 @@ void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_realloc_direct(xfs_ifork_t *, int); -void xfs_iext_realloc_indirect(xfs_ifork_t *, int); -void xfs_iext_indirect_to_direct(xfs_ifork_t *); void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_inline_to_direct(xfs_ifork_t *, int); void xfs_iext_destroy(xfs_ifork_t *); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 977c4aec587e..9794b876d6ff 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -232,6 +232,15 @@ xfs_inode_item_format( nvecs = 1; /* + * Make sure the linux inode is dirty. We do this before + * clearing i_update_core as the VFS will call back into + * XFS here and set i_update_core, so we need to dirty the + * inode first so that the ordering of i_update_core and + * unlogged modifications still works as described below. + */ + xfs_mark_inode_dirty_sync(ip); + + /* * Clear i_update_core if the timestamps (or any other * non-transactional modification) need flushing/logging * and we're about to log them with the rest of the core. @@ -263,22 +272,9 @@ xfs_inode_item_format( } /* - * We don't have to worry about re-ordering here because - * the update_size field is protected by the inode lock - * and we have that held in exclusive mode. + * Make sure to get the latest timestamps from the Linux inode. */ - if (ip->i_update_size) - ip->i_update_size = 0; - - /* - * Make sure to get the latest atime from the Linux inode. - */ - xfs_synchronize_atime(ip); - - /* - * make sure the linux inode is dirty - */ - xfs_mark_inode_dirty_sync(ip); + xfs_synchronize_times(ip); vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); @@ -712,8 +708,6 @@ xfs_inode_item_unlock( * Clear out the fields of the inode log item particular * to the current transaction. */ - iip->ili_ilock_recur = 0; - iip->ili_iolock_recur = 0; iip->ili_flags = 0; /* diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index a52ac125f055..65bae4c9b8bf 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -137,8 +137,6 @@ typedef struct xfs_inode_log_item { struct xfs_inode *ili_inode; /* inode ptr */ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ - unsigned short ili_ilock_recur; /* lock recursion count */ - unsigned short ili_iolock_recur; /* lock recursion count */ unsigned short ili_flags; /* misc flags */ unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h index 7a28191cb0de..b8e4ee4e89a4 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/xfs_inum.h @@ -72,7 +72,6 @@ struct xfs_mount; #if XFS_BIG_INUMS #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) -#define XFS_INO64_OFFSET ((xfs_ino_t)(1ULL << 32)) #else #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) #endif diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index aeb2d2221c7d..62efab2f3839 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -39,7 +39,7 @@ #include "xfs_error.h" #include "xfs_btree.h" -int +STATIC int xfs_internal_inum( xfs_mount_t *mp, xfs_ino_t ino) @@ -59,6 +59,7 @@ xfs_bulkstat_one_iget( { xfs_icdinode_t *dic; /* dinode core info pointer */ xfs_inode_t *ip; /* incore inode pointer */ + struct inode *inode; int error; error = xfs_iget(mp, NULL, ino, @@ -72,6 +73,7 @@ xfs_bulkstat_one_iget( ASSERT(ip->i_imap.im_blkno != 0); dic = &ip->i_d; + inode = VFS_I(ip); /* xfs_iget returns the following without needing * further change. @@ -83,16 +85,19 @@ xfs_bulkstat_one_iget( buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; buf->bs_size = dic->di_size; + /* - * We are reading the atime from the Linux inode because the - * dinode might not be uptodate. + * We need to read the timestamps from the Linux inode because + * the VFS keeps writing directly into the inode structure instead + * of telling us about the updates. */ - buf->bs_atime.tv_sec = VFS_I(ip)->i_atime.tv_sec; - buf->bs_atime.tv_nsec = VFS_I(ip)->i_atime.tv_nsec; - buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; - buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; - buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; - buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec; + buf->bs_atime.tv_sec = inode->i_atime.tv_sec; + buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; + buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; + buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; + buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; + buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; + buf->bs_xflags = xfs_ip2xflags(ip); buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; buf->bs_extents = dic->di_nextents; @@ -353,9 +358,6 @@ xfs_bulkstat( int end_of_ag; /* set if we've seen the ag end */ int error; /* error code */ int fmterror;/* bulkstat formatter result */ - __int32_t gcnt; /* current btree rec's count */ - xfs_inofree_t gfree; /* current btree rec's free mask */ - xfs_agino_t gino; /* current btree rec's start inode */ int i; /* loop index */ int icount; /* count of inodes good in irbuf */ size_t irbsize; /* size of irec buffer in bytes */ @@ -442,40 +444,43 @@ xfs_bulkstat( * we need to get the remainder of the chunk we're in. */ if (agino > 0) { + xfs_inobt_rec_incore_t r; + /* * Lookup the inode chunk that this inode lives in. */ - error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, + &tmp); if (!error && /* no I/O error */ tmp && /* lookup succeeded */ /* got the record, should always work */ - !(error = xfs_inobt_get_rec(cur, &gino, &gcnt, - &gfree, &i)) && + !(error = xfs_inobt_get_rec(cur, &r, &i)) && i == 1 && /* this is the right chunk */ - agino < gino + XFS_INODES_PER_CHUNK && + agino < r.ir_startino + XFS_INODES_PER_CHUNK && /* lastino was not last in chunk */ - (chunkidx = agino - gino + 1) < + (chunkidx = agino - r.ir_startino + 1) < XFS_INODES_PER_CHUNK && /* there are some left allocated */ xfs_inobt_maskn(chunkidx, - XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) { + XFS_INODES_PER_CHUNK - chunkidx) & + ~r.ir_free) { /* * Grab the chunk record. Mark all the * uninteresting inodes (because they're * before our start point) free. */ for (i = 0; i < chunkidx; i++) { - if (XFS_INOBT_MASK(i) & ~gfree) - gcnt++; + if (XFS_INOBT_MASK(i) & ~r.ir_free) + r.ir_freecount++; } - gfree |= xfs_inobt_maskn(0, chunkidx); - irbp->ir_startino = gino; - irbp->ir_freecount = gcnt; - irbp->ir_free = gfree; + r.ir_free |= xfs_inobt_maskn(0, chunkidx); + irbp->ir_startino = r.ir_startino; + irbp->ir_freecount = r.ir_freecount; + irbp->ir_free = r.ir_free; irbp++; - agino = gino + XFS_INODES_PER_CHUNK; - icount = XFS_INODES_PER_CHUNK - gcnt; + agino = r.ir_startino + XFS_INODES_PER_CHUNK; + icount = XFS_INODES_PER_CHUNK - r.ir_freecount; } else { /* * If any of those tests failed, bump the @@ -493,7 +498,7 @@ xfs_bulkstat( /* * Start of ag. Lookup the first inode chunk. */ - error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp); + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); icount = 0; } /* @@ -501,6 +506,8 @@ xfs_bulkstat( * until we run out of inodes or space in the buffer. */ while (irbp < irbufend && icount < ubcount) { + xfs_inobt_rec_incore_t r; + /* * Loop as long as we're unable to read the * inode btree. @@ -510,51 +517,55 @@ xfs_bulkstat( if (XFS_AGINO_TO_AGBNO(mp, agino) >= be32_to_cpu(agi->agi_length)) break; - error = xfs_inobt_lookup_ge(cur, agino, 0, 0, - &tmp); + error = xfs_inobt_lookup(cur, agino, + XFS_LOOKUP_GE, &tmp); cond_resched(); } /* * If ran off the end of the ag either with an error, * or the normal way, set end and stop collecting. */ - if (error || - (error = xfs_inobt_get_rec(cur, &gino, &gcnt, - &gfree, &i)) || - i == 0) { + if (error) { + end_of_ag = 1; + break; + } + + error = xfs_inobt_get_rec(cur, &r, &i); + if (error || i == 0) { end_of_ag = 1; break; } + /* * If this chunk has any allocated inodes, save it. * Also start read-ahead now for this chunk. */ - if (gcnt < XFS_INODES_PER_CHUNK) { + if (r.ir_freecount < XFS_INODES_PER_CHUNK) { /* * Loop over all clusters in the next chunk. * Do a readahead if there are any allocated * inodes in that cluster. */ - for (agbno = XFS_AGINO_TO_AGBNO(mp, gino), - chunkidx = 0; + agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); + for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; chunkidx += nicluster, agbno += nbcluster) { - if (xfs_inobt_maskn(chunkidx, - nicluster) & ~gfree) + if (xfs_inobt_maskn(chunkidx, nicluster) + & ~r.ir_free) xfs_btree_reada_bufs(mp, agno, agbno, nbcluster); } - irbp->ir_startino = gino; - irbp->ir_freecount = gcnt; - irbp->ir_free = gfree; + irbp->ir_startino = r.ir_startino; + irbp->ir_freecount = r.ir_freecount; + irbp->ir_free = r.ir_free; irbp++; - icount += XFS_INODES_PER_CHUNK - gcnt; + icount += XFS_INODES_PER_CHUNK - r.ir_freecount; } /* * Set agino to after this chunk and bump the cursor. */ - agino = gino + XFS_INODES_PER_CHUNK; + agino = r.ir_startino + XFS_INODES_PER_CHUNK; error = xfs_btree_increment(cur, 0, &tmp); cond_resched(); } @@ -820,9 +831,7 @@ xfs_inumbers( int bufidx; xfs_btree_cur_t *cur; int error; - __int32_t gcnt; - xfs_inofree_t gfree; - xfs_agino_t gino; + xfs_inobt_rec_incore_t r; int i; xfs_ino_t ino; int left; @@ -855,7 +864,8 @@ xfs_inumbers( continue; } cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); - error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, + &tmp); if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); cur = NULL; @@ -870,9 +880,8 @@ xfs_inumbers( continue; } } - if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, - &i)) || - i == 0) { + error = xfs_inobt_get_rec(cur, &r, &i); + if (error || i == 0) { xfs_buf_relse(agbp); agbp = NULL; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); @@ -881,10 +890,12 @@ xfs_inumbers( agino = 0; continue; } - agino = gino + XFS_INODES_PER_CHUNK - 1; - buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); - buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; - buffer[bufidx].xi_allocmask = ~gfree; + agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; + buffer[bufidx].xi_startino = + XFS_AGINO_TO_INO(mp, agno, r.ir_startino); + buffer[bufidx].xi_alloccount = + XFS_INODES_PER_CHUNK - r.ir_freecount; + buffer[bufidx].xi_allocmask = ~r.ir_free; bufidx++; left--; if (bufidx == bcount) { diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 1fb04e7deb61..20792bf45946 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -99,11 +99,6 @@ xfs_bulkstat_one( void *dibuff, int *stat); -int -xfs_internal_inum( - xfs_mount_t *mp, - xfs_ino_t ino); - typedef int (*inumbers_fmt_pf)( void __user *ubuffer, /* buffer to write to */ const xfs_inogrp_t *buffer, /* buffer to read from */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index bcad5f4c1fd1..679c7c4926a2 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -451,8 +451,6 @@ extern int xlog_find_tail(xlog_t *log, extern int xlog_recover(xlog_t *log); extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); -extern void xlog_recover_process_iunlinks(xlog_t *log); - extern struct xfs_buf *xlog_get_bp(xlog_t *, int); extern void xlog_put_bp(struct xfs_buf *); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 47da2fb45377..fb17f8226b09 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1980,7 +1980,7 @@ xlog_recover_do_reg_buffer( "XFS: NULL dquot in %s.", __func__); goto next; } - if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) { + if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { cmn_err(CE_ALERT, "XFS: dquot too small (%d) in %s.", item->ri_buf[i].i_len, __func__); @@ -2635,7 +2635,7 @@ xlog_recover_do_dquot_trans( "XFS: NULL dquot in %s.", __func__); return XFS_ERROR(EIO); } - if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) { + if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { cmn_err(CE_ALERT, "XFS: dquot too small (%d) in %s.", item->ri_buf[1].i_len, __func__); @@ -3263,7 +3263,7 @@ xlog_recover_process_one_iunlink( * freeing of the inode and its removal from the list must be * atomic. */ -void +STATIC void xlog_recover_process_iunlinks( xlog_t *log) { diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5c6f092659c1..8b6c9e807efb 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1568,7 +1568,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) * * The m_sb_lock must be held when this routine is called. */ -int +STATIC int xfs_mod_incore_sb_unlocked( xfs_mount_t *mp, xfs_sb_field_t field, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a5122382afde..a6c023bc0fb2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -414,13 +414,10 @@ typedef struct xfs_mod_sb { extern int xfs_log_sbcount(xfs_mount_t *, uint); extern int xfs_mountfs(xfs_mount_t *mp); -extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); -extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, - int64_t, int); extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index afee7eb24323..4b0613d99faa 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -564,35 +564,6 @@ xfs_mru_cache_lookup( } /* - * To look up an element using its key, but leave its location in the internal - * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this - * function returns NULL. - * - * See the comments above the declaration of the xfs_mru_cache_lookup() function - * for important locking information pertaining to this call. - */ -void * -xfs_mru_cache_peek( - xfs_mru_cache_t *mru, - unsigned long key) -{ - xfs_mru_cache_elem_t *elem; - - ASSERT(mru && mru->lists); - if (!mru || !mru->lists) - return NULL; - - spin_lock(&mru->lock); - elem = radix_tree_lookup(&mru->store, key); - if (!elem) - spin_unlock(&mru->lock); - else - __release(mru_lock); /* help sparse not be stupid */ - - return elem ? elem->value : NULL; -} - -/* * To release the internal data structure spinlock after having performed an * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() * with the data store pointer. diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index dd58ea1bbebe..5d439f34b0c9 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -49,7 +49,6 @@ int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); -void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_done(struct xfs_mru_cache *mru); #endif /* __XFS_MRU_CACHE_H__ */ diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index fea68615ed23..3f816ad7ff19 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -88,90 +88,6 @@ xfs_write_clear_setuid( } /* - * Handle logging requirements of various synchronous types of write. - */ -int -xfs_write_sync_logforce( - xfs_mount_t *mp, - xfs_inode_t *ip) -{ - int error = 0; - - /* - * If we're treating this as O_DSYNC and we have not updated the - * size, force the log. - */ - if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && - !(ip->i_update_size)) { - xfs_inode_log_item_t *iip = ip->i_itemp; - - /* - * If an allocation transaction occurred - * without extending the size, then we have to force - * the log up the proper point to ensure that the - * allocation is permanent. We can't count on - * the fact that buffered writes lock out direct I/O - * writes - the direct I/O write could have extended - * the size nontransactionally, then finished before - * we started. xfs_write_file will think that the file - * didn't grow but the update isn't safe unless the - * size change is logged. - * - * Force the log if we've committed a transaction - * against the inode or if someone else has and - * the commit record hasn't gone to disk (e.g. - * the inode is pinned). This guarantees that - * all changes affecting the inode are permanent - * when we return. - */ - if (iip && iip->ili_last_lsn) { - error = _xfs_log_force(mp, iip->ili_last_lsn, - XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); - } else if (xfs_ipincount(ip) > 0) { - error = _xfs_log_force(mp, (xfs_lsn_t)0, - XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); - } - - } else { - xfs_trans_t *tp; - - /* - * O_SYNC or O_DSYNC _with_ a size update are handled - * the same way. - * - * If the write was synchronous then we need to make - * sure that the inode modification time is permanent. - * We'll have updated the timestamp above, so here - * we use a synchronous transaction to log the inode. - * It's not fast, but it's necessary. - * - * If this a dsync write and the size got changed - * non-transactionally, then we need to ensure that - * the size change gets logged in a synchronous - * transaction. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); - if ((error = xfs_trans_reserve(tp, 0, - XFS_SWRITE_LOG_RES(mp), - 0, 0, 0))) { - /* Transaction reserve failed */ - xfs_trans_cancel(tp, 0); - } else { - /* Transaction reserve successful */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - } - - return error; -} - -/* * Force a shutdown of the filesystem instantly while keeping * the filesystem consistent. We don't do an unmount here; just shutdown * the shop, make sure that absolutely nothing persistent happens to diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index f76c003ec55d..f5e4874c37d8 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -68,7 +68,6 @@ xfs_get_extsz_hint( * Prototypes for functions in xfs_rw.c. */ extern int xfs_write_clear_setuid(struct xfs_inode *ip); -extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip); extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bioerror(struct xfs_buf *bp); extern int xfs_bioerror_relse(struct xfs_buf *bp); @@ -78,10 +77,4 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, xfs_buf_t *bp, xfs_daddr_t blkno); -/* - * Prototypes for functions in xfs_vnodeops.c. - */ -extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, - int flags); - #endif /* __XFS_RW_H__ */ diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 775249a54f6f..ed47fc77759c 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -68,7 +68,7 @@ typedef struct xfs_trans_header { #define XFS_TRANS_GROWFS 14 #define XFS_TRANS_STRAT_WRITE 15 #define XFS_TRANS_DIOSTRAT 16 -#define XFS_TRANS_WRITE_SYNC 17 +/* 17 was XFS_TRANS_WRITE_SYNC */ #define XFS_TRANS_WRITEID 18 #define XFS_TRANS_ADDAFORK 19 #define XFS_TRANS_ATTRINVAL 20 diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index f31271c30de9..2ffc570679be 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -467,6 +467,7 @@ xfs_trans_ail_update( { xfs_log_item_t *dlip = NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ + xfs_lsn_t tail_lsn; mlip = xfs_ail_min(ailp); @@ -483,8 +484,16 @@ xfs_trans_ail_update( if (mlip == dlip) { mlip = xfs_ail_min(ailp); + /* + * It is not safe to access mlip after the AIL lock is + * dropped, so we must get a copy of li_lsn before we do + * so. This is especially important on 32-bit platforms + * where accessing and updating 64-bit values like li_lsn + * is not atomic. + */ + tail_lsn = mlip->li_lsn; spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(ailp->xa_mount, mlip->li_lsn); + xfs_log_move_tail(ailp->xa_mount, tail_lsn); } else { spin_unlock(&ailp->xa_lock); } @@ -514,6 +523,7 @@ xfs_trans_ail_delete( { xfs_log_item_t *dlip; xfs_log_item_t *mlip; + xfs_lsn_t tail_lsn; if (lip->li_flags & XFS_LI_IN_AIL) { mlip = xfs_ail_min(ailp); @@ -527,9 +537,16 @@ xfs_trans_ail_delete( if (mlip == dlip) { mlip = xfs_ail_min(ailp); + /* + * It is not safe to access mlip after the AIL lock + * is dropped, so we must get a copy of li_lsn + * before we do so. This is especially important + * on 32-bit platforms where accessing and updating + * 64-bit values like li_lsn is not atomic. + */ + tail_lsn = mlip ? mlip->li_lsn : 0; spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(ailp->xa_mount, - (mlip ? mlip->li_lsn : 0)); + xfs_log_move_tail(ailp->xa_mount, tail_lsn); } else { spin_unlock(&ailp->xa_lock); } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8ee2f8c8b0a6..218829e6a152 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -307,7 +307,7 @@ xfs_trans_read_buf( return (flags & XFS_BUF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); - if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { + if (XFS_BUF_GETERROR(bp) != 0) { xfs_ioerror_alert("xfs_trans_read_buf", mp, bp, blkno); error = XFS_BUF_GETERROR(bp); @@ -315,7 +315,7 @@ xfs_trans_read_buf( return error; } #ifdef DEBUG - if (xfs_do_error && (bp != NULL)) { + if (xfs_do_error) { if (xfs_error_target == target) { if (((xfs_req_num++) % xfs_error_mod) == 0) { xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 23d276af2e0c..785ff101da0a 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -49,30 +49,7 @@ xfs_trans_inode_broot_debug( /* - * Get and lock the inode for the caller if it is not already - * locked within the given transaction. If it is already locked - * within the transaction, just increment its lock recursion count - * and return a pointer to it. - * - * For an inode to be locked in a transaction, the inode lock, as - * opposed to the io lock, must be taken exclusively. This ensures - * that the inode can be involved in only 1 transaction at a time. - * Lock recursion is handled on the io lock, but only for lock modes - * of equal or lesser strength. That is, you can recur on the io lock - * held EXCL with a SHARED request but not vice versa. Also, if - * the inode is already a part of the transaction then you cannot - * go from not holding the io lock to having it EXCL or SHARED. - * - * Use the inode cache routine xfs_inode_incore() to find the inode - * if it is already owned by this transaction. - * - * If we don't already own the inode, use xfs_iget() to get it. - * Since the inode log item structure is embedded in the incore - * inode structure and is initialized when the inode is brought - * into memory, there is nothing to do with it here. - * - * If the given transaction pointer is NULL, just call xfs_iget(). - * This simplifies code which must handle both cases. + * Get an inode and join it to the transaction. */ int xfs_trans_iget( @@ -84,62 +61,11 @@ xfs_trans_iget( xfs_inode_t **ipp) { int error; - xfs_inode_t *ip; - - /* - * If the transaction pointer is NULL, just call the normal - * xfs_iget(). - */ - if (tp == NULL) - return xfs_iget(mp, NULL, ino, flags, lock_flags, ipp, 0); - - /* - * If we find the inode in core with this transaction - * pointer in its i_transp field, then we know we already - * have it locked. In this case we just increment the lock - * recursion count and return the inode to the caller. - * Assert that the inode is already locked in the mode requested - * by the caller. We cannot do lock promotions yet, so - * die if someone gets this wrong. - */ - if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) { - /* - * Make sure that the inode lock is held EXCL and - * that the io lock is never upgraded when the inode - * is already a part of the transaction. - */ - ASSERT(ip->i_itemp != NULL); - ASSERT(lock_flags & XFS_ILOCK_EXCL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || - xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || - (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); - ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || - xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); - ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || - (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); - - if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { - ip->i_itemp->ili_iolock_recur++; - } - if (lock_flags & XFS_ILOCK_EXCL) { - ip->i_itemp->ili_ilock_recur++; - } - *ipp = ip; - return 0; - } - - ASSERT(lock_flags & XFS_ILOCK_EXCL); - error = xfs_iget(tp->t_mountp, tp, ino, flags, lock_flags, &ip, 0); - if (error) { - return error; - } - ASSERT(ip != NULL); - xfs_trans_ijoin(tp, ip, lock_flags); - *ipp = ip; - return 0; + error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); + if (!error && tp) + xfs_trans_ijoin(tp, *ipp, lock_flags); + return error; } /* @@ -163,8 +89,6 @@ xfs_trans_ijoin( xfs_inode_item_init(ip, ip->i_mount); iip = ip->i_itemp; ASSERT(iip->ili_flags == 0); - ASSERT(iip->ili_ilock_recur == 0); - ASSERT(iip->ili_iolock_recur == 0); /* * Get a log_item_desc to point at the new item. diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 492d75bae2bf..b572f7e840e0 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -611,7 +611,7 @@ xfs_fsync( xfs_inode_t *ip) { xfs_trans_t *tp; - int error; + int error = 0; int log_flushed = 0, changed = 1; xfs_itrace_entry(ip); @@ -619,14 +619,9 @@ xfs_fsync( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); - /* capture size updates in I/O completion before writing the inode. */ - error = xfs_wait_on_pages(ip, 0, -1); - if (error) - return XFS_ERROR(error); - /* * We always need to make sure that the required inode state is safe on - * disk. The vnode might be clean but we still might need to force the + * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue @@ -638,7 +633,7 @@ xfs_fsync( */ xfs_ilock(ip, XFS_ILOCK_SHARED); - if (!(ip->i_update_size || ip->i_update_core)) { + if (!ip->i_update_core) { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got @@ -718,7 +713,7 @@ xfs_fsync( * when the link count isn't zero and by xfs_dm_punch_hole() when * punching a hole to EOF. */ -int +STATIC int xfs_free_eofblocks( xfs_mount_t *mp, xfs_inode_t *ip, @@ -1476,8 +1471,8 @@ xfs_create( if (error == ENOSPC) { /* flush outstanding delalloc blocks and retry */ xfs_flush_inodes(dp); - error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); + error = xfs_trans_reserve(tp, resblks, log_res, 0, + XFS_TRANS_PERM_LOG_RES, log_count); } if (error == ENOSPC) { /* No space at all so try a "no-allocation" reservation */ @@ -2481,12 +2476,6 @@ xfs_reclaim( ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); /* - * Make sure the atime in the XFS inode is correct before freeing the - * Linux inode. - */ - xfs_synchronize_atime(ip); - - /* * If we have nothing to flush with this inode then complete the * teardown now, otherwise break the link between the xfs inode and the * linux inode and clean up the xfs inode later. This avoids flushing |